Coverage for polars_analysis/plotting/noise_stability

1import logging

2from pathlib import Path

3from typing import Any, Dict, List, Literal, Optional, Tuple

5import matplotlib.pyplot as plt

6import numpy as np

7import polars as pl

8from scipy import stats

10from polars_analysis import utils

11from polars_analysis.plotting import helper

12from polars_analysis.plotting.helper import Metadata

14logger = logging.getLogger(__name__)

17def _process_temp_correlation_data(

18 derived_data: pl.DataFrame,

19 temp_df: pl.DataFrame,

20 settling_time: Optional[int],

21 plot_dir: Path,

22 temp_str: str,

23 run_metadata: Metadata,

24) -> dict:

25 """Process temperature correlation data and create individual channel plots."""

26 colors = {"lo": "blue", "hi": "red"}

28 # Initialize data structures

29 slopes_data: Dict[str, Dict[int, float]] = {"lo": {}, "hi": {}}

30 intercepts_data: Dict[str, Dict[int, float]] = {"lo": {}, "hi": {}}

31 slope_errors: Dict[str, Dict[int, float]] = {"lo": {}, "hi": {}}

32 intercept_errors: Dict[str, Dict[int, float]] = {"lo": {}, "hi": {}}

33 combined_data: Dict[str, Dict[str, List[np.ndarray]]] = {

34 "lo": {"temp": [], "mean": [], "std": []},

35 "hi": {"temp": [], "mean": [], "std": []},

36 }

38 settling_time_plt = None

40 for gain in ["lo", "hi"]:

41 gain_filtered_df = derived_data.filter(pl.col("gain") == gain)

43 for channel in gain_filtered_df["channel"].unique().to_list():

44 channel_df = gain_filtered_df.filter(pl.col("channel") == channel)

45 if len(channel_df) == 0:

46 continue

48 # Apply settling time filter if specified

49 if settling_time is not None:

50 temp_df_tmp, settling_time_plt = _filter_by_settling_time(temp_df, settling_time)

51 channel_df_tmp, settling_time_plt = _filter_by_settling_time(channel_df, settling_time)

52 else:

53 temp_df_tmp = temp_df

54 channel_df_tmp = channel_df

56 # Join temperature and channel data

57 combined_df = _join_temp_channel_data(channel_df_tmp, temp_df_tmp)

59 # Extract arrays

60 mean_values = combined_df.select("mean").to_numpy().flatten()

61 std_values = combined_df.select("std").to_numpy().flatten()

62 temp_values = combined_df.select("temp_interp").to_numpy().flatten()

64 # Store combined data

65 combined_data[gain]["temp"].extend(temp_values)

66 combined_data[gain]["mean"].extend(mean_values)

67 combined_data[gain]["std"].extend(std_values)

69 # Perform linear regression

70 result = stats.linregress(temp_values, mean_values)

72 # Store fit results

73 slopes_data[gain][channel] = result.slope.astype(float)

74 intercepts_data[gain][channel] = result.intercept.astype(float)

75 slope_errors[gain][channel] = result.stderr.astype(float)

76 intercept_errors[gain][channel] = result.intercept_stderr.astype(float)

78 # Create individual channel plot

79 _plot_individual_channel(

80 temp_values,

81 mean_values,

82 std_values,

83 result.slope.astype(float),

84 result.intercept.astype(float),

85 channel,

86 gain,

87 colors[gain],

88 plot_dir,

89 temp_str,

90 settling_time_plt,

91 run_metadata,

92 )

94 return {

95 "slopes_data": slopes_data,

96 "intercepts_data": intercepts_data,

97 "slope_errors": slope_errors,

98 "intercept_errors": intercept_errors,

99 "combined_data": combined_data,

100 "colors": colors,

101 "settling_time_plt": settling_time_plt,

102 }

103

104

105def _join_temp_channel_data(

106 channel_df: pl.DataFrame,

107 temp_df: pl.DataFrame,

108) -> pl.DataFrame:

109 """Join channel and temperature data by timestamp."""

110 return channel_df.sort("timestamp").join_asof(

111 temp_df.select(["timestamp", "value"]).rename({"value": "temp_interp"}).sort("timestamp"),

112 on="timestamp",

113 strategy="nearest",

114 )

115

116

117def _plot_individual_channel(

118 temp_values: np.ndarray,

119 mean_values: np.ndarray,

120 std_values: np.ndarray,

121 slope: float,

122 intercept: float,

123 channel: int,

124 gain: str,

125 color: str,

126 plot_dir: Path,

127 temp_str: str,

128 settling_time_plt: Optional[int],

129 run_metadata: Metadata,

130) -> None:

131 """Create individual channel temperature correlation plot."""

132 channel_str = str(channel).zfill(3)

133

134 # Create fit line

135 temp_fit = np.linspace(temp_values.min(), temp_values.max(), 100)

136 mean_fit = slope * temp_fit + intercept

137

138 plt.figure(figsize=(10, 5))

139 plt.errorbar(temp_values, mean_values, yerr=std_values, fmt="o", color=color, label=f"Ch {channel_str} {gain} Gain")

140 plt.plot(temp_fit, mean_fit, "-", color=color, label=f"Fit: y = {slope:.3f}x + {intercept:.2f}")

141

142 settling_time_str = f", {settling_time_plt} min settling time" if settling_time_plt is not None else ""

143 plt.title(

144 helper.plot_summary_string(

145 board_id=run_metadata.board_id,

146 run_numbers=run_metadata.run_numbers,

147 name=f"Temperature vs. Pedestal Mean Ch.{channel_str} {gain} Gain{settling_time_str}",

148 attenuation=run_metadata.att_val,

149 pas_mode=run_metadata.pas_mode,

150 )

151 )

152 plt.xlabel("Temperature [°C]")

153 plt.ylabel("Pedestal Mean [ADC Counts]")

154 plt.legend()

155 plt.grid(True, alpha=0.3)

156 plt.tight_layout()

157

158 filename = f"mean_vs_temp{temp_str}_channel{channel_str}_{gain}.png"

159 plt.savefig(plot_dir / filename)

160 logger.debug(f"Saved to {plot_dir / filename}")

161 plt.close()

162

163

164def _create_combined_plots(

165 fit_results: Dict[str, Any],

166 plot_dir: Path,

167 temp_str: str,

168 run_metadata: Metadata,

169) -> dict:

170 """Create combined temperature correlation plots for all channels."""

171 combined_data = fit_results["combined_data"]

172 colors = fit_results["colors"]

173

174 plt.figure(figsize=(12, 8))

175

176 # Plot data points

177 for gain in ["lo", "hi"]:

178 if combined_data[gain]["temp"]:

179 temp_array = np.array(combined_data[gain]["temp"])

180 mean_array = np.array(combined_data[gain]["mean"])

181 std_array = np.array(combined_data[gain]["std"])

182

183 plt.errorbar(

184 temp_array,

185 mean_array,

186 yerr=std_array,

187 fmt="o",

188 color=colors[gain],

189 alpha=0.6,

190 label=f"{gain.capitalize()} Gain Data",

191 markersize=4,

192 )

193

194 # Create fit lines and combined fit

195 fit_dict = {}

196 all_temp = np.array(combined_data["lo"]["temp"] + combined_data["hi"]["temp"])

197 all_mean = np.array(combined_data["lo"]["mean"] + combined_data["hi"]["mean"])

198

199 for gain in ["lo", "hi"]:

200 if combined_data[gain]["temp"]:

201 temp_array = np.array(combined_data[gain]["temp"])

202 mean_array = np.array(combined_data[gain]["mean"])

203

204 result = stats.linregress(temp_array, mean_array)

205 temp_range = np.linspace(temp_array.min(), temp_array.max(), 100)

206 fit_line = result.slope * temp_range + result.intercept

207

208 plt.plot(

209 temp_range,

210 fit_line,

211 color=colors[gain],

212 linewidth=2,

213 label=f"{gain.capitalize()}: y = {result.slope:.3f}x + {result.intercept:.2f}",

214 )

215

216 fit_dict[gain] = {"slope": result.slope, "intercept": result.intercept}

217 board_id: str = run_metadata.board_id if run_metadata.board_id is not None else ""

218 utils.add_run_info(f"{gain}_slope", result.slope, board_id, plot_dir, False)

219 utils.add_run_info(f"{gain}_intercept", result.intercept, board_id, plot_dir, False)

220

221 # Combined fit

222 if len(all_temp) > 0:

223 result = stats.linregress(all_temp, all_mean)

224 slope_combined = result.slope

225 intercept_combined = result.intercept

226 temp_range_combined = np.linspace(all_temp.min(), all_temp.max(), 100)

227 fit_combined = slope_combined * temp_range_combined + intercept_combined

228 plt.plot(

229 temp_range_combined,

230 fit_combined,

231 color="black",

232 linewidth=2,

233 linestyle="--",

234 label=f"Combined: y = {slope_combined:.3f}x + {intercept_combined:.2f}",

235 )

236

237 plt.title(

238 helper.plot_summary_string(

239 board_id=run_metadata.board_id,

240 run_numbers=run_metadata.run_numbers,

241 name="Pedestal Mean vs. Temperature - All Channels Combined",

242 attenuation=run_metadata.att_val,

243 pas_mode=run_metadata.pas_mode,

244 )

245 )

246 plt.xlabel("Temperature [°C]")

247 plt.ylabel("Pedestal Mean [ADC Counts]")

248 plt.legend()

249 plt.grid(True, alpha=0.3)

250 plt.tight_layout()

251

252 filename = f"mean_vs_temp{temp_str}_combined.png"

253 plt.savefig(plot_dir / filename)

254 logger.debug(f"Saved combined plot to {plot_dir / filename}")

255 plt.close()

256

257 return fit_dict

258

259

260def _create_summary_plots(

261 fit_results: Dict[str, Any],

262 plot_dir: Path,

263 temp_str: str,

264 run_metadata: Metadata,

265) -> None:

266 """Create summary plots for temperature coefficients and intercepts."""

267 slopes_data = fit_results["slopes_data"]

268 intercepts_data = fit_results["intercepts_data"]

269 slope_errors = fit_results["slope_errors"]

270 intercept_errors = fit_results["intercept_errors"]

271 colors = fit_results["colors"]

272

273 # Temperature coefficient vs channel plot

274 plt.figure(figsize=(12, 6))

275 for gain in ["lo", "hi"]:

276 if slopes_data[gain]:

277 channels = sorted(slopes_data[gain].keys())

278 slopes = [slopes_data[gain][ch] for ch in channels]

279 slope_errs = [slope_errors[gain][ch] for ch in channels]

280

281 plt.errorbar(

282 channels,

283 slopes,

284 yerr=slope_errs,

285 fmt="o",

286 color=colors[gain],

287 label=f"{gain.capitalize()} Gain",

288 markersize=6,

289 capsize=3,

290 )

291

292 plt.title(

293 helper.plot_summary_string(

294 board_id=run_metadata.board_id,

295 run_numbers=run_metadata.run_numbers,

296 name="Temperature Coefficient vs Channel",

297 attenuation=run_metadata.att_val,

298 pas_mode=run_metadata.pas_mode,

299 )

300 )

301 plt.xlabel("Channel Number")

302 plt.ylabel("Temperature Coefficient [ADC Counts/°C]")

303 plt.legend()

304 plt.grid(True, alpha=0.3)

305 plt.tight_layout()

306

307 filename = f"temp{temp_str}_coefficient_vs_channel.png"

308 plt.savefig(plot_dir / filename)

309 logger.debug(f"Saved coefficient plot to {plot_dir / filename}")

310 plt.close()

311

312 # Y-intercept vs channel plot

313 plt.figure(figsize=(12, 6))

314 for gain in ["lo", "hi"]:

315 if intercepts_data[gain]:

316 channels = sorted(intercepts_data[gain].keys())

317 intercepts = [intercepts_data[gain][ch] for ch in channels]

318 intercept_errs = [intercept_errors[gain][ch] for ch in channels]

319

320 plt.errorbar(

321 channels,

322 intercepts,

323 yerr=intercept_errs,

324 fmt="o",

325 color=colors[gain],

326 label=f"{gain.capitalize()} Gain",

327 markersize=6,

328 capsize=3,

329 )

330

331 plt.title(

332 helper.plot_summary_string(

333 board_id=run_metadata.board_id,

334 run_numbers=run_metadata.run_numbers,

335 name="Y-Intercept vs Channel",

336 attenuation=run_metadata.att_val,

337 pas_mode=run_metadata.pas_mode,

338 )

339 )

340 plt.xlabel("Channel Number")

341 plt.ylabel("Y-Intercept [ADC Counts]")

342 plt.legend()

343 plt.grid(True, alpha=0.3)

344 plt.tight_layout()

345

346 filename = f"temp{temp_str}_intercept_vs_channel.png"

347 plt.savefig(plot_dir / filename)

348 logger.debug(f"Saved intercept plot to {plot_dir / filename}")

349 plt.close()

350

351

352def _create_corrected_time_plots(

353 derived_data: pl.DataFrame,

354 temp_df: pl.DataFrame,

355 fit_results: Dict[str, Any],

356 settling_time: Optional[int],

357 plot_dir: Path,

358 temp_str: str,

359 run_metadata: Metadata,

360) -> None:

361 """Create temperature-corrected mean vs time plots."""

362 if "fit_dict" not in fit_results:

363 return

364

365 fit_dict = fit_results["fit_dict"]

366 colors = fit_results["colors"]

367 settling_time_plt = fit_results["settling_time_plt"]

368

369 for gain in ["lo", "hi"]:

370 if gain not in fit_dict:

371 continue

372

373 gain_filtered_df = derived_data.filter(pl.col("gain") == gain)

374 slope = fit_dict[gain]["slope"]

375 intercept = fit_dict[gain]["intercept"]

376 color = colors[gain]

377

378 for channel in gain_filtered_df["channel"].unique().to_list():

379 channel_df = gain_filtered_df.filter(pl.col("channel") == channel)

380 if len(channel_df) == 0:

381 continue

382

383 # Apply settling time filter if specified

384 if settling_time is not None:

385 temp_df_tmp, _ = _filter_by_settling_time(temp_df, settling_time)

386 channel_df_tmp, _ = _filter_by_settling_time(channel_df, settling_time)

387 else:

388 temp_df_tmp = temp_df

389 channel_df_tmp = channel_df

390

391 # Join data

392 combined_df = _join_temp_channel_data(channel_df_tmp, temp_df_tmp)

393

394 # Extract values and apply temperature correction

395 temp_values = combined_df.select("temp_interp").to_numpy().flatten()

396 mean_values = combined_df.select("mean").to_numpy().flatten()

397 t = helper.datetime_to_hr(combined_df["timestamp"])

398

399 # Temperature correction

400 temp_contribution = slope * temp_values + intercept

401 reference_temp = np.mean(temp_values)

402 reference_contribution = slope * reference_temp + intercept

403 corrected_means = mean_values - temp_contribution + reference_contribution

404

405 # Create plot

406 plt.figure(figsize=(10, 5))

407 plt.scatter(

408 t,

409 corrected_means,

410 alpha=0.7,

411 color=color,

412 s=30,

413 label=f"Ch {str(channel).zfill(3)} {gain.capitalize()} Gain",

414 )

415

416 settling_time_str = f", {settling_time_plt} min settling time" if settling_time_plt is not None else ""

417 plt.title(

418 helper.plot_summary_string(

419 board_id=run_metadata.board_id,

420 run_numbers=run_metadata.run_numbers,

421 name=f"Temperature-Corrected Pedestal Mean vs Time \

422 Ch.{str(channel).zfill(3)} {gain} Gain{settling_time_str}",

423 attenuation=run_metadata.att_val,

424 pas_mode=run_metadata.pas_mode,

425 )

426 )

427 plt.xlabel("Time")

428 plt.ylabel("Temperature-Corrected Pedestal Mean [ADC Counts]")

429 plt.legend()

430 plt.grid(True, alpha=0.3)

431 plt.xticks(rotation=45)

432 plt.tight_layout()

433

434 channel_str = str(channel).zfill(3)

435 filename = f"temp{temp_str}_corrected_mean_vs_time_channel{channel_str}_{gain}.png"

436 plt.savefig(plot_dir / filename)

437 logger.debug(f"Saved corrected time plot to {plot_dir / filename}")

438 plt.close()

439

440

441def _baseline_correction(df: pl.DataFrame) -> pl.DataFrame:

442 # Correct the baseline of the samples by subtracting the mean for each gain and channel

443 df = df.with_columns(pl.col("mean").mean().over(["gain", "channel"]).alias("overall_mean"))

444 df = df.with_columns(pl.col("samples") - pl.col("mean") + pl.col("overall_mean").alias("samples"))

445 return df

446

447

448def _get_temp_df(temp_source: str, monitoring_df: pl.DataFrame, lab_env_data: pl.DataFrame) -> pl.DataFrame:

449 if temp_source == "thermistors":

450 return (

451 monitoring_df.filter(pl.col("group_name") == "thermistor")

452 .group_by("measurement")

453 .agg([pl.col("value").mean().alias("value"), pl.col("timestamp").first().alias("timestamp")])

454 )

455 elif temp_source == "bpol48":

456 return (

457 monitoring_df.filter(pl.col("monitor").is_in(["PAT1", "PAT2"]))

458 .group_by("measurement")

459 .agg([pl.col("value").mean().alias("value"), pl.col("timestamp").first().alias("timestamp")])

460 )

461 elif "TEMP" in temp_source or temp_source.startswith("PAT"):

462 return monitoring_df.filter(pl.col("monitor") == temp_source)

463 elif temp_source == "lab_environment":

464 return lab_env_data.with_columns(pl.mean_horizontal(["lab_temp", "crate_temp"]).alias("value"))

465 elif temp_source == "lab_temp":

466 return lab_env_data.with_columns(pl.col("lab_temp").alias("value"))

467 elif temp_source == "crate_temp":

468 return lab_env_data.with_columns(pl.col("crate_temp").alias("value"))

469 else:

470 print(f"Unknown temperature source: {temp_source}. Returning empty DataFrame.")

471 return pl.DataFrame()

472

473

474def _filter_by_settling_time(

475 df: pl.DataFrame,

476 settling_time_minutes: int,

477) -> Tuple[pl.DataFrame, Optional[int]]:

478 """

479 Filter dataframe to only include entries after settling time has passed.

480

481 Args:

482 df: DataFrame with a 'timestamp' column

483 settling_time_minutes: Time in minutes to wait after first timestamp

484

485 Returns:

486 Filtered DataFrame

487 """

488 first_timestamp = df["timestamp"].min()

489 settling_threshold = first_timestamp + pl.duration(minutes=settling_time_minutes)

490 filtered_df = df.filter(pl.col("timestamp") >= settling_threshold)

491 if len(filtered_df) == 0:

492 logger.warning(

493 f"No data points found after {settling_time_minutes} minutes settling time. Returning original DataFrame."

494 )

495 return df, None

496

497 return filtered_df, settling_time_minutes

498

499

500def moving_average(a: np.ndarray, n: int = 4) -> np.ndarray:

501 ret = np.cumsum(a, dtype=float)

502 ret[n:] = ret[n:] - ret[:-n]

503 return ret[n - 1 :] / n

504

505

506def plot_monitoring(

507 monitoring_df: pl.DataFrame, lab_env_data: pl.DataFrame, plot_dir: Path, tzero_correction: bool = True

508) -> None:

509 # Plots monitoring data from the monitoring DataFrame and lab environment data.

510 run_number = monitoring_df["run_number"][0]

511 board_id = monitoring_df["board_id"][0] if "board_id" in monitoring_df.columns else None

512 pas_mode = monitoring_df["pas_mode"][0] if "pas_mode" in monitoring_df.columns else None

513 attenuation = monitoring_df["att_val"][0] if "att_val" in monitoring_df.columns else None

514

515 corrected_temperature_col_name = "value_tzero_corr"

516

517 df_gb = (

518 monitoring_df.sort(["timestamp"])

519 .with_columns(pl.col("timestamp").dt.epoch(time_unit="s").alias("seconds"))

520 .group_by(pl.col("monitor"))

521 .agg(["value", "seconds", "group_name", "unit", "monitor_type"])

522 .with_columns(

523 pl.col("group_name").list.first().alias("group_name"),

524 pl.col("unit").list.first().alias("unit"),

525 pl.col("monitor_type").list.first().alias("monitor_type"),

526 pl.col("value").list.first().alias("first"),

527 ((pl.col("seconds") - pl.col("seconds").list.first()) / 3600).alias("hours"),

528 )

529 )

530

531 df_avg = df_gb.group_by("group_name").agg(pl.col("first").mean().alias("tzero_avg"))

532 df_gb = df_gb.join(df_avg, on="group_name").with_columns(

533 (pl.col("value") - pl.col("value").list.first() + pl.col("tzero_avg")).alias(corrected_temperature_col_name)

534 )

535

536 for monitor_group in df_gb["group_name"].unique():

537 for monitor_unit in df_gb.filter(pl.col("group_name") == monitor_group)["monitor_type"].unique():

538 monitor_group_df = df_gb.filter(

539 (pl.col("group_name") == monitor_group) & (pl.col("monitor_type") == monitor_unit)

540 )

541 for monitor in sorted(monitor_group_df["monitor"].unique().to_list(), key=lambda x: (len(x), x)):

542 value_col = (

543 corrected_temperature_col_name if (tzero_correction and monitor_unit.lower() == "c") else "value"

544 )

545

546 monitor_df = monitor_group_df.filter(pl.col("monitor") == monitor)

547 plt.plot(monitor_df["hours"].item(), monitor_df[value_col].item(), label=monitor)

548 if monitor_group == "":

549 monitor_group = "other"

550 plt.title(

551 helper.plot_summary_string(

552 board_id=board_id,

553 run_numbers=run_number,

554 name=f"{monitor_group} {monitor_unit}",

555 attenuation=attenuation,

556 pas_mode=pas_mode,

557 )

558 )

559 plt.legend(loc="center left", bbox_to_anchor=(1, 0.47), fontsize=6)

560 plt.tight_layout(rect=(0.03, 0.03, 1, 0.95))

561 plt.xlabel("Time since configuration [Hours]")

562 plt.ylabel(f"{monitor_group} {monitor_unit} [{monitor_group_df['unit'][0]}]")

563 plt.savefig(f"{plot_dir}/{monitor_group}_{monitor_unit}.png")

564 plt.savefig(f"{plot_dir}/{monitor_group}_{monitor_unit}.pdf")

565 plt.close()

566 logger.info(f"Saved run{run_number} {monitor_group} to {plot_dir}/{monitor_group}_{monitor_unit}.png")

567

568 # Plot lab environment temp

569 for monitor in ["lab_temp", "crate_temp"]:

570 t = helper.datetime_to_hr(lab_env_data["timestamp"])

571 plt.plot(t, lab_env_data[monitor], label=monitor)

572 plt.title(

573 helper.plot_summary_string(

574 board_id=board_id,

575 run_numbers=run_number,

576 name="Lab Temperature",

577 attenuation=attenuation,

578 pas_mode=pas_mode,

579 )

580 )

581 plt.legend(loc="center left", bbox_to_anchor=(1, 0.47), fontsize=6)

582 plt.tight_layout(rect=(0.03, 0.03, 1, 0.95))

583 plt.xlabel("Time since configuration [Hours]")

584 plt.ylabel("Lab Temperature [C]")

585 plt.savefig(f"{plot_dir}/lab_temperature.png")

586 plt.close()

587

588

589def plot_monitor_channel_correlation(

590 derived_data: pl.DataFrame, monitoring_df: pl.DataFrame, lab_env_data: pl.DataFrame, plot_dir: Path

591) -> None:

592 temp_df = (

593 monitoring_df.filter(pl.col("monitor_type") == "temperature")

594 .group_by("measurement", "monitor")

595 .agg([pl.col("timestamp").first().alias("timestamp"), pl.col("value").first()])

596 )

597 ### Remove later

598 ### temp_df = temp_df.with_columns(pl.col("timestamp").dt.replace_time_zone("UTC", non_existent="null"))

599 ###

600 temp_pivoted = temp_df.pivot(index=["measurement", "timestamp"], on="monitor", values="value").sort("timestamp")

601 combined_temp_df = (

602 temp_pivoted.sort("timestamp")

603 .join_asof(

604 lab_env_data.select(["timestamp", "crate_temp", "lab_temp"]).sort("timestamp"),

605 on="timestamp",

606 strategy="nearest",

607 )

608 .drop("measurement", "timestamp")

609 )

610 sorted_cols = sorted(combined_temp_df.columns)

611 combined_temp_df = combined_temp_df.select(sorted_cols)

612

613 for gain in derived_data["gain"].unique().to_list():

614 gain_filtered_df = derived_data.filter(pl.col("gain") == gain).sort("timestamp").select("mean", "channel")

615 channels = gain_filtered_df["channel"].unique().to_list()

616 monitors = combined_temp_df.columns

617 corr_matrix = np.full((len(monitors), len(channels)), np.nan)

618

619 if len(monitors) == 0:

620 continue

621 null_mask = np.isnan(combined_temp_df[monitors[0]].to_numpy().flatten())

622 for i, monitor in enumerate(monitors):

623 monitor_data_tmp = combined_temp_df[monitor].to_numpy().flatten()

624 monitor_data = monitor_data_tmp[~null_mask]

625

626 for j, channel in enumerate(channels):

627 channel_data = gain_filtered_df.filter(pl.col("channel") == channel).select("mean").to_numpy().flatten()

628

629 if len(monitor_data) > 1 and len(channel_data) > 1:

630 if len(monitor_data) != len(channel_data):

631 min_len = min(len(monitor_data), len(channel_data))

632 monitor_data = monitor_data[:min_len]

633 channel_data = channel_data[:min_len]

634 corr_matrix[i, j] = np.corrcoef(channel_data, monitor_data)[0, 1]

635

636 for gain in derived_data["gain"].unique().to_list():

637 gain_filtered_df = derived_data.filter(pl.col("gain") == gain).sort("timestamp").select("mean", "channel")

638 channels = gain_filtered_df["channel"].unique().to_list()

639 monitors = combined_temp_df.columns

640 corr_matrix = np.full((len(monitors), len(channels)), np.nan)

641

642 if len(monitors) == 0:

643 continue

644 temp_null_mask = np.isnan(combined_temp_df[monitors[0]].to_numpy().flatten())

645 for i, monitor in enumerate(monitors):

646 monitor_data_tmp = combined_temp_df[monitor].to_numpy().flatten()

647 monitor_data = monitor_data_tmp[~temp_null_mask]

648

649 for j, channel in enumerate(channels):

650 channel_data = gain_filtered_df.filter(pl.col("channel") == channel).select("mean").to_numpy().flatten()

651

652 if abs(len(monitor_data) - len(channel_data)) > 1:

653 logger.warning(

654 f"Length mismatch for monitor {monitor} and channel {channel}: "

655 f"{len(monitor_data)} vs {len(channel_data)}"

656 )

657 if len(monitor_data) != len(channel_data):

658 min_len = min(len(monitor_data), len(channel_data))

659 monitor_data = monitor_data[:min_len]

660 channel_data = channel_data[:min_len]

661 mask = ~(np.isnan(monitor_data) | np.isnan(channel_data))

662 monitor_clean = monitor_data[mask]

663 channel_clean = channel_data[mask]

664

665 if len(monitor_clean) >= 10: # Need sufficient data points

666 if np.std(monitor_clean) > 1e-10 and np.std(channel_clean) > 1e-10:

667 corr_val = np.corrcoef(monitor_clean, channel_clean)[0, 1]

668 corr_matrix[i, j] = corr_val

669

670 plt.figure(figsize=(30, 10))

671 display_matrix = corr_matrix.copy()

672 nan_mask = np.isnan(corr_matrix)

673 display_matrix[nan_mask] = 0

674

675 plt.imshow(display_matrix, cmap="RdBu", vmin=-1, vmax=1)

676

677 # Add text annotations

678 for i, monitor in enumerate(monitors):

679 for j, channel in enumerate(channels):

680 if nan_mask[i, j]:

681 plt.text(j, i, "N/A", ha="center", va="center", color="gray", fontsize=6)

682 else:

683 corr_val = corr_matrix[i, j]

684 text_color = "white" if abs(corr_val) > 0.5 else "black"

685 plt.text(j, i, f"{corr_val:.2f}", ha="center", va="center", color=text_color, fontsize=4)

686

687 plt.colorbar(label="Correlation Coefficient")

688 plt.xticks(ticks=np.arange(len(channels)), labels=channels, rotation=90, ha="right")

689 plt.yticks(ticks=np.arange(len(monitors)), labels=monitors)

690 plt.title("Correlation Matrix: Monitor Temp vs Channel ADC Counts")

691 plt.tight_layout()

692 plt.savefig(plot_dir / f"monitor_channel_corr_{gain}.png")

693 plt.close()

694

695

696def plot_monitor_monitor_correlation(

697 monitoring_df: pl.DataFrame,

698 lab_env_data: pl.DataFrame,

699 plot_dir: Path,

700) -> None:

701 temp_df = (

702 monitoring_df.filter(pl.col("monitor_type") == "temperature")

703 .group_by("measurement", "monitor")

704 .agg([pl.col("timestamp").first().alias("timestamp"), pl.col("value").first()])

705 )

706 ### Remove later

707 # temp_df = temp_df.with_columns(pl.col("timestamp").dt.replace_time_zone("UTC", non_existent="null"))

708 ###

709 temp_pivoted = temp_df.pivot(index=["measurement", "timestamp"], on="monitor", values="value").sort("measurement")

710 combined_df = (

711 temp_pivoted.sort("timestamp")

712 .join_asof(

713 lab_env_data.select(["timestamp", "crate_temp", "lab_temp"]).sort("timestamp"),

714 on="timestamp",

715 strategy="nearest",

716 )

717 .drop("measurement", "timestamp")

718 )

719

720 sorted_cols = sorted(combined_df.columns)

721 combined_df = combined_df.select(sorted_cols)

722

723 data_matrix = combined_df.to_numpy().astype(float)

724 n_cols = len(combined_df.columns)

725 corr_matrix = np.full((n_cols, n_cols), np.nan)

726

727 # Calculate correlations pairwise

728 for i in range(n_cols):

729 for j in range(n_cols):

730 if i == j:

731 corr_matrix[i, j] = 1.0

732 else:

733 # Get data for both columns

734 x = data_matrix[:, i]

735 y = data_matrix[:, j]

736

737 # Remove rows where either is NaN

738 mask = ~(np.isnan(x) | np.isnan(y))

739 x_clean = x[mask]

740 y_clean = y[mask]

741

742 if len(x_clean) >= 10: # Need sufficient data points

743 # Check for constant values

744 if np.std(x_clean) > 1e-10 and np.std(y_clean) > 1e-10:

745 corr_val = np.corrcoef(x_clean, y_clean)[0, 1]

746 corr_matrix[i, j] = corr_val

747

748 plt.figure(figsize=(15, 12))

749 display_matrix = corr_matrix.copy()

750 nan_mask = np.isnan(corr_matrix)

751 display_matrix[nan_mask] = 0

752

753 plt.imshow(display_matrix, cmap="Blues", vmin=0, vmax=1)

754

755 # Add text annotations

756 for i in range(len(combined_df.columns)):

757 for j in range(len(combined_df.columns)):

758 if nan_mask[i, j]:

759 plt.text(j, i, "N/A", ha="center", va="center", color="gray", fontsize=6)

760 else:

761 corr_val = corr_matrix[i, j]

762 text_color = "white" if abs(corr_val) > 0.5 else "black"

763 plt.text(j, i, f"{corr_val:.2f}", ha="center", va="center", color=text_color, fontsize=4)

764

765 plt.colorbar(label="Correlation Coefficient")

766 plt.xticks(ticks=np.arange(len(combined_df.columns)), labels=combined_df.columns, rotation=90, ha="right")

767 plt.yticks(ticks=np.arange(len(combined_df.columns)), labels=combined_df.columns)

768 plt.title("Correlation Matrix of Monitoring Channels")

769 plt.tight_layout()

770 plt.savefig(plot_dir / "monitor_monitor_corr.png")

771 plt.close()

772

773

774def plot_temp_correlation(

775 derived_data: pl.DataFrame,

776 monitoring_df: pl.DataFrame,

777 lab_env_data: pl.DataFrame,

778 plot_dir: Path,

779 temp_source: Optional[str] = None,

780 settling_time: Optional[int] = None,

781) -> None:

782 temp_str = f"_{temp_source}" if temp_source else ""

783 temp_source = temp_source or "lab_environment"

784

785 run_metadata = Metadata.fill_from_dataframe(derived_data)

786

787 temp_df = _get_temp_df(temp_source, monitoring_df, lab_env_data)

788 if len(temp_df) == 0:

789 logger.warning("No temperature data found for correlation plot.")

790 return

791

792 # Process data and create fits

793 fit_results = _process_temp_correlation_data(derived_data, temp_df, settling_time, plot_dir, temp_str, run_metadata)

794

795 # Generate all plots

796 fit_dict = _create_combined_plots(fit_results, plot_dir, temp_str, run_metadata)

797 fit_results["fit_dict"] = fit_dict

798 _create_summary_plots(fit_results, plot_dir, temp_str, run_metadata)

799 _create_corrected_time_plots(derived_data, temp_df, fit_results, settling_time, plot_dir, temp_str, run_metadata)

800

801

802def plot_mean_rms_vs_time(gain_filtered_df: pl.DataFrame, plot_dir: Path) -> None:

803 color_dict = {"lo": "b", "hi": "r"}

804 gain = gain_filtered_df["gain"][0]

805 color = color_dict[gain]

806 board_id = gain_filtered_df["board_id"][0] if "board_id" in gain_filtered_df.columns else None

807 run_number = gain_filtered_df["run_number"][0]

808 pas_mode = gain_filtered_df["pas_mode"][0] if "pas_mode" in gain_filtered_df.columns else None

809 attenuation = gain_filtered_df["att_val"][0] if "att_val" in gain_filtered_df.columns else None

810

811 for channel in gain_filtered_df["channel"].unique().to_list():

812 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter(

813 pl.col("channel") == channel

814 )

815

816 if len(channel_df) == 0:

817 continue

818

819 t = helper.datetime_to_hr(channel_df["timestamp"])

820 channel = str(channel).zfill(3)

821

822 # Mean vs Time

823 fig1, ax1 = plt.subplots(figsize=(10, 5))

824 ax1.grid(visible=True, zorder=0)

825 ax1.scatter(t, channel_df["mean"], c=color, marker="o")

826 ax1.set_title(

827 helper.plot_summary_string(

828 board_id=board_id,

829 run_numbers=run_number,

830 name=f"{gain} Gain Mean vs. Time Ch.{channel}",

831 attenuation=attenuation,

832 pas_mode=pas_mode,

833 )

834 )

835 ax1.set_ylabel("Pedestal Mean [ADC Counts]")

836 ax1.set_xlabel("Time [Hours]")

837 fig1.savefig(plot_dir / f"mean_vs_time_channel{channel}_{gain}.png")

838 plt.close(fig1)

839

840 # RMS vs Time

841 fig2, ax2 = plt.subplots(figsize=(10, 5))

842 ax2.scatter(t, channel_df["std"], c=color, marker="o")

843 ax2.set_title(

844 helper.plot_summary_string(

845 board_id=board_id,

846 run_numbers=run_number,

847 name=f"{gain} Gain RMS vs. Time Ch.{channel}",

848 attenuation=attenuation,

849 pas_mode=pas_mode,

850 )

851 )

852 ax2.set_ylabel("Pedestal RMS [ADC Counts]")

853 ax2.set_xlabel("Time [Hours]")

854 fig2.savefig(plot_dir / f"rms_vs_time_channel{channel}_{gain}.png")

855 plt.close(fig2)

856

857

858def plot_sample_range_vs_time(gain_filtered_df: pl.DataFrame, plot_dir: Path) -> None:

859 board_id = gain_filtered_df["board_id"][0] if "board_id" in gain_filtered_df.columns else None

860 run_number = gain_filtered_df["run_number"][0]

861 pas_mode = gain_filtered_df["pas_mode"][0] if "pas_mode" in gain_filtered_df.columns else None

862 attenuation = gain_filtered_df["att_val"][0] if "att_val" in gain_filtered_df.columns else None

863 plt.figure(figsize=(10, 5))

864 color_dict = {"lo": "b", "hi": "r"}

865 title_dict = {"lo": "LG", "hi": "HG"}

866 gain = gain_filtered_df["gain"][0]

867 color = color_dict[gain]

868 title = title_dict[gain]

869

870 first_channel = True

871 for channel in gain_filtered_df["channel"].unique().to_list():

872 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter(

873 pl.col("channel") == channel

874 )

875 fig, ax = plt.subplots(figsize=(8, 6))

876 ax.grid(visible=True, zorder=0)

877 avg_filter_df = channel_df.select(

878 [

879 pl.col("samples").list.max().alias("max_per_list"),

880 pl.col("samples").list.min().alias("min_per_list"),

881 pl.col("samples").list.len().alias("list_lens"),

882 (pl.col("samples").list.max() - pl.col("samples").list.min()).alias("range_per_list"),

883 ]

884 )

885

886 t = helper.datetime_to_hr(channel_df["timestamp"])

887 range_per_list = avg_filter_df["range_per_list"]

888 error = channel_df["std"] / np.sqrt(avg_filter_df["list_lens"])

889 label = title if first_channel else None # Only label first channel per gain

890 first_channel = False

891

892 ax.errorbar(t, range_per_list, error, fmt=f"{color}o", label=label)

893

894 ax.set_title(

895 helper.plot_summary_string(

896 board_id=board_id,

897 run_numbers=run_number,

898 channels=helper.list_to_text_ranges(channel),

899 name="Sample Range vs. Time",

900 attenuation=attenuation,

901 pas_mode=pas_mode,

902 gain=gain,

903 )

904 )

905 channel = str(channel).zfill(3)

906 ax.set_ylabel("Avg. Sample Range [ADC Counts]")

907 ax.set_xlabel("Time[hours]")

908 fig.tight_layout()

909 fig.savefig(plot_dir / f"sample_range_vs_time_channel{channel}_{gain}.png")

910 plt.close(fig)

911 fig.clf()

912

913

914def avg_rms_mean_vs_channel(

915 df: pl.DataFrame,

916 plot_dir: Path,

917 info: Optional[Metadata] = None,

918 settling_time: int = 4, # Settling time in minutes for max rms deviation calculation

919):

920 board_id = df["board_id"][0] if "board_id" in df.columns else None

921 run_number = df["run_number"][0]

922 pas_mode = df["pas_mode"][0] if "pas_mode" in df.columns else None

923 attenuation = df["att_val"][0] if "att_val" in df.columns else None

924

925 plt.figure(figsize=(10, 5))

926 n_channels = df["channel"].n_unique()

927 channels = df["channel"].unique().to_list()

928 color_dict = {"lo": "b", "hi": "r"}

929 title_dict = {"lo": "LG", "hi": "HG"}

930

931 fig, ax = plt.subplots(figsize=(8, 6))

932 plt.xticks(np.arange(0, n_channels, 4), rotation=70)

933 fig2, ax2 = plt.subplots(1, figsize=(8, 6))

934 fig3, ax3 = plt.subplots(1, figsize=(8, 6))

935 ax.grid(visible=True, zorder=0)

936 ax.xaxis.set_tick_params(pad=0.1)

937 ax2.grid(visible=True)

938 ax3.grid(visible=True)

939 plt.xticks(np.arange(0, n_channels, 4), rotation=70)

940

941 # Plot avg mean, avg rms, and max rms deviation per channel

942 for gain in ["hi", "lo"]:

943 gain_filtered_df = df.filter(pl.col("gain") == gain)

944 first_channel = True

945 for channel in gain_filtered_df["channel"].unique().to_list():

946 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter(

947 pl.col("channel") == channel

948 )

949 color = color_dict[gain]

950 title = title_dict[gain]

951

952 means = channel_df["mean"].to_numpy()

953 stds = channel_df["std"].to_numpy()

954

955 means_mean = np.mean(means)

956 means_min = np.min(means)

957 means_max = np.max(means)

958 stds_mean = np.mean(stds)

959

960 n_measurements = len(means)

961 stds_err = np.std(stds) / np.sqrt(n_measurements)

962 means_err = np.array([[means_mean - means_min], [means_max - means_mean]])

963

964 label = title if first_channel else None

965 first_channel = False

966

967 ax.errorbar(channel, means_mean, yerr=means_err, fmt=f"{color}o", label=label)

968 ax2.errorbar(channel, stds_mean, stds_err, fmt=f"{color}o", label=label)

969

970 filtered_df, _ = _filter_by_settling_time(channel_df, settling_time)

971 stds_settled = filtered_df["std"].to_numpy()

972 max_rms_deviation = np.max(np.abs(stds_settled - stds_mean))

973 ax3.plot(channel, max_rms_deviation, f"{color}o", label=label)

974

975 ax.set_title(

976 helper.plot_summary_string(

977 board_id=board_id,

978 run_numbers=run_number,

979 channels=helper.list_to_text_ranges(channels),

980 name="Overall Mean vs. Channel #\nError bars show min and max",

981 attenuation=attenuation,

982 pas_mode=pas_mode,

983 )

984 )

985 ax.set_ylabel("Avg. Pedestal Mean [ADC Counts]")

986 ax.set_xlabel("Channel")

987 fig.legend()

988 fig.tight_layout()

989 fig.savefig(plot_dir / "avg_mean_vs_channel.png")

990 fig.clf()

991

992 ax2.set_title(

993 helper.plot_summary_string(

994 board_id=board_id,

995 run_numbers=run_number,

996 channels=helper.list_to_text_ranges(channels),

997 name="Overall RMS vs. Channel #",

998 attenuation=attenuation,

999 pas_mode=pas_mode,

1000 info=info,

1001 )

1002 )

1003 ax2.set_ylabel("Avg. Pedestal RMS [ADC Counts]")

1004 ax2.set_xlabel("Channel")

1005 fig2.legend()

1006 fig2.tight_layout()

1007 fig2.savefig(plot_dir / "avg_rms_vs_time.png")

1008 fig2.clf()

1009

1010 ax3.set_title(

1011 helper.plot_summary_string(

1012 board_id=board_id,

1013 run_numbers=run_number,

1014 channels=helper.list_to_text_ranges(channels),

1015 name=f"Max RMS Deviation vs. Channel # After {settling_time} min Settling Time)",

1016 attenuation=attenuation,

1017 pas_mode=pas_mode,

1018 info=info,

1019 )

1020 )

1021 ax3.set_ylabel("Max RMS Deviation [ADC Counts]")

1022 ax3.set_xlabel("Channel")

1023 fig3.legend()

1024 fig3.tight_layout()

1025 fig3.savefig(plot_dir / "max_rms_deviation_vs_channel.png")

1026 fig3.clf()

1027

1028

1029def plot_outliers(

1030 df: pl.DataFrame,

1031 plot_dir: Path,

1032 info: Optional[Metadata] = None,

1033):

1034 df = _baseline_correction(df)

1035 board_id = df["board_id"][0] if "board_id" in df.columns else None

1036 run_number = df["run_number"][0]

1037 pas_mode = df["pas_mode"][0] if "pas_mode" in df.columns else None

1038 attenuation = df["att_val"][0] if "att_val" in df.columns else None

1039

1040 # Summary plot setup

1041 fig, ax = plt.subplots(figsize=(10, 5))

1042 channels = df["channel"].unique().to_list()

1043 color_dict = {"lo": "b", "hi": "r"}

1044 title_dict = {"lo": "LG", "hi": "HG"}

1045

1046 # For time-series deviation summary plot

1047 max_outlier_rate_deviations: Dict[str, Dict[int, float]] = {"hi": {}, "lo": {}}

1048 # For average outlier rate summary plot

1049 avg_outlier_rates: Dict[str, Dict[int, float]] = {"hi": {}, "lo": {}}

1050 # For max outlier rate summary plot

1051 max_outlier_rates: Dict[str, Dict[int, float]] = {"hi": {}, "lo": {}}

1052

1053 gains: List[Literal["hi", "lo"]] = ["hi", "lo"]

1054 for gain in gains:

1055 gain_filtered_df = df.filter(pl.col("gain") == gain)

1056 first_channel = True

1057

1058 for channel in gain_filtered_df["channel"].unique().to_list():

1059 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter(

1060 pl.col("channel") == channel

1061 )

1062 channel_str = str(channel).zfill(3)

1063 color = color_dict[gain]

1064 title = title_dict[gain]

1065 label = title if first_channel else None

1066 first_channel = False

1067

1068 # Original outlier analysis

1069 samples = np.array(channel_df["samples"].explode().to_list())

1070

1071 # gaussian fit statistics

1072 hist_bins: np.ndarray = np.linspace(min(samples), max(samples), 100)

1073 fig2, ax2 = plt.subplots(1)

1074

1075 _ = ax2.hist(samples, bins=hist_bins.tolist())

1076 xaxis: np.ndarray = np.linspace(np.min(samples), np.max(samples), 50)

1077 fit_pars = helper.calc_gaussian(samples, hist_bins)

1078 fit_mu, fit_sigma, fit_N = fit_pars[0], fit_pars[2], fit_pars[4]

1079 d_mu, d_sigma = fit_pars[1], fit_pars[3]

1080 gauss_fit = helper.gauss(xaxis, fit_mu, fit_sigma, fit_N)

1081 upper_bound = fit_mu + 5 * fit_sigma

1082 lower_bound = fit_mu - 5 * fit_sigma

1083 ax2.axvline(upper_bound, color="red", linestyle="--", label="5σ")

1084 ax2.axvline(lower_bound, color="red", linestyle="--")

1085 ax2.axvline(fit_mu + 3 * fit_sigma, color="green", linestyle="--", label="3σ")

1086 ax2.axvline(fit_mu - 3 * fit_sigma, color="green", linestyle="--")

1087 ax2.plot(

1088 xaxis,

1089 gauss_fit,

1090 label=rf"""Gaussian Fit

1091 $\mu$ = {fit_mu:.03f} $\pm$ {d_mu:.03g}

1092 $\sigma$ = {fit_sigma:.03f} $\pm$ {d_sigma:.03g}""",

1093 )

1094

1095 ax2.set_title(

1096 helper.plot_summary_string(

1097 board_id=board_id,

1098 run_numbers=run_number,

1099 channels=channel,

1100 name="Pedestal Sample Outliers [ADC counts]",

1101 attenuation=attenuation,

1102 pas_mode=pas_mode,

1103 gain=gain,

1104 )

1105 )

1106 ax2.set_ylabel("Entries")

1107 ax2.set_xlabel("Sample [ADC counts]")

1108 ax2.legend()

1109 fig2.savefig(f"{plot_dir}/channel{channel_str}_{gain}_sample_hist.png")

1110 fig2.clf()

1111 plt.close(fig2)

1112

1113 # Calculate total outliers for summary bar plot

1114 five_sigma = 5 * fit_sigma

1115 outliers = samples[np.abs(samples - fit_mu) > five_sigma]

1116 num_outliers = len(outliers)

1117

1118 ax.bar(int(channel), num_outliers, fill=False, ec=color, label=label, zorder=3)

1119

1120 # Time-series analysis - outlier rate vs time

1121 fig3, ax3 = plt.subplots(figsize=(12, 6))

1122 ax3.grid(visible=True, zorder=0)

1123

1124 t = helper.datetime_to_hr(channel_df["timestamp"])

1125 outlier_rates = []

1126

1127 for sample in channel_df["samples"]:

1128 sample = np.array(sample)

1129 hist_bins = np.linspace(min(sample), max(sample), 100)

1130

1131 fit_pars = helper.calc_gaussian(sample, hist_bins)

1132 fit_mu, fit_sigma = fit_pars[0], fit_pars[2]

1133 five_sigma = 5 * fit_sigma

1134 outliers = sample[np.abs(sample - fit_mu) > five_sigma]

1135

1136 # Calculate outlier rate as percentage

1137 outlier_rate = (len(outliers) / len(sample)) * 100

1138 outlier_rates.append(outlier_rate)

1139

1140 max_outlier_rates[gain][channel] = max(outlier_rates) if outlier_rates else 0

1141 ax3.plot(t, outlier_rates, "o", color=color, alpha=0.7)

1142

1143 max_rate = max(outlier_rates) if outlier_rates else 0

1144 y_max = max(0.05, max_rate * 1.2)

1145 ax3.set_ylim(0, y_max)

1146

1147 # Calculate max deviation and average rate for summary plots

1148 mean_rate = np.mean(outlier_rates)

1149 max_deviation = np.max(np.abs(np.array(outlier_rates) - mean_rate))

1150 max_outlier_rate_deviations[gain][channel] = max_deviation

1151 avg_outlier_rates[gain][channel] = mean_rate.astype(float)

1152

1153 ax3.set_title(

1154 helper.plot_summary_string(

1155 board_id=board_id,

1156 run_numbers=run_number,

1157 channels=channel,

1158 name=f"Channel {channel} Outlier Rate vs Time, 5σ Gaussian Fit Cut",

1159 attenuation=attenuation,

1160 pas_mode=pas_mode,

1161 gain=gain,

1162 )

1163 )

1164

1165 ax3.set_ylabel("Outlier Rate (%)")

1166 ax3.set_xlabel("Time [hours]")

1167 fig3.tight_layout()

1168 fig3.savefig(plot_dir / f"outliers_vs_time_channel{channel_str}_{gain}.png", dpi=300)

1169 plt.close(fig3)

1170

1171 # Create summary plot: Number of outliers per channel

1172 ax.set_title(

1173 helper.plot_summary_string(

1174 board_id=board_id,

1175 run_numbers=run_number,

1176 channels=helper.list_to_text_ranges(channels),

1177 name="Number of Baseline Corrected Sample Outliers Per Channel, 5σ Gaussian Fit Cut",

1178 attenuation=attenuation,

1179 pas_mode=pas_mode,

1180 info=info,

1181 )

1182 )

1183 ax.set_xlabel("Channel", loc="right")

1184 ax.set_ylabel("Number of Outliers")

1185 ax.legend(loc="upper right")

1186 fig.tight_layout()

1187 fig.savefig(f"{plot_dir}/outliers_summary.png")

1188 plt.close(fig)

1189

1190 # Create summary plot: Maximum outlier rate deviation vs channel #

1191 fig4, ax4 = plt.subplots(figsize=(12, 8))

1192 ax4.grid(visible=True, zorder=0)

1193

1194 for gain in gains:

1195 if max_outlier_rate_deviations[gain]:

1196 channels_sorted = sorted(max_outlier_rate_deviations[gain].keys())

1197 deviations = [max_outlier_rate_deviations[gain][ch] for ch in channels_sorted]

1198

1199 ax4.plot(

1200 channels_sorted,

1201 deviations,

1202 "o",

1203 color=color_dict[gain],

1204 label=f"{gain} Gain",

1205 alpha=0.7,

1206 markersize=6,

1207 )

1208

1209 ax4.set_title(

1210 helper.plot_summary_string(

1211 board_id=board_id,

1212 run_numbers=run_number,

1213 channels=helper.list_to_text_ranges(list(range(min(channels_sorted), max(channels_sorted) + 1))),

1214 name="Maximum Outlier Rate Deviation vs Channel Number, 5σ Gaussian Fit Cut",

1215 attenuation=attenuation,

1216 pas_mode=pas_mode,

1217 info=info,

1218 )

1219 )

1220

1221 ax4.set_ylabel("Maximum Outlier Rate Deviation (%)")

1222 ax4.set_xlabel("Channel Number")

1223 ax4.legend()

1224 fig4.tight_layout()

1225 fig4.savefig(plot_dir / "max_outlier_rate_deviation_vs_channel.png", dpi=300)

1226 plt.close(fig4)

1227

1228 # Create summary plots: Average outlier rate vs channel

1229 for gain in gains:

1230 if avg_outlier_rates[gain]:

1231 fig5, ax5 = plt.subplots(figsize=(12, 8))

1232 ax5.grid(visible=True, zorder=0)

1233

1234 channels_sorted = sorted(avg_outlier_rates[gain].keys())

1235 avg_rates = [avg_outlier_rates[gain][ch] for ch in channels_sorted]

1236 max_rates = [max_outlier_rates[gain][ch] for ch in channels_sorted]

1237

1238 # Calculate error bars: upper = max - avg, lower = 0

1239 upper_errors = [max_rate - avg_rate for max_rate, avg_rate in zip(max_rates, avg_rates)]

1240 lower_errors = [0] * len(avg_rates) # No lower error bars

1241

1242 ax5.errorbar(

1243 channels_sorted,

1244 avg_rates,

1245 yerr=np.array([lower_errors, upper_errors]),

1246 fmt="o",

1247 color=color_dict[gain],

1248 label=f"{gain} Gain",

1249 alpha=0.7,

1250 capsize=3,

1251 )

1252

1253 ax5.set_title(

1254 helper.plot_summary_string(

1255 board_id=board_id,

1256 run_numbers=run_number,

1257 channels=helper.list_to_text_ranges(list(range(min(channels_sorted), max(channels_sorted) + 1))),

1258 name=f"Average Outlier Rate vs Channel Number - {gain.upper()} Gain, 5σ Gaussian Fit Cut",

1259 attenuation=attenuation,

1260 pas_mode=pas_mode,

1261 info=info,

1262 )

1263 )

1264

1265 ax5.set_ylabel("Average Outlier Rate (%)")

1266 ax5.set_xlabel("Channel Number")

1267 ax5.legend()

1268 fig5.tight_layout()

1269 fig5.savefig(plot_dir / f"avg_outlier_rate_vs_channel_{gain}.png", dpi=300)

1270 ax5.set_yscale("log")

1271 fig5.savefig(plot_dir / f"avg_outlier_rate_vs_channel_{gain}_log.png", dpi=300)

1272 plt.close(fig5)

1273

1274

1275def plot_avg_sample_range(

1276 df: pl.DataFrame,

1277 plot_dir: Path,

1278 info: Optional[Metadata] = None,

1279) -> None:

1280 board_id = df["board_id"][0] if "board_id" in df.columns else None

1281 run_number = df["run_number"][0]

1282 pas_mode = df["pas_mode"][0] if "pas_mode" in df.columns else None

1283 attenuation = df["att_val"][0] if "att_val" in df.columns else None

1284

1285 plt.figure(figsize=(10, 5))

1286 n_channels = df["channel"].n_unique()

1287 channels = df["channel"].unique().to_list()

1288 color_dict = {"lo": "b", "hi": "r"}

1289 title_dict = {"lo": "LG", "hi": "HG"}

1290

1291 fig, ax = plt.subplots(figsize=(8, 6))

1292 plt.xticks(np.arange(0, n_channels, 4), rotation=70)

1293 ax.xaxis.set_tick_params(pad=0.1)

1294 _, ax2 = plt.subplots(1, figsize=(8, 6))

1295 ax.grid(visible=True)

1296 ax2.grid(visible=True)

1297 plt.xticks(np.arange(0, n_channels, 4), rotation=70)

1298

1299 for gain in ["hi", "lo"]:

1300 gain_filtered_df = df.filter(pl.col("gain") == gain)

1301 first_channel = True

1302 for channel in gain_filtered_df["channel"].unique().to_list():

1303 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter(

1304 pl.col("channel") == channel

1305 )

1306 color = color_dict[gain]

1307 title = title_dict[gain]

1308 ax.grid(visible=True, zorder=0)

1309

1310 avg_filter_df = channel_df.select(

1311 [

1312 pl.col("samples").list.max().alias("max_per_list"),

1313 pl.col("samples").list.min().alias("min_per_list"),

1314 (pl.col("samples").list.max() - pl.col("samples").list.min()).alias("range_per_list"),

1315 (pl.col("samples").list.max() - pl.col("samples").list.min()).mean().alias("avg_range"),

1316 ]

1317 )

1318 error = avg_filter_df["range_per_list"].std() / np.sqrt(len(avg_filter_df["range_per_list"]))

1319 avg_range = avg_filter_df["avg_range"][0]

1320 label = title if first_channel else None # Only label first channel per gain

1321 first_channel = False

1322

1323 ax.errorbar(channel, avg_range, error, fmt=f"{color}o", label=label)

1324

1325 ax.set_title(

1326 helper.plot_summary_string(

1327 board_id=board_id,

1328 run_numbers=run_number,

1329 channels=helper.list_to_text_ranges(channels),

1330 name="Avg. Sample Range vs. Channel #",

1331 attenuation=attenuation,

1332 pas_mode=pas_mode,

1333 info=info,

1334 )

1335 )

1336 ax.set_ylabel("Avg. Sample Range [ADC Counts]")

1337 ax.set_xlabel("Channel")

1338 fig.legend()

1339 fig.tight_layout()

1340 fig.savefig(plot_dir / "avg_sample_range.png")

1341 fig.clf()

Coverage for polars_analysis / plotting / noise_stability_plotting.py: 74%

665 statements