Coverage for polars_analysis / plotting / noise_stability_plotting.py: 74%

665 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-16 15:00 -0400

1import logging 

2from pathlib import Path 

3from typing import Any, Dict, List, Literal, Optional, Tuple 

4 

5import matplotlib.pyplot as plt 

6import numpy as np 

7import polars as pl 

8from scipy import stats 

9 

10from polars_analysis import utils 

11from polars_analysis.plotting import helper 

12from polars_analysis.plotting.helper import Metadata 

13 

14logger = logging.getLogger(__name__) 

15 

16 

17def _process_temp_correlation_data( 

18 derived_data: pl.DataFrame, 

19 temp_df: pl.DataFrame, 

20 settling_time: Optional[int], 

21 plot_dir: Path, 

22 temp_str: str, 

23 run_metadata: Metadata, 

24) -> dict: 

25 """Process temperature correlation data and create individual channel plots.""" 

26 colors = {"lo": "blue", "hi": "red"} 

27 

28 # Initialize data structures 

29 slopes_data: Dict[str, Dict[int, float]] = {"lo": {}, "hi": {}} 

30 intercepts_data: Dict[str, Dict[int, float]] = {"lo": {}, "hi": {}} 

31 slope_errors: Dict[str, Dict[int, float]] = {"lo": {}, "hi": {}} 

32 intercept_errors: Dict[str, Dict[int, float]] = {"lo": {}, "hi": {}} 

33 combined_data: Dict[str, Dict[str, List[np.ndarray]]] = { 

34 "lo": {"temp": [], "mean": [], "std": []}, 

35 "hi": {"temp": [], "mean": [], "std": []}, 

36 } 

37 

38 settling_time_plt = None 

39 

40 for gain in ["lo", "hi"]: 

41 gain_filtered_df = derived_data.filter(pl.col("gain") == gain) 

42 

43 for channel in gain_filtered_df["channel"].unique().to_list(): 

44 channel_df = gain_filtered_df.filter(pl.col("channel") == channel) 

45 if len(channel_df) == 0: 

46 continue 

47 

48 # Apply settling time filter if specified 

49 if settling_time is not None: 

50 temp_df_tmp, settling_time_plt = _filter_by_settling_time(temp_df, settling_time) 

51 channel_df_tmp, settling_time_plt = _filter_by_settling_time(channel_df, settling_time) 

52 else: 

53 temp_df_tmp = temp_df 

54 channel_df_tmp = channel_df 

55 

56 # Join temperature and channel data 

57 combined_df = _join_temp_channel_data(channel_df_tmp, temp_df_tmp) 

58 

59 # Extract arrays 

60 mean_values = combined_df.select("mean").to_numpy().flatten() 

61 std_values = combined_df.select("std").to_numpy().flatten() 

62 temp_values = combined_df.select("temp_interp").to_numpy().flatten() 

63 

64 # Store combined data 

65 combined_data[gain]["temp"].extend(temp_values) 

66 combined_data[gain]["mean"].extend(mean_values) 

67 combined_data[gain]["std"].extend(std_values) 

68 

69 # Perform linear regression 

70 result = stats.linregress(temp_values, mean_values) 

71 

72 # Store fit results 

73 slopes_data[gain][channel] = result.slope.astype(float) 

74 intercepts_data[gain][channel] = result.intercept.astype(float) 

75 slope_errors[gain][channel] = result.stderr.astype(float) 

76 intercept_errors[gain][channel] = result.intercept_stderr.astype(float) 

77 

78 # Create individual channel plot 

79 _plot_individual_channel( 

80 temp_values, 

81 mean_values, 

82 std_values, 

83 result.slope.astype(float), 

84 result.intercept.astype(float), 

85 channel, 

86 gain, 

87 colors[gain], 

88 plot_dir, 

89 temp_str, 

90 settling_time_plt, 

91 run_metadata, 

92 ) 

93 

94 return { 

95 "slopes_data": slopes_data, 

96 "intercepts_data": intercepts_data, 

97 "slope_errors": slope_errors, 

98 "intercept_errors": intercept_errors, 

99 "combined_data": combined_data, 

100 "colors": colors, 

101 "settling_time_plt": settling_time_plt, 

102 } 

103 

104 

105def _join_temp_channel_data( 

106 channel_df: pl.DataFrame, 

107 temp_df: pl.DataFrame, 

108) -> pl.DataFrame: 

109 """Join channel and temperature data by timestamp.""" 

110 return channel_df.sort("timestamp").join_asof( 

111 temp_df.select(["timestamp", "value"]).rename({"value": "temp_interp"}).sort("timestamp"), 

112 on="timestamp", 

113 strategy="nearest", 

114 ) 

115 

116 

117def _plot_individual_channel( 

118 temp_values: np.ndarray, 

119 mean_values: np.ndarray, 

120 std_values: np.ndarray, 

121 slope: float, 

122 intercept: float, 

123 channel: int, 

124 gain: str, 

125 color: str, 

126 plot_dir: Path, 

127 temp_str: str, 

128 settling_time_plt: Optional[int], 

129 run_metadata: Metadata, 

130) -> None: 

131 """Create individual channel temperature correlation plot.""" 

132 channel_str = str(channel).zfill(3) 

133 

134 # Create fit line 

135 temp_fit = np.linspace(temp_values.min(), temp_values.max(), 100) 

136 mean_fit = slope * temp_fit + intercept 

137 

138 plt.figure(figsize=(10, 5)) 

139 plt.errorbar(temp_values, mean_values, yerr=std_values, fmt="o", color=color, label=f"Ch {channel_str} {gain} Gain") 

140 plt.plot(temp_fit, mean_fit, "-", color=color, label=f"Fit: y = {slope:.3f}x + {intercept:.2f}") 

141 

142 settling_time_str = f", {settling_time_plt} min settling time" if settling_time_plt is not None else "" 

143 plt.title( 

144 helper.plot_summary_string( 

145 board_id=run_metadata.board_id, 

146 run_numbers=run_metadata.run_numbers, 

147 name=f"Temperature vs. Pedestal Mean Ch.{channel_str} {gain} Gain{settling_time_str}", 

148 attenuation=run_metadata.att_val, 

149 pas_mode=run_metadata.pas_mode, 

150 ) 

151 ) 

152 plt.xlabel("Temperature [°C]") 

153 plt.ylabel("Pedestal Mean [ADC Counts]") 

154 plt.legend() 

155 plt.grid(True, alpha=0.3) 

156 plt.tight_layout() 

157 

158 filename = f"mean_vs_temp{temp_str}_channel{channel_str}_{gain}.png" 

159 plt.savefig(plot_dir / filename) 

160 logger.debug(f"Saved to {plot_dir / filename}") 

161 plt.close() 

162 

163 

164def _create_combined_plots( 

165 fit_results: Dict[str, Any], 

166 plot_dir: Path, 

167 temp_str: str, 

168 run_metadata: Metadata, 

169) -> dict: 

170 """Create combined temperature correlation plots for all channels.""" 

171 combined_data = fit_results["combined_data"] 

172 colors = fit_results["colors"] 

173 

174 plt.figure(figsize=(12, 8)) 

175 

176 # Plot data points 

177 for gain in ["lo", "hi"]: 

178 if combined_data[gain]["temp"]: 

179 temp_array = np.array(combined_data[gain]["temp"]) 

180 mean_array = np.array(combined_data[gain]["mean"]) 

181 std_array = np.array(combined_data[gain]["std"]) 

182 

183 plt.errorbar( 

184 temp_array, 

185 mean_array, 

186 yerr=std_array, 

187 fmt="o", 

188 color=colors[gain], 

189 alpha=0.6, 

190 label=f"{gain.capitalize()} Gain Data", 

191 markersize=4, 

192 ) 

193 

194 # Create fit lines and combined fit 

195 fit_dict = {} 

196 all_temp = np.array(combined_data["lo"]["temp"] + combined_data["hi"]["temp"]) 

197 all_mean = np.array(combined_data["lo"]["mean"] + combined_data["hi"]["mean"]) 

198 

199 for gain in ["lo", "hi"]: 

200 if combined_data[gain]["temp"]: 

201 temp_array = np.array(combined_data[gain]["temp"]) 

202 mean_array = np.array(combined_data[gain]["mean"]) 

203 

204 result = stats.linregress(temp_array, mean_array) 

205 temp_range = np.linspace(temp_array.min(), temp_array.max(), 100) 

206 fit_line = result.slope * temp_range + result.intercept 

207 

208 plt.plot( 

209 temp_range, 

210 fit_line, 

211 color=colors[gain], 

212 linewidth=2, 

213 label=f"{gain.capitalize()}: y = {result.slope:.3f}x + {result.intercept:.2f}", 

214 ) 

215 

216 fit_dict[gain] = {"slope": result.slope, "intercept": result.intercept} 

217 board_id: str = run_metadata.board_id if run_metadata.board_id is not None else "" 

218 utils.add_run_info(f"{gain}_slope", result.slope, board_id, plot_dir, False) 

219 utils.add_run_info(f"{gain}_intercept", result.intercept, board_id, plot_dir, False) 

220 

221 # Combined fit 

222 if len(all_temp) > 0: 

223 result = stats.linregress(all_temp, all_mean) 

224 slope_combined = result.slope 

225 intercept_combined = result.intercept 

226 temp_range_combined = np.linspace(all_temp.min(), all_temp.max(), 100) 

227 fit_combined = slope_combined * temp_range_combined + intercept_combined 

228 plt.plot( 

229 temp_range_combined, 

230 fit_combined, 

231 color="black", 

232 linewidth=2, 

233 linestyle="--", 

234 label=f"Combined: y = {slope_combined:.3f}x + {intercept_combined:.2f}", 

235 ) 

236 

237 plt.title( 

238 helper.plot_summary_string( 

239 board_id=run_metadata.board_id, 

240 run_numbers=run_metadata.run_numbers, 

241 name="Pedestal Mean vs. Temperature - All Channels Combined", 

242 attenuation=run_metadata.att_val, 

243 pas_mode=run_metadata.pas_mode, 

244 ) 

245 ) 

246 plt.xlabel("Temperature [°C]") 

247 plt.ylabel("Pedestal Mean [ADC Counts]") 

248 plt.legend() 

249 plt.grid(True, alpha=0.3) 

250 plt.tight_layout() 

251 

252 filename = f"mean_vs_temp{temp_str}_combined.png" 

253 plt.savefig(plot_dir / filename) 

254 logger.debug(f"Saved combined plot to {plot_dir / filename}") 

255 plt.close() 

256 

257 return fit_dict 

258 

259 

260def _create_summary_plots( 

261 fit_results: Dict[str, Any], 

262 plot_dir: Path, 

263 temp_str: str, 

264 run_metadata: Metadata, 

265) -> None: 

266 """Create summary plots for temperature coefficients and intercepts.""" 

267 slopes_data = fit_results["slopes_data"] 

268 intercepts_data = fit_results["intercepts_data"] 

269 slope_errors = fit_results["slope_errors"] 

270 intercept_errors = fit_results["intercept_errors"] 

271 colors = fit_results["colors"] 

272 

273 # Temperature coefficient vs channel plot 

274 plt.figure(figsize=(12, 6)) 

275 for gain in ["lo", "hi"]: 

276 if slopes_data[gain]: 

277 channels = sorted(slopes_data[gain].keys()) 

278 slopes = [slopes_data[gain][ch] for ch in channels] 

279 slope_errs = [slope_errors[gain][ch] for ch in channels] 

280 

281 plt.errorbar( 

282 channels, 

283 slopes, 

284 yerr=slope_errs, 

285 fmt="o", 

286 color=colors[gain], 

287 label=f"{gain.capitalize()} Gain", 

288 markersize=6, 

289 capsize=3, 

290 ) 

291 

292 plt.title( 

293 helper.plot_summary_string( 

294 board_id=run_metadata.board_id, 

295 run_numbers=run_metadata.run_numbers, 

296 name="Temperature Coefficient vs Channel", 

297 attenuation=run_metadata.att_val, 

298 pas_mode=run_metadata.pas_mode, 

299 ) 

300 ) 

301 plt.xlabel("Channel Number") 

302 plt.ylabel("Temperature Coefficient [ADC Counts/°C]") 

303 plt.legend() 

304 plt.grid(True, alpha=0.3) 

305 plt.tight_layout() 

306 

307 filename = f"temp{temp_str}_coefficient_vs_channel.png" 

308 plt.savefig(plot_dir / filename) 

309 logger.debug(f"Saved coefficient plot to {plot_dir / filename}") 

310 plt.close() 

311 

312 # Y-intercept vs channel plot 

313 plt.figure(figsize=(12, 6)) 

314 for gain in ["lo", "hi"]: 

315 if intercepts_data[gain]: 

316 channels = sorted(intercepts_data[gain].keys()) 

317 intercepts = [intercepts_data[gain][ch] for ch in channels] 

318 intercept_errs = [intercept_errors[gain][ch] for ch in channels] 

319 

320 plt.errorbar( 

321 channels, 

322 intercepts, 

323 yerr=intercept_errs, 

324 fmt="o", 

325 color=colors[gain], 

326 label=f"{gain.capitalize()} Gain", 

327 markersize=6, 

328 capsize=3, 

329 ) 

330 

331 plt.title( 

332 helper.plot_summary_string( 

333 board_id=run_metadata.board_id, 

334 run_numbers=run_metadata.run_numbers, 

335 name="Y-Intercept vs Channel", 

336 attenuation=run_metadata.att_val, 

337 pas_mode=run_metadata.pas_mode, 

338 ) 

339 ) 

340 plt.xlabel("Channel Number") 

341 plt.ylabel("Y-Intercept [ADC Counts]") 

342 plt.legend() 

343 plt.grid(True, alpha=0.3) 

344 plt.tight_layout() 

345 

346 filename = f"temp{temp_str}_intercept_vs_channel.png" 

347 plt.savefig(plot_dir / filename) 

348 logger.debug(f"Saved intercept plot to {plot_dir / filename}") 

349 plt.close() 

350 

351 

352def _create_corrected_time_plots( 

353 derived_data: pl.DataFrame, 

354 temp_df: pl.DataFrame, 

355 fit_results: Dict[str, Any], 

356 settling_time: Optional[int], 

357 plot_dir: Path, 

358 temp_str: str, 

359 run_metadata: Metadata, 

360) -> None: 

361 """Create temperature-corrected mean vs time plots.""" 

362 if "fit_dict" not in fit_results: 

363 return 

364 

365 fit_dict = fit_results["fit_dict"] 

366 colors = fit_results["colors"] 

367 settling_time_plt = fit_results["settling_time_plt"] 

368 

369 for gain in ["lo", "hi"]: 

370 if gain not in fit_dict: 

371 continue 

372 

373 gain_filtered_df = derived_data.filter(pl.col("gain") == gain) 

374 slope = fit_dict[gain]["slope"] 

375 intercept = fit_dict[gain]["intercept"] 

376 color = colors[gain] 

377 

378 for channel in gain_filtered_df["channel"].unique().to_list(): 

379 channel_df = gain_filtered_df.filter(pl.col("channel") == channel) 

380 if len(channel_df) == 0: 

381 continue 

382 

383 # Apply settling time filter if specified 

384 if settling_time is not None: 

385 temp_df_tmp, _ = _filter_by_settling_time(temp_df, settling_time) 

386 channel_df_tmp, _ = _filter_by_settling_time(channel_df, settling_time) 

387 else: 

388 temp_df_tmp = temp_df 

389 channel_df_tmp = channel_df 

390 

391 # Join data 

392 combined_df = _join_temp_channel_data(channel_df_tmp, temp_df_tmp) 

393 

394 # Extract values and apply temperature correction 

395 temp_values = combined_df.select("temp_interp").to_numpy().flatten() 

396 mean_values = combined_df.select("mean").to_numpy().flatten() 

397 t = helper.datetime_to_hr(combined_df["timestamp"]) 

398 

399 # Temperature correction 

400 temp_contribution = slope * temp_values + intercept 

401 reference_temp = np.mean(temp_values) 

402 reference_contribution = slope * reference_temp + intercept 

403 corrected_means = mean_values - temp_contribution + reference_contribution 

404 

405 # Create plot 

406 plt.figure(figsize=(10, 5)) 

407 plt.scatter( 

408 t, 

409 corrected_means, 

410 alpha=0.7, 

411 color=color, 

412 s=30, 

413 label=f"Ch {str(channel).zfill(3)} {gain.capitalize()} Gain", 

414 ) 

415 

416 settling_time_str = f", {settling_time_plt} min settling time" if settling_time_plt is not None else "" 

417 plt.title( 

418 helper.plot_summary_string( 

419 board_id=run_metadata.board_id, 

420 run_numbers=run_metadata.run_numbers, 

421 name=f"Temperature-Corrected Pedestal Mean vs Time \ 

422 Ch.{str(channel).zfill(3)} {gain} Gain{settling_time_str}", 

423 attenuation=run_metadata.att_val, 

424 pas_mode=run_metadata.pas_mode, 

425 ) 

426 ) 

427 plt.xlabel("Time") 

428 plt.ylabel("Temperature-Corrected Pedestal Mean [ADC Counts]") 

429 plt.legend() 

430 plt.grid(True, alpha=0.3) 

431 plt.xticks(rotation=45) 

432 plt.tight_layout() 

433 

434 channel_str = str(channel).zfill(3) 

435 filename = f"temp{temp_str}_corrected_mean_vs_time_channel{channel_str}_{gain}.png" 

436 plt.savefig(plot_dir / filename) 

437 logger.debug(f"Saved corrected time plot to {plot_dir / filename}") 

438 plt.close() 

439 

440 

441def _baseline_correction(df: pl.DataFrame) -> pl.DataFrame: 

442 # Correct the baseline of the samples by subtracting the mean for each gain and channel 

443 df = df.with_columns(pl.col("mean").mean().over(["gain", "channel"]).alias("overall_mean")) 

444 df = df.with_columns(pl.col("samples") - pl.col("mean") + pl.col("overall_mean").alias("samples")) 

445 return df 

446 

447 

448def _get_temp_df(temp_source: str, monitoring_df: pl.DataFrame, lab_env_data: pl.DataFrame) -> pl.DataFrame: 

449 if temp_source == "thermistors": 

450 return ( 

451 monitoring_df.filter(pl.col("group_name") == "thermistor") 

452 .group_by("measurement") 

453 .agg([pl.col("value").mean().alias("value"), pl.col("timestamp").first().alias("timestamp")]) 

454 ) 

455 elif temp_source == "bpol48": 

456 return ( 

457 monitoring_df.filter(pl.col("monitor").is_in(["PAT1", "PAT2"])) 

458 .group_by("measurement") 

459 .agg([pl.col("value").mean().alias("value"), pl.col("timestamp").first().alias("timestamp")]) 

460 ) 

461 elif "TEMP" in temp_source or temp_source.startswith("PAT"): 

462 return monitoring_df.filter(pl.col("monitor") == temp_source) 

463 elif temp_source == "lab_environment": 

464 return lab_env_data.with_columns(pl.mean_horizontal(["lab_temp", "crate_temp"]).alias("value")) 

465 elif temp_source == "lab_temp": 

466 return lab_env_data.with_columns(pl.col("lab_temp").alias("value")) 

467 elif temp_source == "crate_temp": 

468 return lab_env_data.with_columns(pl.col("crate_temp").alias("value")) 

469 else: 

470 print(f"Unknown temperature source: {temp_source}. Returning empty DataFrame.") 

471 return pl.DataFrame() 

472 

473 

474def _filter_by_settling_time( 

475 df: pl.DataFrame, 

476 settling_time_minutes: int, 

477) -> Tuple[pl.DataFrame, Optional[int]]: 

478 """ 

479 Filter dataframe to only include entries after settling time has passed. 

480 

481 Args: 

482 df: DataFrame with a 'timestamp' column 

483 settling_time_minutes: Time in minutes to wait after first timestamp 

484 

485 Returns: 

486 Filtered DataFrame 

487 """ 

488 first_timestamp = df["timestamp"].min() 

489 settling_threshold = first_timestamp + pl.duration(minutes=settling_time_minutes) 

490 filtered_df = df.filter(pl.col("timestamp") >= settling_threshold) 

491 if len(filtered_df) == 0: 

492 logger.warning( 

493 f"No data points found after {settling_time_minutes} minutes settling time. Returning original DataFrame." 

494 ) 

495 return df, None 

496 

497 return filtered_df, settling_time_minutes 

498 

499 

500def moving_average(a: np.ndarray, n: int = 4) -> np.ndarray: 

501 ret = np.cumsum(a, dtype=float) 

502 ret[n:] = ret[n:] - ret[:-n] 

503 return ret[n - 1 :] / n 

504 

505 

506def plot_monitoring( 

507 monitoring_df: pl.DataFrame, lab_env_data: pl.DataFrame, plot_dir: Path, tzero_correction: bool = True 

508) -> None: 

509 # Plots monitoring data from the monitoring DataFrame and lab environment data. 

510 run_number = monitoring_df["run_number"][0] 

511 board_id = monitoring_df["board_id"][0] if "board_id" in monitoring_df.columns else None 

512 pas_mode = monitoring_df["pas_mode"][0] if "pas_mode" in monitoring_df.columns else None 

513 attenuation = monitoring_df["att_val"][0] if "att_val" in monitoring_df.columns else None 

514 

515 corrected_temperature_col_name = "value_tzero_corr" 

516 

517 df_gb = ( 

518 monitoring_df.sort(["timestamp"]) 

519 .with_columns(pl.col("timestamp").dt.epoch(time_unit="s").alias("seconds")) 

520 .group_by(pl.col("monitor")) 

521 .agg(["value", "seconds", "group_name", "unit", "monitor_type"]) 

522 .with_columns( 

523 pl.col("group_name").list.first().alias("group_name"), 

524 pl.col("unit").list.first().alias("unit"), 

525 pl.col("monitor_type").list.first().alias("monitor_type"), 

526 pl.col("value").list.first().alias("first"), 

527 ((pl.col("seconds") - pl.col("seconds").list.first()) / 3600).alias("hours"), 

528 ) 

529 ) 

530 

531 df_avg = df_gb.group_by("group_name").agg(pl.col("first").mean().alias("tzero_avg")) 

532 df_gb = df_gb.join(df_avg, on="group_name").with_columns( 

533 (pl.col("value") - pl.col("value").list.first() + pl.col("tzero_avg")).alias(corrected_temperature_col_name) 

534 ) 

535 

536 for monitor_group in df_gb["group_name"].unique(): 

537 for monitor_unit in df_gb.filter(pl.col("group_name") == monitor_group)["monitor_type"].unique(): 

538 monitor_group_df = df_gb.filter( 

539 (pl.col("group_name") == monitor_group) & (pl.col("monitor_type") == monitor_unit) 

540 ) 

541 for monitor in sorted(monitor_group_df["monitor"].unique().to_list(), key=lambda x: (len(x), x)): 

542 value_col = ( 

543 corrected_temperature_col_name if (tzero_correction and monitor_unit.lower() == "c") else "value" 

544 ) 

545 

546 monitor_df = monitor_group_df.filter(pl.col("monitor") == monitor) 

547 plt.plot(monitor_df["hours"].item(), monitor_df[value_col].item(), label=monitor) 

548 if monitor_group == "": 

549 monitor_group = "other" 

550 plt.title( 

551 helper.plot_summary_string( 

552 board_id=board_id, 

553 run_numbers=run_number, 

554 name=f"{monitor_group} {monitor_unit}", 

555 attenuation=attenuation, 

556 pas_mode=pas_mode, 

557 ) 

558 ) 

559 plt.legend(loc="center left", bbox_to_anchor=(1, 0.47), fontsize=6) 

560 plt.tight_layout(rect=(0.03, 0.03, 1, 0.95)) 

561 plt.xlabel("Time since configuration [Hours]") 

562 plt.ylabel(f"{monitor_group} {monitor_unit} [{monitor_group_df['unit'][0]}]") 

563 plt.savefig(f"{plot_dir}/{monitor_group}_{monitor_unit}.png") 

564 plt.savefig(f"{plot_dir}/{monitor_group}_{monitor_unit}.pdf") 

565 plt.close() 

566 logger.info(f"Saved run{run_number} {monitor_group} to {plot_dir}/{monitor_group}_{monitor_unit}.png") 

567 

568 # Plot lab environment temp 

569 for monitor in ["lab_temp", "crate_temp"]: 

570 t = helper.datetime_to_hr(lab_env_data["timestamp"]) 

571 plt.plot(t, lab_env_data[monitor], label=monitor) 

572 plt.title( 

573 helper.plot_summary_string( 

574 board_id=board_id, 

575 run_numbers=run_number, 

576 name="Lab Temperature", 

577 attenuation=attenuation, 

578 pas_mode=pas_mode, 

579 ) 

580 ) 

581 plt.legend(loc="center left", bbox_to_anchor=(1, 0.47), fontsize=6) 

582 plt.tight_layout(rect=(0.03, 0.03, 1, 0.95)) 

583 plt.xlabel("Time since configuration [Hours]") 

584 plt.ylabel("Lab Temperature [C]") 

585 plt.savefig(f"{plot_dir}/lab_temperature.png") 

586 plt.close() 

587 

588 

589def plot_monitor_channel_correlation( 

590 derived_data: pl.DataFrame, monitoring_df: pl.DataFrame, lab_env_data: pl.DataFrame, plot_dir: Path 

591) -> None: 

592 temp_df = ( 

593 monitoring_df.filter(pl.col("monitor_type") == "temperature") 

594 .group_by("measurement", "monitor") 

595 .agg([pl.col("timestamp").first().alias("timestamp"), pl.col("value").first()]) 

596 ) 

597 ### Remove later 

598 ### temp_df = temp_df.with_columns(pl.col("timestamp").dt.replace_time_zone("UTC", non_existent="null")) 

599 ### 

600 temp_pivoted = temp_df.pivot(index=["measurement", "timestamp"], on="monitor", values="value").sort("timestamp") 

601 combined_temp_df = ( 

602 temp_pivoted.sort("timestamp") 

603 .join_asof( 

604 lab_env_data.select(["timestamp", "crate_temp", "lab_temp"]).sort("timestamp"), 

605 on="timestamp", 

606 strategy="nearest", 

607 ) 

608 .drop("measurement", "timestamp") 

609 ) 

610 sorted_cols = sorted(combined_temp_df.columns) 

611 combined_temp_df = combined_temp_df.select(sorted_cols) 

612 

613 for gain in derived_data["gain"].unique().to_list(): 

614 gain_filtered_df = derived_data.filter(pl.col("gain") == gain).sort("timestamp").select("mean", "channel") 

615 channels = gain_filtered_df["channel"].unique().to_list() 

616 monitors = combined_temp_df.columns 

617 corr_matrix = np.full((len(monitors), len(channels)), np.nan) 

618 

619 if len(monitors) == 0: 

620 continue 

621 null_mask = np.isnan(combined_temp_df[monitors[0]].to_numpy().flatten()) 

622 for i, monitor in enumerate(monitors): 

623 monitor_data_tmp = combined_temp_df[monitor].to_numpy().flatten() 

624 monitor_data = monitor_data_tmp[~null_mask] 

625 

626 for j, channel in enumerate(channels): 

627 channel_data = gain_filtered_df.filter(pl.col("channel") == channel).select("mean").to_numpy().flatten() 

628 

629 if len(monitor_data) > 1 and len(channel_data) > 1: 

630 if len(monitor_data) != len(channel_data): 

631 min_len = min(len(monitor_data), len(channel_data)) 

632 monitor_data = monitor_data[:min_len] 

633 channel_data = channel_data[:min_len] 

634 corr_matrix[i, j] = np.corrcoef(channel_data, monitor_data)[0, 1] 

635 

636 for gain in derived_data["gain"].unique().to_list(): 

637 gain_filtered_df = derived_data.filter(pl.col("gain") == gain).sort("timestamp").select("mean", "channel") 

638 channels = gain_filtered_df["channel"].unique().to_list() 

639 monitors = combined_temp_df.columns 

640 corr_matrix = np.full((len(monitors), len(channels)), np.nan) 

641 

642 if len(monitors) == 0: 

643 continue 

644 temp_null_mask = np.isnan(combined_temp_df[monitors[0]].to_numpy().flatten()) 

645 for i, monitor in enumerate(monitors): 

646 monitor_data_tmp = combined_temp_df[monitor].to_numpy().flatten() 

647 monitor_data = monitor_data_tmp[~temp_null_mask] 

648 

649 for j, channel in enumerate(channels): 

650 channel_data = gain_filtered_df.filter(pl.col("channel") == channel).select("mean").to_numpy().flatten() 

651 

652 if abs(len(monitor_data) - len(channel_data)) > 1: 

653 logger.warning( 

654 f"Length mismatch for monitor {monitor} and channel {channel}: " 

655 f"{len(monitor_data)} vs {len(channel_data)}" 

656 ) 

657 if len(monitor_data) != len(channel_data): 

658 min_len = min(len(monitor_data), len(channel_data)) 

659 monitor_data = monitor_data[:min_len] 

660 channel_data = channel_data[:min_len] 

661 mask = ~(np.isnan(monitor_data) | np.isnan(channel_data)) 

662 monitor_clean = monitor_data[mask] 

663 channel_clean = channel_data[mask] 

664 

665 if len(monitor_clean) >= 10: # Need sufficient data points 

666 if np.std(monitor_clean) > 1e-10 and np.std(channel_clean) > 1e-10: 

667 corr_val = np.corrcoef(monitor_clean, channel_clean)[0, 1] 

668 corr_matrix[i, j] = corr_val 

669 

670 plt.figure(figsize=(30, 10)) 

671 display_matrix = corr_matrix.copy() 

672 nan_mask = np.isnan(corr_matrix) 

673 display_matrix[nan_mask] = 0 

674 

675 plt.imshow(display_matrix, cmap="RdBu", vmin=-1, vmax=1) 

676 

677 # Add text annotations 

678 for i, monitor in enumerate(monitors): 

679 for j, channel in enumerate(channels): 

680 if nan_mask[i, j]: 

681 plt.text(j, i, "N/A", ha="center", va="center", color="gray", fontsize=6) 

682 else: 

683 corr_val = corr_matrix[i, j] 

684 text_color = "white" if abs(corr_val) > 0.5 else "black" 

685 plt.text(j, i, f"{corr_val:.2f}", ha="center", va="center", color=text_color, fontsize=4) 

686 

687 plt.colorbar(label="Correlation Coefficient") 

688 plt.xticks(ticks=np.arange(len(channels)), labels=channels, rotation=90, ha="right") 

689 plt.yticks(ticks=np.arange(len(monitors)), labels=monitors) 

690 plt.title("Correlation Matrix: Monitor Temp vs Channel ADC Counts") 

691 plt.tight_layout() 

692 plt.savefig(plot_dir / f"monitor_channel_corr_{gain}.png") 

693 plt.close() 

694 

695 

696def plot_monitor_monitor_correlation( 

697 monitoring_df: pl.DataFrame, 

698 lab_env_data: pl.DataFrame, 

699 plot_dir: Path, 

700) -> None: 

701 temp_df = ( 

702 monitoring_df.filter(pl.col("monitor_type") == "temperature") 

703 .group_by("measurement", "monitor") 

704 .agg([pl.col("timestamp").first().alias("timestamp"), pl.col("value").first()]) 

705 ) 

706 ### Remove later 

707 # temp_df = temp_df.with_columns(pl.col("timestamp").dt.replace_time_zone("UTC", non_existent="null")) 

708 ### 

709 temp_pivoted = temp_df.pivot(index=["measurement", "timestamp"], on="monitor", values="value").sort("measurement") 

710 combined_df = ( 

711 temp_pivoted.sort("timestamp") 

712 .join_asof( 

713 lab_env_data.select(["timestamp", "crate_temp", "lab_temp"]).sort("timestamp"), 

714 on="timestamp", 

715 strategy="nearest", 

716 ) 

717 .drop("measurement", "timestamp") 

718 ) 

719 

720 sorted_cols = sorted(combined_df.columns) 

721 combined_df = combined_df.select(sorted_cols) 

722 

723 data_matrix = combined_df.to_numpy().astype(float) 

724 n_cols = len(combined_df.columns) 

725 corr_matrix = np.full((n_cols, n_cols), np.nan) 

726 

727 # Calculate correlations pairwise 

728 for i in range(n_cols): 

729 for j in range(n_cols): 

730 if i == j: 

731 corr_matrix[i, j] = 1.0 

732 else: 

733 # Get data for both columns 

734 x = data_matrix[:, i] 

735 y = data_matrix[:, j] 

736 

737 # Remove rows where either is NaN 

738 mask = ~(np.isnan(x) | np.isnan(y)) 

739 x_clean = x[mask] 

740 y_clean = y[mask] 

741 

742 if len(x_clean) >= 10: # Need sufficient data points 

743 # Check for constant values 

744 if np.std(x_clean) > 1e-10 and np.std(y_clean) > 1e-10: 

745 corr_val = np.corrcoef(x_clean, y_clean)[0, 1] 

746 corr_matrix[i, j] = corr_val 

747 

748 plt.figure(figsize=(15, 12)) 

749 display_matrix = corr_matrix.copy() 

750 nan_mask = np.isnan(corr_matrix) 

751 display_matrix[nan_mask] = 0 

752 

753 plt.imshow(display_matrix, cmap="Blues", vmin=0, vmax=1) 

754 

755 # Add text annotations 

756 for i in range(len(combined_df.columns)): 

757 for j in range(len(combined_df.columns)): 

758 if nan_mask[i, j]: 

759 plt.text(j, i, "N/A", ha="center", va="center", color="gray", fontsize=6) 

760 else: 

761 corr_val = corr_matrix[i, j] 

762 text_color = "white" if abs(corr_val) > 0.5 else "black" 

763 plt.text(j, i, f"{corr_val:.2f}", ha="center", va="center", color=text_color, fontsize=4) 

764 

765 plt.colorbar(label="Correlation Coefficient") 

766 plt.xticks(ticks=np.arange(len(combined_df.columns)), labels=combined_df.columns, rotation=90, ha="right") 

767 plt.yticks(ticks=np.arange(len(combined_df.columns)), labels=combined_df.columns) 

768 plt.title("Correlation Matrix of Monitoring Channels") 

769 plt.tight_layout() 

770 plt.savefig(plot_dir / "monitor_monitor_corr.png") 

771 plt.close() 

772 

773 

774def plot_temp_correlation( 

775 derived_data: pl.DataFrame, 

776 monitoring_df: pl.DataFrame, 

777 lab_env_data: pl.DataFrame, 

778 plot_dir: Path, 

779 temp_source: Optional[str] = None, 

780 settling_time: Optional[int] = None, 

781) -> None: 

782 temp_str = f"_{temp_source}" if temp_source else "" 

783 temp_source = temp_source or "lab_environment" 

784 

785 run_metadata = Metadata.fill_from_dataframe(derived_data) 

786 

787 temp_df = _get_temp_df(temp_source, monitoring_df, lab_env_data) 

788 if len(temp_df) == 0: 

789 logger.warning("No temperature data found for correlation plot.") 

790 return 

791 

792 # Process data and create fits 

793 fit_results = _process_temp_correlation_data(derived_data, temp_df, settling_time, plot_dir, temp_str, run_metadata) 

794 

795 # Generate all plots 

796 fit_dict = _create_combined_plots(fit_results, plot_dir, temp_str, run_metadata) 

797 fit_results["fit_dict"] = fit_dict 

798 _create_summary_plots(fit_results, plot_dir, temp_str, run_metadata) 

799 _create_corrected_time_plots(derived_data, temp_df, fit_results, settling_time, plot_dir, temp_str, run_metadata) 

800 

801 

802def plot_mean_rms_vs_time(gain_filtered_df: pl.DataFrame, plot_dir: Path) -> None: 

803 color_dict = {"lo": "b", "hi": "r"} 

804 gain = gain_filtered_df["gain"][0] 

805 color = color_dict[gain] 

806 board_id = gain_filtered_df["board_id"][0] if "board_id" in gain_filtered_df.columns else None 

807 run_number = gain_filtered_df["run_number"][0] 

808 pas_mode = gain_filtered_df["pas_mode"][0] if "pas_mode" in gain_filtered_df.columns else None 

809 attenuation = gain_filtered_df["att_val"][0] if "att_val" in gain_filtered_df.columns else None 

810 

811 for channel in gain_filtered_df["channel"].unique().to_list(): 

812 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter( 

813 pl.col("channel") == channel 

814 ) 

815 

816 if len(channel_df) == 0: 

817 continue 

818 

819 t = helper.datetime_to_hr(channel_df["timestamp"]) 

820 channel = str(channel).zfill(3) 

821 

822 # Mean vs Time 

823 fig1, ax1 = plt.subplots(figsize=(10, 5)) 

824 ax1.grid(visible=True, zorder=0) 

825 ax1.scatter(t, channel_df["mean"], c=color, marker="o") 

826 ax1.set_title( 

827 helper.plot_summary_string( 

828 board_id=board_id, 

829 run_numbers=run_number, 

830 name=f"{gain} Gain Mean vs. Time Ch.{channel}", 

831 attenuation=attenuation, 

832 pas_mode=pas_mode, 

833 ) 

834 ) 

835 ax1.set_ylabel("Pedestal Mean [ADC Counts]") 

836 ax1.set_xlabel("Time [Hours]") 

837 fig1.savefig(plot_dir / f"mean_vs_time_channel{channel}_{gain}.png") 

838 plt.close(fig1) 

839 

840 # RMS vs Time 

841 fig2, ax2 = plt.subplots(figsize=(10, 5)) 

842 ax2.scatter(t, channel_df["std"], c=color, marker="o") 

843 ax2.set_title( 

844 helper.plot_summary_string( 

845 board_id=board_id, 

846 run_numbers=run_number, 

847 name=f"{gain} Gain RMS vs. Time Ch.{channel}", 

848 attenuation=attenuation, 

849 pas_mode=pas_mode, 

850 ) 

851 ) 

852 ax2.set_ylabel("Pedestal RMS [ADC Counts]") 

853 ax2.set_xlabel("Time [Hours]") 

854 fig2.savefig(plot_dir / f"rms_vs_time_channel{channel}_{gain}.png") 

855 plt.close(fig2) 

856 

857 

858def plot_sample_range_vs_time(gain_filtered_df: pl.DataFrame, plot_dir: Path) -> None: 

859 board_id = gain_filtered_df["board_id"][0] if "board_id" in gain_filtered_df.columns else None 

860 run_number = gain_filtered_df["run_number"][0] 

861 pas_mode = gain_filtered_df["pas_mode"][0] if "pas_mode" in gain_filtered_df.columns else None 

862 attenuation = gain_filtered_df["att_val"][0] if "att_val" in gain_filtered_df.columns else None 

863 plt.figure(figsize=(10, 5)) 

864 color_dict = {"lo": "b", "hi": "r"} 

865 title_dict = {"lo": "LG", "hi": "HG"} 

866 gain = gain_filtered_df["gain"][0] 

867 color = color_dict[gain] 

868 title = title_dict[gain] 

869 

870 first_channel = True 

871 for channel in gain_filtered_df["channel"].unique().to_list(): 

872 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter( 

873 pl.col("channel") == channel 

874 ) 

875 fig, ax = plt.subplots(figsize=(8, 6)) 

876 ax.grid(visible=True, zorder=0) 

877 avg_filter_df = channel_df.select( 

878 [ 

879 pl.col("samples").list.max().alias("max_per_list"), 

880 pl.col("samples").list.min().alias("min_per_list"), 

881 pl.col("samples").list.len().alias("list_lens"), 

882 (pl.col("samples").list.max() - pl.col("samples").list.min()).alias("range_per_list"), 

883 ] 

884 ) 

885 

886 t = helper.datetime_to_hr(channel_df["timestamp"]) 

887 range_per_list = avg_filter_df["range_per_list"] 

888 error = channel_df["std"] / np.sqrt(avg_filter_df["list_lens"]) 

889 label = title if first_channel else None # Only label first channel per gain 

890 first_channel = False 

891 

892 ax.errorbar(t, range_per_list, error, fmt=f"{color}o", label=label) 

893 

894 ax.set_title( 

895 helper.plot_summary_string( 

896 board_id=board_id, 

897 run_numbers=run_number, 

898 channels=helper.list_to_text_ranges(channel), 

899 name="Sample Range vs. Time", 

900 attenuation=attenuation, 

901 pas_mode=pas_mode, 

902 gain=gain, 

903 ) 

904 ) 

905 channel = str(channel).zfill(3) 

906 ax.set_ylabel("Avg. Sample Range [ADC Counts]") 

907 ax.set_xlabel("Time[hours]") 

908 fig.tight_layout() 

909 fig.savefig(plot_dir / f"sample_range_vs_time_channel{channel}_{gain}.png") 

910 plt.close(fig) 

911 fig.clf() 

912 

913 

914def avg_rms_mean_vs_channel( 

915 df: pl.DataFrame, 

916 plot_dir: Path, 

917 info: Optional[Metadata] = None, 

918 settling_time: int = 4, # Settling time in minutes for max rms deviation calculation 

919): 

920 board_id = df["board_id"][0] if "board_id" in df.columns else None 

921 run_number = df["run_number"][0] 

922 pas_mode = df["pas_mode"][0] if "pas_mode" in df.columns else None 

923 attenuation = df["att_val"][0] if "att_val" in df.columns else None 

924 

925 plt.figure(figsize=(10, 5)) 

926 n_channels = df["channel"].n_unique() 

927 channels = df["channel"].unique().to_list() 

928 color_dict = {"lo": "b", "hi": "r"} 

929 title_dict = {"lo": "LG", "hi": "HG"} 

930 

931 fig, ax = plt.subplots(figsize=(8, 6)) 

932 plt.xticks(np.arange(0, n_channels, 4), rotation=70) 

933 fig2, ax2 = plt.subplots(1, figsize=(8, 6)) 

934 fig3, ax3 = plt.subplots(1, figsize=(8, 6)) 

935 ax.grid(visible=True, zorder=0) 

936 ax.xaxis.set_tick_params(pad=0.1) 

937 ax2.grid(visible=True) 

938 ax3.grid(visible=True) 

939 plt.xticks(np.arange(0, n_channels, 4), rotation=70) 

940 

941 # Plot avg mean, avg rms, and max rms deviation per channel 

942 for gain in ["hi", "lo"]: 

943 gain_filtered_df = df.filter(pl.col("gain") == gain) 

944 first_channel = True 

945 for channel in gain_filtered_df["channel"].unique().to_list(): 

946 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter( 

947 pl.col("channel") == channel 

948 ) 

949 color = color_dict[gain] 

950 title = title_dict[gain] 

951 

952 means = channel_df["mean"].to_numpy() 

953 stds = channel_df["std"].to_numpy() 

954 

955 means_mean = np.mean(means) 

956 means_min = np.min(means) 

957 means_max = np.max(means) 

958 stds_mean = np.mean(stds) 

959 

960 n_measurements = len(means) 

961 stds_err = np.std(stds) / np.sqrt(n_measurements) 

962 means_err = np.array([[means_mean - means_min], [means_max - means_mean]]) 

963 

964 label = title if first_channel else None 

965 first_channel = False 

966 

967 ax.errorbar(channel, means_mean, yerr=means_err, fmt=f"{color}o", label=label) 

968 ax2.errorbar(channel, stds_mean, stds_err, fmt=f"{color}o", label=label) 

969 

970 filtered_df, _ = _filter_by_settling_time(channel_df, settling_time) 

971 stds_settled = filtered_df["std"].to_numpy() 

972 max_rms_deviation = np.max(np.abs(stds_settled - stds_mean)) 

973 ax3.plot(channel, max_rms_deviation, f"{color}o", label=label) 

974 

975 ax.set_title( 

976 helper.plot_summary_string( 

977 board_id=board_id, 

978 run_numbers=run_number, 

979 channels=helper.list_to_text_ranges(channels), 

980 name="Overall Mean vs. Channel #\nError bars show min and max", 

981 attenuation=attenuation, 

982 pas_mode=pas_mode, 

983 ) 

984 ) 

985 ax.set_ylabel("Avg. Pedestal Mean [ADC Counts]") 

986 ax.set_xlabel("Channel") 

987 fig.legend() 

988 fig.tight_layout() 

989 fig.savefig(plot_dir / "avg_mean_vs_channel.png") 

990 fig.clf() 

991 

992 ax2.set_title( 

993 helper.plot_summary_string( 

994 board_id=board_id, 

995 run_numbers=run_number, 

996 channels=helper.list_to_text_ranges(channels), 

997 name="Overall RMS vs. Channel #", 

998 attenuation=attenuation, 

999 pas_mode=pas_mode, 

1000 info=info, 

1001 ) 

1002 ) 

1003 ax2.set_ylabel("Avg. Pedestal RMS [ADC Counts]") 

1004 ax2.set_xlabel("Channel") 

1005 fig2.legend() 

1006 fig2.tight_layout() 

1007 fig2.savefig(plot_dir / "avg_rms_vs_time.png") 

1008 fig2.clf() 

1009 

1010 ax3.set_title( 

1011 helper.plot_summary_string( 

1012 board_id=board_id, 

1013 run_numbers=run_number, 

1014 channels=helper.list_to_text_ranges(channels), 

1015 name=f"Max RMS Deviation vs. Channel # After {settling_time} min Settling Time)", 

1016 attenuation=attenuation, 

1017 pas_mode=pas_mode, 

1018 info=info, 

1019 ) 

1020 ) 

1021 ax3.set_ylabel("Max RMS Deviation [ADC Counts]") 

1022 ax3.set_xlabel("Channel") 

1023 fig3.legend() 

1024 fig3.tight_layout() 

1025 fig3.savefig(plot_dir / "max_rms_deviation_vs_channel.png") 

1026 fig3.clf() 

1027 

1028 

1029def plot_outliers( 

1030 df: pl.DataFrame, 

1031 plot_dir: Path, 

1032 info: Optional[Metadata] = None, 

1033): 

1034 df = _baseline_correction(df) 

1035 board_id = df["board_id"][0] if "board_id" in df.columns else None 

1036 run_number = df["run_number"][0] 

1037 pas_mode = df["pas_mode"][0] if "pas_mode" in df.columns else None 

1038 attenuation = df["att_val"][0] if "att_val" in df.columns else None 

1039 

1040 # Summary plot setup 

1041 fig, ax = plt.subplots(figsize=(10, 5)) 

1042 channels = df["channel"].unique().to_list() 

1043 color_dict = {"lo": "b", "hi": "r"} 

1044 title_dict = {"lo": "LG", "hi": "HG"} 

1045 

1046 # For time-series deviation summary plot 

1047 max_outlier_rate_deviations: Dict[str, Dict[int, float]] = {"hi": {}, "lo": {}} 

1048 # For average outlier rate summary plot 

1049 avg_outlier_rates: Dict[str, Dict[int, float]] = {"hi": {}, "lo": {}} 

1050 # For max outlier rate summary plot 

1051 max_outlier_rates: Dict[str, Dict[int, float]] = {"hi": {}, "lo": {}} 

1052 

1053 gains: List[Literal["hi", "lo"]] = ["hi", "lo"] 

1054 for gain in gains: 

1055 gain_filtered_df = df.filter(pl.col("gain") == gain) 

1056 first_channel = True 

1057 

1058 for channel in gain_filtered_df["channel"].unique().to_list(): 

1059 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter( 

1060 pl.col("channel") == channel 

1061 ) 

1062 channel_str = str(channel).zfill(3) 

1063 color = color_dict[gain] 

1064 title = title_dict[gain] 

1065 label = title if first_channel else None 

1066 first_channel = False 

1067 

1068 # Original outlier analysis 

1069 samples = np.array(channel_df["samples"].explode().to_list()) 

1070 

1071 # gaussian fit statistics 

1072 hist_bins: np.ndarray = np.linspace(min(samples), max(samples), 100) 

1073 fig2, ax2 = plt.subplots(1) 

1074 

1075 _ = ax2.hist(samples, bins=hist_bins.tolist()) 

1076 xaxis: np.ndarray = np.linspace(np.min(samples), np.max(samples), 50) 

1077 fit_pars = helper.calc_gaussian(samples, hist_bins) 

1078 fit_mu, fit_sigma, fit_N = fit_pars[0], fit_pars[2], fit_pars[4] 

1079 d_mu, d_sigma = fit_pars[1], fit_pars[3] 

1080 gauss_fit = helper.gauss(xaxis, fit_mu, fit_sigma, fit_N) 

1081 upper_bound = fit_mu + 5 * fit_sigma 

1082 lower_bound = fit_mu - 5 * fit_sigma 

1083 ax2.axvline(upper_bound, color="red", linestyle="--", label="5σ") 

1084 ax2.axvline(lower_bound, color="red", linestyle="--") 

1085 ax2.axvline(fit_mu + 3 * fit_sigma, color="green", linestyle="--", label="3σ") 

1086 ax2.axvline(fit_mu - 3 * fit_sigma, color="green", linestyle="--") 

1087 ax2.plot( 

1088 xaxis, 

1089 gauss_fit, 

1090 label=rf"""Gaussian Fit 

1091 $\mu$ = {fit_mu:.03f} $\pm$ {d_mu:.03g} 

1092 $\sigma$ = {fit_sigma:.03f} $\pm$ {d_sigma:.03g}""", 

1093 ) 

1094 

1095 ax2.set_title( 

1096 helper.plot_summary_string( 

1097 board_id=board_id, 

1098 run_numbers=run_number, 

1099 channels=channel, 

1100 name="Pedestal Sample Outliers [ADC counts]", 

1101 attenuation=attenuation, 

1102 pas_mode=pas_mode, 

1103 gain=gain, 

1104 ) 

1105 ) 

1106 ax2.set_ylabel("Entries") 

1107 ax2.set_xlabel("Sample [ADC counts]") 

1108 ax2.legend() 

1109 fig2.savefig(f"{plot_dir}/channel{channel_str}_{gain}_sample_hist.png") 

1110 fig2.clf() 

1111 plt.close(fig2) 

1112 

1113 # Calculate total outliers for summary bar plot 

1114 five_sigma = 5 * fit_sigma 

1115 outliers = samples[np.abs(samples - fit_mu) > five_sigma] 

1116 num_outliers = len(outliers) 

1117 

1118 ax.bar(int(channel), num_outliers, fill=False, ec=color, label=label, zorder=3) 

1119 

1120 # Time-series analysis - outlier rate vs time 

1121 fig3, ax3 = plt.subplots(figsize=(12, 6)) 

1122 ax3.grid(visible=True, zorder=0) 

1123 

1124 t = helper.datetime_to_hr(channel_df["timestamp"]) 

1125 outlier_rates = [] 

1126 

1127 for sample in channel_df["samples"]: 

1128 sample = np.array(sample) 

1129 hist_bins = np.linspace(min(sample), max(sample), 100) 

1130 

1131 fit_pars = helper.calc_gaussian(sample, hist_bins) 

1132 fit_mu, fit_sigma = fit_pars[0], fit_pars[2] 

1133 five_sigma = 5 * fit_sigma 

1134 outliers = sample[np.abs(sample - fit_mu) > five_sigma] 

1135 

1136 # Calculate outlier rate as percentage 

1137 outlier_rate = (len(outliers) / len(sample)) * 100 

1138 outlier_rates.append(outlier_rate) 

1139 

1140 max_outlier_rates[gain][channel] = max(outlier_rates) if outlier_rates else 0 

1141 ax3.plot(t, outlier_rates, "o", color=color, alpha=0.7) 

1142 

1143 max_rate = max(outlier_rates) if outlier_rates else 0 

1144 y_max = max(0.05, max_rate * 1.2) 

1145 ax3.set_ylim(0, y_max) 

1146 

1147 # Calculate max deviation and average rate for summary plots 

1148 mean_rate = np.mean(outlier_rates) 

1149 max_deviation = np.max(np.abs(np.array(outlier_rates) - mean_rate)) 

1150 max_outlier_rate_deviations[gain][channel] = max_deviation 

1151 avg_outlier_rates[gain][channel] = mean_rate.astype(float) 

1152 

1153 ax3.set_title( 

1154 helper.plot_summary_string( 

1155 board_id=board_id, 

1156 run_numbers=run_number, 

1157 channels=channel, 

1158 name=f"Channel {channel} Outlier Rate vs Time, 5σ Gaussian Fit Cut", 

1159 attenuation=attenuation, 

1160 pas_mode=pas_mode, 

1161 gain=gain, 

1162 ) 

1163 ) 

1164 

1165 ax3.set_ylabel("Outlier Rate (%)") 

1166 ax3.set_xlabel("Time [hours]") 

1167 fig3.tight_layout() 

1168 fig3.savefig(plot_dir / f"outliers_vs_time_channel{channel_str}_{gain}.png", dpi=300) 

1169 plt.close(fig3) 

1170 

1171 # Create summary plot: Number of outliers per channel 

1172 ax.set_title( 

1173 helper.plot_summary_string( 

1174 board_id=board_id, 

1175 run_numbers=run_number, 

1176 channels=helper.list_to_text_ranges(channels), 

1177 name="Number of Baseline Corrected Sample Outliers Per Channel, 5σ Gaussian Fit Cut", 

1178 attenuation=attenuation, 

1179 pas_mode=pas_mode, 

1180 info=info, 

1181 ) 

1182 ) 

1183 ax.set_xlabel("Channel", loc="right") 

1184 ax.set_ylabel("Number of Outliers") 

1185 ax.legend(loc="upper right") 

1186 fig.tight_layout() 

1187 fig.savefig(f"{plot_dir}/outliers_summary.png") 

1188 plt.close(fig) 

1189 

1190 # Create summary plot: Maximum outlier rate deviation vs channel # 

1191 fig4, ax4 = plt.subplots(figsize=(12, 8)) 

1192 ax4.grid(visible=True, zorder=0) 

1193 

1194 for gain in gains: 

1195 if max_outlier_rate_deviations[gain]: 

1196 channels_sorted = sorted(max_outlier_rate_deviations[gain].keys()) 

1197 deviations = [max_outlier_rate_deviations[gain][ch] for ch in channels_sorted] 

1198 

1199 ax4.plot( 

1200 channels_sorted, 

1201 deviations, 

1202 "o", 

1203 color=color_dict[gain], 

1204 label=f"{gain} Gain", 

1205 alpha=0.7, 

1206 markersize=6, 

1207 ) 

1208 

1209 ax4.set_title( 

1210 helper.plot_summary_string( 

1211 board_id=board_id, 

1212 run_numbers=run_number, 

1213 channels=helper.list_to_text_ranges(list(range(min(channels_sorted), max(channels_sorted) + 1))), 

1214 name="Maximum Outlier Rate Deviation vs Channel Number, 5σ Gaussian Fit Cut", 

1215 attenuation=attenuation, 

1216 pas_mode=pas_mode, 

1217 info=info, 

1218 ) 

1219 ) 

1220 

1221 ax4.set_ylabel("Maximum Outlier Rate Deviation (%)") 

1222 ax4.set_xlabel("Channel Number") 

1223 ax4.legend() 

1224 fig4.tight_layout() 

1225 fig4.savefig(plot_dir / "max_outlier_rate_deviation_vs_channel.png", dpi=300) 

1226 plt.close(fig4) 

1227 

1228 # Create summary plots: Average outlier rate vs channel 

1229 for gain in gains: 

1230 if avg_outlier_rates[gain]: 

1231 fig5, ax5 = plt.subplots(figsize=(12, 8)) 

1232 ax5.grid(visible=True, zorder=0) 

1233 

1234 channels_sorted = sorted(avg_outlier_rates[gain].keys()) 

1235 avg_rates = [avg_outlier_rates[gain][ch] for ch in channels_sorted] 

1236 max_rates = [max_outlier_rates[gain][ch] for ch in channels_sorted] 

1237 

1238 # Calculate error bars: upper = max - avg, lower = 0 

1239 upper_errors = [max_rate - avg_rate for max_rate, avg_rate in zip(max_rates, avg_rates)] 

1240 lower_errors = [0] * len(avg_rates) # No lower error bars 

1241 

1242 ax5.errorbar( 

1243 channels_sorted, 

1244 avg_rates, 

1245 yerr=np.array([lower_errors, upper_errors]), 

1246 fmt="o", 

1247 color=color_dict[gain], 

1248 label=f"{gain} Gain", 

1249 alpha=0.7, 

1250 capsize=3, 

1251 ) 

1252 

1253 ax5.set_title( 

1254 helper.plot_summary_string( 

1255 board_id=board_id, 

1256 run_numbers=run_number, 

1257 channels=helper.list_to_text_ranges(list(range(min(channels_sorted), max(channels_sorted) + 1))), 

1258 name=f"Average Outlier Rate vs Channel Number - {gain.upper()} Gain, 5σ Gaussian Fit Cut", 

1259 attenuation=attenuation, 

1260 pas_mode=pas_mode, 

1261 info=info, 

1262 ) 

1263 ) 

1264 

1265 ax5.set_ylabel("Average Outlier Rate (%)") 

1266 ax5.set_xlabel("Channel Number") 

1267 ax5.legend() 

1268 fig5.tight_layout() 

1269 fig5.savefig(plot_dir / f"avg_outlier_rate_vs_channel_{gain}.png", dpi=300) 

1270 ax5.set_yscale("log") 

1271 fig5.savefig(plot_dir / f"avg_outlier_rate_vs_channel_{gain}_log.png", dpi=300) 

1272 plt.close(fig5) 

1273 

1274 

1275def plot_avg_sample_range( 

1276 df: pl.DataFrame, 

1277 plot_dir: Path, 

1278 info: Optional[Metadata] = None, 

1279) -> None: 

1280 board_id = df["board_id"][0] if "board_id" in df.columns else None 

1281 run_number = df["run_number"][0] 

1282 pas_mode = df["pas_mode"][0] if "pas_mode" in df.columns else None 

1283 attenuation = df["att_val"][0] if "att_val" in df.columns else None 

1284 

1285 plt.figure(figsize=(10, 5)) 

1286 n_channels = df["channel"].n_unique() 

1287 channels = df["channel"].unique().to_list() 

1288 color_dict = {"lo": "b", "hi": "r"} 

1289 title_dict = {"lo": "LG", "hi": "HG"} 

1290 

1291 fig, ax = plt.subplots(figsize=(8, 6)) 

1292 plt.xticks(np.arange(0, n_channels, 4), rotation=70) 

1293 ax.xaxis.set_tick_params(pad=0.1) 

1294 _, ax2 = plt.subplots(1, figsize=(8, 6)) 

1295 ax.grid(visible=True) 

1296 ax2.grid(visible=True) 

1297 plt.xticks(np.arange(0, n_channels, 4), rotation=70) 

1298 

1299 for gain in ["hi", "lo"]: 

1300 gain_filtered_df = df.filter(pl.col("gain") == gain) 

1301 first_channel = True 

1302 for channel in gain_filtered_df["channel"].unique().to_list(): 

1303 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter( 

1304 pl.col("channel") == channel 

1305 ) 

1306 color = color_dict[gain] 

1307 title = title_dict[gain] 

1308 ax.grid(visible=True, zorder=0) 

1309 

1310 avg_filter_df = channel_df.select( 

1311 [ 

1312 pl.col("samples").list.max().alias("max_per_list"), 

1313 pl.col("samples").list.min().alias("min_per_list"), 

1314 (pl.col("samples").list.max() - pl.col("samples").list.min()).alias("range_per_list"), 

1315 (pl.col("samples").list.max() - pl.col("samples").list.min()).mean().alias("avg_range"), 

1316 ] 

1317 ) 

1318 error = avg_filter_df["range_per_list"].std() / np.sqrt(len(avg_filter_df["range_per_list"])) 

1319 avg_range = avg_filter_df["avg_range"][0] 

1320 label = title if first_channel else None # Only label first channel per gain 

1321 first_channel = False 

1322 

1323 ax.errorbar(channel, avg_range, error, fmt=f"{color}o", label=label) 

1324 

1325 ax.set_title( 

1326 helper.plot_summary_string( 

1327 board_id=board_id, 

1328 run_numbers=run_number, 

1329 channels=helper.list_to_text_ranges(channels), 

1330 name="Avg. Sample Range vs. Channel #", 

1331 attenuation=attenuation, 

1332 pas_mode=pas_mode, 

1333 info=info, 

1334 ) 

1335 ) 

1336 ax.set_ylabel("Avg. Sample Range [ADC Counts]") 

1337 ax.set_xlabel("Channel") 

1338 fig.legend() 

1339 fig.tight_layout() 

1340 fig.savefig(plot_dir / "avg_sample_range.png") 

1341 fig.clf()