Coverage for polars_analysis / plotting / noise_stability_plotting.py: 74%
665 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-16 15:00 -0400
« prev ^ index » next coverage.py v7.13.4, created at 2026-03-16 15:00 -0400
1import logging
2from pathlib import Path
3from typing import Any, Dict, List, Literal, Optional, Tuple
5import matplotlib.pyplot as plt
6import numpy as np
7import polars as pl
8from scipy import stats
10from polars_analysis import utils
11from polars_analysis.plotting import helper
12from polars_analysis.plotting.helper import Metadata
14logger = logging.getLogger(__name__)
17def _process_temp_correlation_data(
18 derived_data: pl.DataFrame,
19 temp_df: pl.DataFrame,
20 settling_time: Optional[int],
21 plot_dir: Path,
22 temp_str: str,
23 run_metadata: Metadata,
24) -> dict:
25 """Process temperature correlation data and create individual channel plots."""
26 colors = {"lo": "blue", "hi": "red"}
28 # Initialize data structures
29 slopes_data: Dict[str, Dict[int, float]] = {"lo": {}, "hi": {}}
30 intercepts_data: Dict[str, Dict[int, float]] = {"lo": {}, "hi": {}}
31 slope_errors: Dict[str, Dict[int, float]] = {"lo": {}, "hi": {}}
32 intercept_errors: Dict[str, Dict[int, float]] = {"lo": {}, "hi": {}}
33 combined_data: Dict[str, Dict[str, List[np.ndarray]]] = {
34 "lo": {"temp": [], "mean": [], "std": []},
35 "hi": {"temp": [], "mean": [], "std": []},
36 }
38 settling_time_plt = None
40 for gain in ["lo", "hi"]:
41 gain_filtered_df = derived_data.filter(pl.col("gain") == gain)
43 for channel in gain_filtered_df["channel"].unique().to_list():
44 channel_df = gain_filtered_df.filter(pl.col("channel") == channel)
45 if len(channel_df) == 0:
46 continue
48 # Apply settling time filter if specified
49 if settling_time is not None:
50 temp_df_tmp, settling_time_plt = _filter_by_settling_time(temp_df, settling_time)
51 channel_df_tmp, settling_time_plt = _filter_by_settling_time(channel_df, settling_time)
52 else:
53 temp_df_tmp = temp_df
54 channel_df_tmp = channel_df
56 # Join temperature and channel data
57 combined_df = _join_temp_channel_data(channel_df_tmp, temp_df_tmp)
59 # Extract arrays
60 mean_values = combined_df.select("mean").to_numpy().flatten()
61 std_values = combined_df.select("std").to_numpy().flatten()
62 temp_values = combined_df.select("temp_interp").to_numpy().flatten()
64 # Store combined data
65 combined_data[gain]["temp"].extend(temp_values)
66 combined_data[gain]["mean"].extend(mean_values)
67 combined_data[gain]["std"].extend(std_values)
69 # Perform linear regression
70 result = stats.linregress(temp_values, mean_values)
72 # Store fit results
73 slopes_data[gain][channel] = result.slope.astype(float)
74 intercepts_data[gain][channel] = result.intercept.astype(float)
75 slope_errors[gain][channel] = result.stderr.astype(float)
76 intercept_errors[gain][channel] = result.intercept_stderr.astype(float)
78 # Create individual channel plot
79 _plot_individual_channel(
80 temp_values,
81 mean_values,
82 std_values,
83 result.slope.astype(float),
84 result.intercept.astype(float),
85 channel,
86 gain,
87 colors[gain],
88 plot_dir,
89 temp_str,
90 settling_time_plt,
91 run_metadata,
92 )
94 return {
95 "slopes_data": slopes_data,
96 "intercepts_data": intercepts_data,
97 "slope_errors": slope_errors,
98 "intercept_errors": intercept_errors,
99 "combined_data": combined_data,
100 "colors": colors,
101 "settling_time_plt": settling_time_plt,
102 }
105def _join_temp_channel_data(
106 channel_df: pl.DataFrame,
107 temp_df: pl.DataFrame,
108) -> pl.DataFrame:
109 """Join channel and temperature data by timestamp."""
110 return channel_df.sort("timestamp").join_asof(
111 temp_df.select(["timestamp", "value"]).rename({"value": "temp_interp"}).sort("timestamp"),
112 on="timestamp",
113 strategy="nearest",
114 )
117def _plot_individual_channel(
118 temp_values: np.ndarray,
119 mean_values: np.ndarray,
120 std_values: np.ndarray,
121 slope: float,
122 intercept: float,
123 channel: int,
124 gain: str,
125 color: str,
126 plot_dir: Path,
127 temp_str: str,
128 settling_time_plt: Optional[int],
129 run_metadata: Metadata,
130) -> None:
131 """Create individual channel temperature correlation plot."""
132 channel_str = str(channel).zfill(3)
134 # Create fit line
135 temp_fit = np.linspace(temp_values.min(), temp_values.max(), 100)
136 mean_fit = slope * temp_fit + intercept
138 plt.figure(figsize=(10, 5))
139 plt.errorbar(temp_values, mean_values, yerr=std_values, fmt="o", color=color, label=f"Ch {channel_str} {gain} Gain")
140 plt.plot(temp_fit, mean_fit, "-", color=color, label=f"Fit: y = {slope:.3f}x + {intercept:.2f}")
142 settling_time_str = f", {settling_time_plt} min settling time" if settling_time_plt is not None else ""
143 plt.title(
144 helper.plot_summary_string(
145 board_id=run_metadata.board_id,
146 run_numbers=run_metadata.run_numbers,
147 name=f"Temperature vs. Pedestal Mean Ch.{channel_str} {gain} Gain{settling_time_str}",
148 attenuation=run_metadata.att_val,
149 pas_mode=run_metadata.pas_mode,
150 )
151 )
152 plt.xlabel("Temperature [°C]")
153 plt.ylabel("Pedestal Mean [ADC Counts]")
154 plt.legend()
155 plt.grid(True, alpha=0.3)
156 plt.tight_layout()
158 filename = f"mean_vs_temp{temp_str}_channel{channel_str}_{gain}.png"
159 plt.savefig(plot_dir / filename)
160 logger.debug(f"Saved to {plot_dir / filename}")
161 plt.close()
164def _create_combined_plots(
165 fit_results: Dict[str, Any],
166 plot_dir: Path,
167 temp_str: str,
168 run_metadata: Metadata,
169) -> dict:
170 """Create combined temperature correlation plots for all channels."""
171 combined_data = fit_results["combined_data"]
172 colors = fit_results["colors"]
174 plt.figure(figsize=(12, 8))
176 # Plot data points
177 for gain in ["lo", "hi"]:
178 if combined_data[gain]["temp"]:
179 temp_array = np.array(combined_data[gain]["temp"])
180 mean_array = np.array(combined_data[gain]["mean"])
181 std_array = np.array(combined_data[gain]["std"])
183 plt.errorbar(
184 temp_array,
185 mean_array,
186 yerr=std_array,
187 fmt="o",
188 color=colors[gain],
189 alpha=0.6,
190 label=f"{gain.capitalize()} Gain Data",
191 markersize=4,
192 )
194 # Create fit lines and combined fit
195 fit_dict = {}
196 all_temp = np.array(combined_data["lo"]["temp"] + combined_data["hi"]["temp"])
197 all_mean = np.array(combined_data["lo"]["mean"] + combined_data["hi"]["mean"])
199 for gain in ["lo", "hi"]:
200 if combined_data[gain]["temp"]:
201 temp_array = np.array(combined_data[gain]["temp"])
202 mean_array = np.array(combined_data[gain]["mean"])
204 result = stats.linregress(temp_array, mean_array)
205 temp_range = np.linspace(temp_array.min(), temp_array.max(), 100)
206 fit_line = result.slope * temp_range + result.intercept
208 plt.plot(
209 temp_range,
210 fit_line,
211 color=colors[gain],
212 linewidth=2,
213 label=f"{gain.capitalize()}: y = {result.slope:.3f}x + {result.intercept:.2f}",
214 )
216 fit_dict[gain] = {"slope": result.slope, "intercept": result.intercept}
217 board_id: str = run_metadata.board_id if run_metadata.board_id is not None else ""
218 utils.add_run_info(f"{gain}_slope", result.slope, board_id, plot_dir, False)
219 utils.add_run_info(f"{gain}_intercept", result.intercept, board_id, plot_dir, False)
221 # Combined fit
222 if len(all_temp) > 0:
223 result = stats.linregress(all_temp, all_mean)
224 slope_combined = result.slope
225 intercept_combined = result.intercept
226 temp_range_combined = np.linspace(all_temp.min(), all_temp.max(), 100)
227 fit_combined = slope_combined * temp_range_combined + intercept_combined
228 plt.plot(
229 temp_range_combined,
230 fit_combined,
231 color="black",
232 linewidth=2,
233 linestyle="--",
234 label=f"Combined: y = {slope_combined:.3f}x + {intercept_combined:.2f}",
235 )
237 plt.title(
238 helper.plot_summary_string(
239 board_id=run_metadata.board_id,
240 run_numbers=run_metadata.run_numbers,
241 name="Pedestal Mean vs. Temperature - All Channels Combined",
242 attenuation=run_metadata.att_val,
243 pas_mode=run_metadata.pas_mode,
244 )
245 )
246 plt.xlabel("Temperature [°C]")
247 plt.ylabel("Pedestal Mean [ADC Counts]")
248 plt.legend()
249 plt.grid(True, alpha=0.3)
250 plt.tight_layout()
252 filename = f"mean_vs_temp{temp_str}_combined.png"
253 plt.savefig(plot_dir / filename)
254 logger.debug(f"Saved combined plot to {plot_dir / filename}")
255 plt.close()
257 return fit_dict
260def _create_summary_plots(
261 fit_results: Dict[str, Any],
262 plot_dir: Path,
263 temp_str: str,
264 run_metadata: Metadata,
265) -> None:
266 """Create summary plots for temperature coefficients and intercepts."""
267 slopes_data = fit_results["slopes_data"]
268 intercepts_data = fit_results["intercepts_data"]
269 slope_errors = fit_results["slope_errors"]
270 intercept_errors = fit_results["intercept_errors"]
271 colors = fit_results["colors"]
273 # Temperature coefficient vs channel plot
274 plt.figure(figsize=(12, 6))
275 for gain in ["lo", "hi"]:
276 if slopes_data[gain]:
277 channels = sorted(slopes_data[gain].keys())
278 slopes = [slopes_data[gain][ch] for ch in channels]
279 slope_errs = [slope_errors[gain][ch] for ch in channels]
281 plt.errorbar(
282 channels,
283 slopes,
284 yerr=slope_errs,
285 fmt="o",
286 color=colors[gain],
287 label=f"{gain.capitalize()} Gain",
288 markersize=6,
289 capsize=3,
290 )
292 plt.title(
293 helper.plot_summary_string(
294 board_id=run_metadata.board_id,
295 run_numbers=run_metadata.run_numbers,
296 name="Temperature Coefficient vs Channel",
297 attenuation=run_metadata.att_val,
298 pas_mode=run_metadata.pas_mode,
299 )
300 )
301 plt.xlabel("Channel Number")
302 plt.ylabel("Temperature Coefficient [ADC Counts/°C]")
303 plt.legend()
304 plt.grid(True, alpha=0.3)
305 plt.tight_layout()
307 filename = f"temp{temp_str}_coefficient_vs_channel.png"
308 plt.savefig(plot_dir / filename)
309 logger.debug(f"Saved coefficient plot to {plot_dir / filename}")
310 plt.close()
312 # Y-intercept vs channel plot
313 plt.figure(figsize=(12, 6))
314 for gain in ["lo", "hi"]:
315 if intercepts_data[gain]:
316 channels = sorted(intercepts_data[gain].keys())
317 intercepts = [intercepts_data[gain][ch] for ch in channels]
318 intercept_errs = [intercept_errors[gain][ch] for ch in channels]
320 plt.errorbar(
321 channels,
322 intercepts,
323 yerr=intercept_errs,
324 fmt="o",
325 color=colors[gain],
326 label=f"{gain.capitalize()} Gain",
327 markersize=6,
328 capsize=3,
329 )
331 plt.title(
332 helper.plot_summary_string(
333 board_id=run_metadata.board_id,
334 run_numbers=run_metadata.run_numbers,
335 name="Y-Intercept vs Channel",
336 attenuation=run_metadata.att_val,
337 pas_mode=run_metadata.pas_mode,
338 )
339 )
340 plt.xlabel("Channel Number")
341 plt.ylabel("Y-Intercept [ADC Counts]")
342 plt.legend()
343 plt.grid(True, alpha=0.3)
344 plt.tight_layout()
346 filename = f"temp{temp_str}_intercept_vs_channel.png"
347 plt.savefig(plot_dir / filename)
348 logger.debug(f"Saved intercept plot to {plot_dir / filename}")
349 plt.close()
352def _create_corrected_time_plots(
353 derived_data: pl.DataFrame,
354 temp_df: pl.DataFrame,
355 fit_results: Dict[str, Any],
356 settling_time: Optional[int],
357 plot_dir: Path,
358 temp_str: str,
359 run_metadata: Metadata,
360) -> None:
361 """Create temperature-corrected mean vs time plots."""
362 if "fit_dict" not in fit_results:
363 return
365 fit_dict = fit_results["fit_dict"]
366 colors = fit_results["colors"]
367 settling_time_plt = fit_results["settling_time_plt"]
369 for gain in ["lo", "hi"]:
370 if gain not in fit_dict:
371 continue
373 gain_filtered_df = derived_data.filter(pl.col("gain") == gain)
374 slope = fit_dict[gain]["slope"]
375 intercept = fit_dict[gain]["intercept"]
376 color = colors[gain]
378 for channel in gain_filtered_df["channel"].unique().to_list():
379 channel_df = gain_filtered_df.filter(pl.col("channel") == channel)
380 if len(channel_df) == 0:
381 continue
383 # Apply settling time filter if specified
384 if settling_time is not None:
385 temp_df_tmp, _ = _filter_by_settling_time(temp_df, settling_time)
386 channel_df_tmp, _ = _filter_by_settling_time(channel_df, settling_time)
387 else:
388 temp_df_tmp = temp_df
389 channel_df_tmp = channel_df
391 # Join data
392 combined_df = _join_temp_channel_data(channel_df_tmp, temp_df_tmp)
394 # Extract values and apply temperature correction
395 temp_values = combined_df.select("temp_interp").to_numpy().flatten()
396 mean_values = combined_df.select("mean").to_numpy().flatten()
397 t = helper.datetime_to_hr(combined_df["timestamp"])
399 # Temperature correction
400 temp_contribution = slope * temp_values + intercept
401 reference_temp = np.mean(temp_values)
402 reference_contribution = slope * reference_temp + intercept
403 corrected_means = mean_values - temp_contribution + reference_contribution
405 # Create plot
406 plt.figure(figsize=(10, 5))
407 plt.scatter(
408 t,
409 corrected_means,
410 alpha=0.7,
411 color=color,
412 s=30,
413 label=f"Ch {str(channel).zfill(3)} {gain.capitalize()} Gain",
414 )
416 settling_time_str = f", {settling_time_plt} min settling time" if settling_time_plt is not None else ""
417 plt.title(
418 helper.plot_summary_string(
419 board_id=run_metadata.board_id,
420 run_numbers=run_metadata.run_numbers,
421 name=f"Temperature-Corrected Pedestal Mean vs Time \
422 Ch.{str(channel).zfill(3)} {gain} Gain{settling_time_str}",
423 attenuation=run_metadata.att_val,
424 pas_mode=run_metadata.pas_mode,
425 )
426 )
427 plt.xlabel("Time")
428 plt.ylabel("Temperature-Corrected Pedestal Mean [ADC Counts]")
429 plt.legend()
430 plt.grid(True, alpha=0.3)
431 plt.xticks(rotation=45)
432 plt.tight_layout()
434 channel_str = str(channel).zfill(3)
435 filename = f"temp{temp_str}_corrected_mean_vs_time_channel{channel_str}_{gain}.png"
436 plt.savefig(plot_dir / filename)
437 logger.debug(f"Saved corrected time plot to {plot_dir / filename}")
438 plt.close()
441def _baseline_correction(df: pl.DataFrame) -> pl.DataFrame:
442 # Correct the baseline of the samples by subtracting the mean for each gain and channel
443 df = df.with_columns(pl.col("mean").mean().over(["gain", "channel"]).alias("overall_mean"))
444 df = df.with_columns(pl.col("samples") - pl.col("mean") + pl.col("overall_mean").alias("samples"))
445 return df
448def _get_temp_df(temp_source: str, monitoring_df: pl.DataFrame, lab_env_data: pl.DataFrame) -> pl.DataFrame:
449 if temp_source == "thermistors":
450 return (
451 monitoring_df.filter(pl.col("group_name") == "thermistor")
452 .group_by("measurement")
453 .agg([pl.col("value").mean().alias("value"), pl.col("timestamp").first().alias("timestamp")])
454 )
455 elif temp_source == "bpol48":
456 return (
457 monitoring_df.filter(pl.col("monitor").is_in(["PAT1", "PAT2"]))
458 .group_by("measurement")
459 .agg([pl.col("value").mean().alias("value"), pl.col("timestamp").first().alias("timestamp")])
460 )
461 elif "TEMP" in temp_source or temp_source.startswith("PAT"):
462 return monitoring_df.filter(pl.col("monitor") == temp_source)
463 elif temp_source == "lab_environment":
464 return lab_env_data.with_columns(pl.mean_horizontal(["lab_temp", "crate_temp"]).alias("value"))
465 elif temp_source == "lab_temp":
466 return lab_env_data.with_columns(pl.col("lab_temp").alias("value"))
467 elif temp_source == "crate_temp":
468 return lab_env_data.with_columns(pl.col("crate_temp").alias("value"))
469 else:
470 print(f"Unknown temperature source: {temp_source}. Returning empty DataFrame.")
471 return pl.DataFrame()
474def _filter_by_settling_time(
475 df: pl.DataFrame,
476 settling_time_minutes: int,
477) -> Tuple[pl.DataFrame, Optional[int]]:
478 """
479 Filter dataframe to only include entries after settling time has passed.
481 Args:
482 df: DataFrame with a 'timestamp' column
483 settling_time_minutes: Time in minutes to wait after first timestamp
485 Returns:
486 Filtered DataFrame
487 """
488 first_timestamp = df["timestamp"].min()
489 settling_threshold = first_timestamp + pl.duration(minutes=settling_time_minutes)
490 filtered_df = df.filter(pl.col("timestamp") >= settling_threshold)
491 if len(filtered_df) == 0:
492 logger.warning(
493 f"No data points found after {settling_time_minutes} minutes settling time. Returning original DataFrame."
494 )
495 return df, None
497 return filtered_df, settling_time_minutes
500def moving_average(a: np.ndarray, n: int = 4) -> np.ndarray:
501 ret = np.cumsum(a, dtype=float)
502 ret[n:] = ret[n:] - ret[:-n]
503 return ret[n - 1 :] / n
506def plot_monitoring(
507 monitoring_df: pl.DataFrame, lab_env_data: pl.DataFrame, plot_dir: Path, tzero_correction: bool = True
508) -> None:
509 # Plots monitoring data from the monitoring DataFrame and lab environment data.
510 run_number = monitoring_df["run_number"][0]
511 board_id = monitoring_df["board_id"][0] if "board_id" in monitoring_df.columns else None
512 pas_mode = monitoring_df["pas_mode"][0] if "pas_mode" in monitoring_df.columns else None
513 attenuation = monitoring_df["att_val"][0] if "att_val" in monitoring_df.columns else None
515 corrected_temperature_col_name = "value_tzero_corr"
517 df_gb = (
518 monitoring_df.sort(["timestamp"])
519 .with_columns(pl.col("timestamp").dt.epoch(time_unit="s").alias("seconds"))
520 .group_by(pl.col("monitor"))
521 .agg(["value", "seconds", "group_name", "unit", "monitor_type"])
522 .with_columns(
523 pl.col("group_name").list.first().alias("group_name"),
524 pl.col("unit").list.first().alias("unit"),
525 pl.col("monitor_type").list.first().alias("monitor_type"),
526 pl.col("value").list.first().alias("first"),
527 ((pl.col("seconds") - pl.col("seconds").list.first()) / 3600).alias("hours"),
528 )
529 )
531 df_avg = df_gb.group_by("group_name").agg(pl.col("first").mean().alias("tzero_avg"))
532 df_gb = df_gb.join(df_avg, on="group_name").with_columns(
533 (pl.col("value") - pl.col("value").list.first() + pl.col("tzero_avg")).alias(corrected_temperature_col_name)
534 )
536 for monitor_group in df_gb["group_name"].unique():
537 for monitor_unit in df_gb.filter(pl.col("group_name") == monitor_group)["monitor_type"].unique():
538 monitor_group_df = df_gb.filter(
539 (pl.col("group_name") == monitor_group) & (pl.col("monitor_type") == monitor_unit)
540 )
541 for monitor in sorted(monitor_group_df["monitor"].unique().to_list(), key=lambda x: (len(x), x)):
542 value_col = (
543 corrected_temperature_col_name if (tzero_correction and monitor_unit.lower() == "c") else "value"
544 )
546 monitor_df = monitor_group_df.filter(pl.col("monitor") == monitor)
547 plt.plot(monitor_df["hours"].item(), monitor_df[value_col].item(), label=monitor)
548 if monitor_group == "":
549 monitor_group = "other"
550 plt.title(
551 helper.plot_summary_string(
552 board_id=board_id,
553 run_numbers=run_number,
554 name=f"{monitor_group} {monitor_unit}",
555 attenuation=attenuation,
556 pas_mode=pas_mode,
557 )
558 )
559 plt.legend(loc="center left", bbox_to_anchor=(1, 0.47), fontsize=6)
560 plt.tight_layout(rect=(0.03, 0.03, 1, 0.95))
561 plt.xlabel("Time since configuration [Hours]")
562 plt.ylabel(f"{monitor_group} {monitor_unit} [{monitor_group_df['unit'][0]}]")
563 plt.savefig(f"{plot_dir}/{monitor_group}_{monitor_unit}.png")
564 plt.savefig(f"{plot_dir}/{monitor_group}_{monitor_unit}.pdf")
565 plt.close()
566 logger.info(f"Saved run{run_number} {monitor_group} to {plot_dir}/{monitor_group}_{monitor_unit}.png")
568 # Plot lab environment temp
569 for monitor in ["lab_temp", "crate_temp"]:
570 t = helper.datetime_to_hr(lab_env_data["timestamp"])
571 plt.plot(t, lab_env_data[monitor], label=monitor)
572 plt.title(
573 helper.plot_summary_string(
574 board_id=board_id,
575 run_numbers=run_number,
576 name="Lab Temperature",
577 attenuation=attenuation,
578 pas_mode=pas_mode,
579 )
580 )
581 plt.legend(loc="center left", bbox_to_anchor=(1, 0.47), fontsize=6)
582 plt.tight_layout(rect=(0.03, 0.03, 1, 0.95))
583 plt.xlabel("Time since configuration [Hours]")
584 plt.ylabel("Lab Temperature [C]")
585 plt.savefig(f"{plot_dir}/lab_temperature.png")
586 plt.close()
589def plot_monitor_channel_correlation(
590 derived_data: pl.DataFrame, monitoring_df: pl.DataFrame, lab_env_data: pl.DataFrame, plot_dir: Path
591) -> None:
592 temp_df = (
593 monitoring_df.filter(pl.col("monitor_type") == "temperature")
594 .group_by("measurement", "monitor")
595 .agg([pl.col("timestamp").first().alias("timestamp"), pl.col("value").first()])
596 )
597 ### Remove later
598 ### temp_df = temp_df.with_columns(pl.col("timestamp").dt.replace_time_zone("UTC", non_existent="null"))
599 ###
600 temp_pivoted = temp_df.pivot(index=["measurement", "timestamp"], on="monitor", values="value").sort("timestamp")
601 combined_temp_df = (
602 temp_pivoted.sort("timestamp")
603 .join_asof(
604 lab_env_data.select(["timestamp", "crate_temp", "lab_temp"]).sort("timestamp"),
605 on="timestamp",
606 strategy="nearest",
607 )
608 .drop("measurement", "timestamp")
609 )
610 sorted_cols = sorted(combined_temp_df.columns)
611 combined_temp_df = combined_temp_df.select(sorted_cols)
613 for gain in derived_data["gain"].unique().to_list():
614 gain_filtered_df = derived_data.filter(pl.col("gain") == gain).sort("timestamp").select("mean", "channel")
615 channels = gain_filtered_df["channel"].unique().to_list()
616 monitors = combined_temp_df.columns
617 corr_matrix = np.full((len(monitors), len(channels)), np.nan)
619 if len(monitors) == 0:
620 continue
621 null_mask = np.isnan(combined_temp_df[monitors[0]].to_numpy().flatten())
622 for i, monitor in enumerate(monitors):
623 monitor_data_tmp = combined_temp_df[monitor].to_numpy().flatten()
624 monitor_data = monitor_data_tmp[~null_mask]
626 for j, channel in enumerate(channels):
627 channel_data = gain_filtered_df.filter(pl.col("channel") == channel).select("mean").to_numpy().flatten()
629 if len(monitor_data) > 1 and len(channel_data) > 1:
630 if len(monitor_data) != len(channel_data):
631 min_len = min(len(monitor_data), len(channel_data))
632 monitor_data = monitor_data[:min_len]
633 channel_data = channel_data[:min_len]
634 corr_matrix[i, j] = np.corrcoef(channel_data, monitor_data)[0, 1]
636 for gain in derived_data["gain"].unique().to_list():
637 gain_filtered_df = derived_data.filter(pl.col("gain") == gain).sort("timestamp").select("mean", "channel")
638 channels = gain_filtered_df["channel"].unique().to_list()
639 monitors = combined_temp_df.columns
640 corr_matrix = np.full((len(monitors), len(channels)), np.nan)
642 if len(monitors) == 0:
643 continue
644 temp_null_mask = np.isnan(combined_temp_df[monitors[0]].to_numpy().flatten())
645 for i, monitor in enumerate(monitors):
646 monitor_data_tmp = combined_temp_df[monitor].to_numpy().flatten()
647 monitor_data = monitor_data_tmp[~temp_null_mask]
649 for j, channel in enumerate(channels):
650 channel_data = gain_filtered_df.filter(pl.col("channel") == channel).select("mean").to_numpy().flatten()
652 if abs(len(monitor_data) - len(channel_data)) > 1:
653 logger.warning(
654 f"Length mismatch for monitor {monitor} and channel {channel}: "
655 f"{len(monitor_data)} vs {len(channel_data)}"
656 )
657 if len(monitor_data) != len(channel_data):
658 min_len = min(len(monitor_data), len(channel_data))
659 monitor_data = monitor_data[:min_len]
660 channel_data = channel_data[:min_len]
661 mask = ~(np.isnan(monitor_data) | np.isnan(channel_data))
662 monitor_clean = monitor_data[mask]
663 channel_clean = channel_data[mask]
665 if len(monitor_clean) >= 10: # Need sufficient data points
666 if np.std(monitor_clean) > 1e-10 and np.std(channel_clean) > 1e-10:
667 corr_val = np.corrcoef(monitor_clean, channel_clean)[0, 1]
668 corr_matrix[i, j] = corr_val
670 plt.figure(figsize=(30, 10))
671 display_matrix = corr_matrix.copy()
672 nan_mask = np.isnan(corr_matrix)
673 display_matrix[nan_mask] = 0
675 plt.imshow(display_matrix, cmap="RdBu", vmin=-1, vmax=1)
677 # Add text annotations
678 for i, monitor in enumerate(monitors):
679 for j, channel in enumerate(channels):
680 if nan_mask[i, j]:
681 plt.text(j, i, "N/A", ha="center", va="center", color="gray", fontsize=6)
682 else:
683 corr_val = corr_matrix[i, j]
684 text_color = "white" if abs(corr_val) > 0.5 else "black"
685 plt.text(j, i, f"{corr_val:.2f}", ha="center", va="center", color=text_color, fontsize=4)
687 plt.colorbar(label="Correlation Coefficient")
688 plt.xticks(ticks=np.arange(len(channels)), labels=channels, rotation=90, ha="right")
689 plt.yticks(ticks=np.arange(len(monitors)), labels=monitors)
690 plt.title("Correlation Matrix: Monitor Temp vs Channel ADC Counts")
691 plt.tight_layout()
692 plt.savefig(plot_dir / f"monitor_channel_corr_{gain}.png")
693 plt.close()
696def plot_monitor_monitor_correlation(
697 monitoring_df: pl.DataFrame,
698 lab_env_data: pl.DataFrame,
699 plot_dir: Path,
700) -> None:
701 temp_df = (
702 monitoring_df.filter(pl.col("monitor_type") == "temperature")
703 .group_by("measurement", "monitor")
704 .agg([pl.col("timestamp").first().alias("timestamp"), pl.col("value").first()])
705 )
706 ### Remove later
707 # temp_df = temp_df.with_columns(pl.col("timestamp").dt.replace_time_zone("UTC", non_existent="null"))
708 ###
709 temp_pivoted = temp_df.pivot(index=["measurement", "timestamp"], on="monitor", values="value").sort("measurement")
710 combined_df = (
711 temp_pivoted.sort("timestamp")
712 .join_asof(
713 lab_env_data.select(["timestamp", "crate_temp", "lab_temp"]).sort("timestamp"),
714 on="timestamp",
715 strategy="nearest",
716 )
717 .drop("measurement", "timestamp")
718 )
720 sorted_cols = sorted(combined_df.columns)
721 combined_df = combined_df.select(sorted_cols)
723 data_matrix = combined_df.to_numpy().astype(float)
724 n_cols = len(combined_df.columns)
725 corr_matrix = np.full((n_cols, n_cols), np.nan)
727 # Calculate correlations pairwise
728 for i in range(n_cols):
729 for j in range(n_cols):
730 if i == j:
731 corr_matrix[i, j] = 1.0
732 else:
733 # Get data for both columns
734 x = data_matrix[:, i]
735 y = data_matrix[:, j]
737 # Remove rows where either is NaN
738 mask = ~(np.isnan(x) | np.isnan(y))
739 x_clean = x[mask]
740 y_clean = y[mask]
742 if len(x_clean) >= 10: # Need sufficient data points
743 # Check for constant values
744 if np.std(x_clean) > 1e-10 and np.std(y_clean) > 1e-10:
745 corr_val = np.corrcoef(x_clean, y_clean)[0, 1]
746 corr_matrix[i, j] = corr_val
748 plt.figure(figsize=(15, 12))
749 display_matrix = corr_matrix.copy()
750 nan_mask = np.isnan(corr_matrix)
751 display_matrix[nan_mask] = 0
753 plt.imshow(display_matrix, cmap="Blues", vmin=0, vmax=1)
755 # Add text annotations
756 for i in range(len(combined_df.columns)):
757 for j in range(len(combined_df.columns)):
758 if nan_mask[i, j]:
759 plt.text(j, i, "N/A", ha="center", va="center", color="gray", fontsize=6)
760 else:
761 corr_val = corr_matrix[i, j]
762 text_color = "white" if abs(corr_val) > 0.5 else "black"
763 plt.text(j, i, f"{corr_val:.2f}", ha="center", va="center", color=text_color, fontsize=4)
765 plt.colorbar(label="Correlation Coefficient")
766 plt.xticks(ticks=np.arange(len(combined_df.columns)), labels=combined_df.columns, rotation=90, ha="right")
767 plt.yticks(ticks=np.arange(len(combined_df.columns)), labels=combined_df.columns)
768 plt.title("Correlation Matrix of Monitoring Channels")
769 plt.tight_layout()
770 plt.savefig(plot_dir / "monitor_monitor_corr.png")
771 plt.close()
774def plot_temp_correlation(
775 derived_data: pl.DataFrame,
776 monitoring_df: pl.DataFrame,
777 lab_env_data: pl.DataFrame,
778 plot_dir: Path,
779 temp_source: Optional[str] = None,
780 settling_time: Optional[int] = None,
781) -> None:
782 temp_str = f"_{temp_source}" if temp_source else ""
783 temp_source = temp_source or "lab_environment"
785 run_metadata = Metadata.fill_from_dataframe(derived_data)
787 temp_df = _get_temp_df(temp_source, monitoring_df, lab_env_data)
788 if len(temp_df) == 0:
789 logger.warning("No temperature data found for correlation plot.")
790 return
792 # Process data and create fits
793 fit_results = _process_temp_correlation_data(derived_data, temp_df, settling_time, plot_dir, temp_str, run_metadata)
795 # Generate all plots
796 fit_dict = _create_combined_plots(fit_results, plot_dir, temp_str, run_metadata)
797 fit_results["fit_dict"] = fit_dict
798 _create_summary_plots(fit_results, plot_dir, temp_str, run_metadata)
799 _create_corrected_time_plots(derived_data, temp_df, fit_results, settling_time, plot_dir, temp_str, run_metadata)
802def plot_mean_rms_vs_time(gain_filtered_df: pl.DataFrame, plot_dir: Path) -> None:
803 color_dict = {"lo": "b", "hi": "r"}
804 gain = gain_filtered_df["gain"][0]
805 color = color_dict[gain]
806 board_id = gain_filtered_df["board_id"][0] if "board_id" in gain_filtered_df.columns else None
807 run_number = gain_filtered_df["run_number"][0]
808 pas_mode = gain_filtered_df["pas_mode"][0] if "pas_mode" in gain_filtered_df.columns else None
809 attenuation = gain_filtered_df["att_val"][0] if "att_val" in gain_filtered_df.columns else None
811 for channel in gain_filtered_df["channel"].unique().to_list():
812 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter(
813 pl.col("channel") == channel
814 )
816 if len(channel_df) == 0:
817 continue
819 t = helper.datetime_to_hr(channel_df["timestamp"])
820 channel = str(channel).zfill(3)
822 # Mean vs Time
823 fig1, ax1 = plt.subplots(figsize=(10, 5))
824 ax1.grid(visible=True, zorder=0)
825 ax1.scatter(t, channel_df["mean"], c=color, marker="o")
826 ax1.set_title(
827 helper.plot_summary_string(
828 board_id=board_id,
829 run_numbers=run_number,
830 name=f"{gain} Gain Mean vs. Time Ch.{channel}",
831 attenuation=attenuation,
832 pas_mode=pas_mode,
833 )
834 )
835 ax1.set_ylabel("Pedestal Mean [ADC Counts]")
836 ax1.set_xlabel("Time [Hours]")
837 fig1.savefig(plot_dir / f"mean_vs_time_channel{channel}_{gain}.png")
838 plt.close(fig1)
840 # RMS vs Time
841 fig2, ax2 = plt.subplots(figsize=(10, 5))
842 ax2.scatter(t, channel_df["std"], c=color, marker="o")
843 ax2.set_title(
844 helper.plot_summary_string(
845 board_id=board_id,
846 run_numbers=run_number,
847 name=f"{gain} Gain RMS vs. Time Ch.{channel}",
848 attenuation=attenuation,
849 pas_mode=pas_mode,
850 )
851 )
852 ax2.set_ylabel("Pedestal RMS [ADC Counts]")
853 ax2.set_xlabel("Time [Hours]")
854 fig2.savefig(plot_dir / f"rms_vs_time_channel{channel}_{gain}.png")
855 plt.close(fig2)
858def plot_sample_range_vs_time(gain_filtered_df: pl.DataFrame, plot_dir: Path) -> None:
859 board_id = gain_filtered_df["board_id"][0] if "board_id" in gain_filtered_df.columns else None
860 run_number = gain_filtered_df["run_number"][0]
861 pas_mode = gain_filtered_df["pas_mode"][0] if "pas_mode" in gain_filtered_df.columns else None
862 attenuation = gain_filtered_df["att_val"][0] if "att_val" in gain_filtered_df.columns else None
863 plt.figure(figsize=(10, 5))
864 color_dict = {"lo": "b", "hi": "r"}
865 title_dict = {"lo": "LG", "hi": "HG"}
866 gain = gain_filtered_df["gain"][0]
867 color = color_dict[gain]
868 title = title_dict[gain]
870 first_channel = True
871 for channel in gain_filtered_df["channel"].unique().to_list():
872 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter(
873 pl.col("channel") == channel
874 )
875 fig, ax = plt.subplots(figsize=(8, 6))
876 ax.grid(visible=True, zorder=0)
877 avg_filter_df = channel_df.select(
878 [
879 pl.col("samples").list.max().alias("max_per_list"),
880 pl.col("samples").list.min().alias("min_per_list"),
881 pl.col("samples").list.len().alias("list_lens"),
882 (pl.col("samples").list.max() - pl.col("samples").list.min()).alias("range_per_list"),
883 ]
884 )
886 t = helper.datetime_to_hr(channel_df["timestamp"])
887 range_per_list = avg_filter_df["range_per_list"]
888 error = channel_df["std"] / np.sqrt(avg_filter_df["list_lens"])
889 label = title if first_channel else None # Only label first channel per gain
890 first_channel = False
892 ax.errorbar(t, range_per_list, error, fmt=f"{color}o", label=label)
894 ax.set_title(
895 helper.plot_summary_string(
896 board_id=board_id,
897 run_numbers=run_number,
898 channels=helper.list_to_text_ranges(channel),
899 name="Sample Range vs. Time",
900 attenuation=attenuation,
901 pas_mode=pas_mode,
902 gain=gain,
903 )
904 )
905 channel = str(channel).zfill(3)
906 ax.set_ylabel("Avg. Sample Range [ADC Counts]")
907 ax.set_xlabel("Time[hours]")
908 fig.tight_layout()
909 fig.savefig(plot_dir / f"sample_range_vs_time_channel{channel}_{gain}.png")
910 plt.close(fig)
911 fig.clf()
914def avg_rms_mean_vs_channel(
915 df: pl.DataFrame,
916 plot_dir: Path,
917 info: Optional[Metadata] = None,
918 settling_time: int = 4, # Settling time in minutes for max rms deviation calculation
919):
920 board_id = df["board_id"][0] if "board_id" in df.columns else None
921 run_number = df["run_number"][0]
922 pas_mode = df["pas_mode"][0] if "pas_mode" in df.columns else None
923 attenuation = df["att_val"][0] if "att_val" in df.columns else None
925 plt.figure(figsize=(10, 5))
926 n_channels = df["channel"].n_unique()
927 channels = df["channel"].unique().to_list()
928 color_dict = {"lo": "b", "hi": "r"}
929 title_dict = {"lo": "LG", "hi": "HG"}
931 fig, ax = plt.subplots(figsize=(8, 6))
932 plt.xticks(np.arange(0, n_channels, 4), rotation=70)
933 fig2, ax2 = plt.subplots(1, figsize=(8, 6))
934 fig3, ax3 = plt.subplots(1, figsize=(8, 6))
935 ax.grid(visible=True, zorder=0)
936 ax.xaxis.set_tick_params(pad=0.1)
937 ax2.grid(visible=True)
938 ax3.grid(visible=True)
939 plt.xticks(np.arange(0, n_channels, 4), rotation=70)
941 # Plot avg mean, avg rms, and max rms deviation per channel
942 for gain in ["hi", "lo"]:
943 gain_filtered_df = df.filter(pl.col("gain") == gain)
944 first_channel = True
945 for channel in gain_filtered_df["channel"].unique().to_list():
946 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter(
947 pl.col("channel") == channel
948 )
949 color = color_dict[gain]
950 title = title_dict[gain]
952 means = channel_df["mean"].to_numpy()
953 stds = channel_df["std"].to_numpy()
955 means_mean = np.mean(means)
956 means_min = np.min(means)
957 means_max = np.max(means)
958 stds_mean = np.mean(stds)
960 n_measurements = len(means)
961 stds_err = np.std(stds) / np.sqrt(n_measurements)
962 means_err = np.array([[means_mean - means_min], [means_max - means_mean]])
964 label = title if first_channel else None
965 first_channel = False
967 ax.errorbar(channel, means_mean, yerr=means_err, fmt=f"{color}o", label=label)
968 ax2.errorbar(channel, stds_mean, stds_err, fmt=f"{color}o", label=label)
970 filtered_df, _ = _filter_by_settling_time(channel_df, settling_time)
971 stds_settled = filtered_df["std"].to_numpy()
972 max_rms_deviation = np.max(np.abs(stds_settled - stds_mean))
973 ax3.plot(channel, max_rms_deviation, f"{color}o", label=label)
975 ax.set_title(
976 helper.plot_summary_string(
977 board_id=board_id,
978 run_numbers=run_number,
979 channels=helper.list_to_text_ranges(channels),
980 name="Overall Mean vs. Channel #\nError bars show min and max",
981 attenuation=attenuation,
982 pas_mode=pas_mode,
983 )
984 )
985 ax.set_ylabel("Avg. Pedestal Mean [ADC Counts]")
986 ax.set_xlabel("Channel")
987 fig.legend()
988 fig.tight_layout()
989 fig.savefig(plot_dir / "avg_mean_vs_channel.png")
990 fig.clf()
992 ax2.set_title(
993 helper.plot_summary_string(
994 board_id=board_id,
995 run_numbers=run_number,
996 channels=helper.list_to_text_ranges(channels),
997 name="Overall RMS vs. Channel #",
998 attenuation=attenuation,
999 pas_mode=pas_mode,
1000 info=info,
1001 )
1002 )
1003 ax2.set_ylabel("Avg. Pedestal RMS [ADC Counts]")
1004 ax2.set_xlabel("Channel")
1005 fig2.legend()
1006 fig2.tight_layout()
1007 fig2.savefig(plot_dir / "avg_rms_vs_time.png")
1008 fig2.clf()
1010 ax3.set_title(
1011 helper.plot_summary_string(
1012 board_id=board_id,
1013 run_numbers=run_number,
1014 channels=helper.list_to_text_ranges(channels),
1015 name=f"Max RMS Deviation vs. Channel # After {settling_time} min Settling Time)",
1016 attenuation=attenuation,
1017 pas_mode=pas_mode,
1018 info=info,
1019 )
1020 )
1021 ax3.set_ylabel("Max RMS Deviation [ADC Counts]")
1022 ax3.set_xlabel("Channel")
1023 fig3.legend()
1024 fig3.tight_layout()
1025 fig3.savefig(plot_dir / "max_rms_deviation_vs_channel.png")
1026 fig3.clf()
1029def plot_outliers(
1030 df: pl.DataFrame,
1031 plot_dir: Path,
1032 info: Optional[Metadata] = None,
1033):
1034 df = _baseline_correction(df)
1035 board_id = df["board_id"][0] if "board_id" in df.columns else None
1036 run_number = df["run_number"][0]
1037 pas_mode = df["pas_mode"][0] if "pas_mode" in df.columns else None
1038 attenuation = df["att_val"][0] if "att_val" in df.columns else None
1040 # Summary plot setup
1041 fig, ax = plt.subplots(figsize=(10, 5))
1042 channels = df["channel"].unique().to_list()
1043 color_dict = {"lo": "b", "hi": "r"}
1044 title_dict = {"lo": "LG", "hi": "HG"}
1046 # For time-series deviation summary plot
1047 max_outlier_rate_deviations: Dict[str, Dict[int, float]] = {"hi": {}, "lo": {}}
1048 # For average outlier rate summary plot
1049 avg_outlier_rates: Dict[str, Dict[int, float]] = {"hi": {}, "lo": {}}
1050 # For max outlier rate summary plot
1051 max_outlier_rates: Dict[str, Dict[int, float]] = {"hi": {}, "lo": {}}
1053 gains: List[Literal["hi", "lo"]] = ["hi", "lo"]
1054 for gain in gains:
1055 gain_filtered_df = df.filter(pl.col("gain") == gain)
1056 first_channel = True
1058 for channel in gain_filtered_df["channel"].unique().to_list():
1059 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter(
1060 pl.col("channel") == channel
1061 )
1062 channel_str = str(channel).zfill(3)
1063 color = color_dict[gain]
1064 title = title_dict[gain]
1065 label = title if first_channel else None
1066 first_channel = False
1068 # Original outlier analysis
1069 samples = np.array(channel_df["samples"].explode().to_list())
1071 # gaussian fit statistics
1072 hist_bins: np.ndarray = np.linspace(min(samples), max(samples), 100)
1073 fig2, ax2 = plt.subplots(1)
1075 _ = ax2.hist(samples, bins=hist_bins.tolist())
1076 xaxis: np.ndarray = np.linspace(np.min(samples), np.max(samples), 50)
1077 fit_pars = helper.calc_gaussian(samples, hist_bins)
1078 fit_mu, fit_sigma, fit_N = fit_pars[0], fit_pars[2], fit_pars[4]
1079 d_mu, d_sigma = fit_pars[1], fit_pars[3]
1080 gauss_fit = helper.gauss(xaxis, fit_mu, fit_sigma, fit_N)
1081 upper_bound = fit_mu + 5 * fit_sigma
1082 lower_bound = fit_mu - 5 * fit_sigma
1083 ax2.axvline(upper_bound, color="red", linestyle="--", label="5σ")
1084 ax2.axvline(lower_bound, color="red", linestyle="--")
1085 ax2.axvline(fit_mu + 3 * fit_sigma, color="green", linestyle="--", label="3σ")
1086 ax2.axvline(fit_mu - 3 * fit_sigma, color="green", linestyle="--")
1087 ax2.plot(
1088 xaxis,
1089 gauss_fit,
1090 label=rf"""Gaussian Fit
1091 $\mu$ = {fit_mu:.03f} $\pm$ {d_mu:.03g}
1092 $\sigma$ = {fit_sigma:.03f} $\pm$ {d_sigma:.03g}""",
1093 )
1095 ax2.set_title(
1096 helper.plot_summary_string(
1097 board_id=board_id,
1098 run_numbers=run_number,
1099 channels=channel,
1100 name="Pedestal Sample Outliers [ADC counts]",
1101 attenuation=attenuation,
1102 pas_mode=pas_mode,
1103 gain=gain,
1104 )
1105 )
1106 ax2.set_ylabel("Entries")
1107 ax2.set_xlabel("Sample [ADC counts]")
1108 ax2.legend()
1109 fig2.savefig(f"{plot_dir}/channel{channel_str}_{gain}_sample_hist.png")
1110 fig2.clf()
1111 plt.close(fig2)
1113 # Calculate total outliers for summary bar plot
1114 five_sigma = 5 * fit_sigma
1115 outliers = samples[np.abs(samples - fit_mu) > five_sigma]
1116 num_outliers = len(outliers)
1118 ax.bar(int(channel), num_outliers, fill=False, ec=color, label=label, zorder=3)
1120 # Time-series analysis - outlier rate vs time
1121 fig3, ax3 = plt.subplots(figsize=(12, 6))
1122 ax3.grid(visible=True, zorder=0)
1124 t = helper.datetime_to_hr(channel_df["timestamp"])
1125 outlier_rates = []
1127 for sample in channel_df["samples"]:
1128 sample = np.array(sample)
1129 hist_bins = np.linspace(min(sample), max(sample), 100)
1131 fit_pars = helper.calc_gaussian(sample, hist_bins)
1132 fit_mu, fit_sigma = fit_pars[0], fit_pars[2]
1133 five_sigma = 5 * fit_sigma
1134 outliers = sample[np.abs(sample - fit_mu) > five_sigma]
1136 # Calculate outlier rate as percentage
1137 outlier_rate = (len(outliers) / len(sample)) * 100
1138 outlier_rates.append(outlier_rate)
1140 max_outlier_rates[gain][channel] = max(outlier_rates) if outlier_rates else 0
1141 ax3.plot(t, outlier_rates, "o", color=color, alpha=0.7)
1143 max_rate = max(outlier_rates) if outlier_rates else 0
1144 y_max = max(0.05, max_rate * 1.2)
1145 ax3.set_ylim(0, y_max)
1147 # Calculate max deviation and average rate for summary plots
1148 mean_rate = np.mean(outlier_rates)
1149 max_deviation = np.max(np.abs(np.array(outlier_rates) - mean_rate))
1150 max_outlier_rate_deviations[gain][channel] = max_deviation
1151 avg_outlier_rates[gain][channel] = mean_rate.astype(float)
1153 ax3.set_title(
1154 helper.plot_summary_string(
1155 board_id=board_id,
1156 run_numbers=run_number,
1157 channels=channel,
1158 name=f"Channel {channel} Outlier Rate vs Time, 5σ Gaussian Fit Cut",
1159 attenuation=attenuation,
1160 pas_mode=pas_mode,
1161 gain=gain,
1162 )
1163 )
1165 ax3.set_ylabel("Outlier Rate (%)")
1166 ax3.set_xlabel("Time [hours]")
1167 fig3.tight_layout()
1168 fig3.savefig(plot_dir / f"outliers_vs_time_channel{channel_str}_{gain}.png", dpi=300)
1169 plt.close(fig3)
1171 # Create summary plot: Number of outliers per channel
1172 ax.set_title(
1173 helper.plot_summary_string(
1174 board_id=board_id,
1175 run_numbers=run_number,
1176 channels=helper.list_to_text_ranges(channels),
1177 name="Number of Baseline Corrected Sample Outliers Per Channel, 5σ Gaussian Fit Cut",
1178 attenuation=attenuation,
1179 pas_mode=pas_mode,
1180 info=info,
1181 )
1182 )
1183 ax.set_xlabel("Channel", loc="right")
1184 ax.set_ylabel("Number of Outliers")
1185 ax.legend(loc="upper right")
1186 fig.tight_layout()
1187 fig.savefig(f"{plot_dir}/outliers_summary.png")
1188 plt.close(fig)
1190 # Create summary plot: Maximum outlier rate deviation vs channel #
1191 fig4, ax4 = plt.subplots(figsize=(12, 8))
1192 ax4.grid(visible=True, zorder=0)
1194 for gain in gains:
1195 if max_outlier_rate_deviations[gain]:
1196 channels_sorted = sorted(max_outlier_rate_deviations[gain].keys())
1197 deviations = [max_outlier_rate_deviations[gain][ch] for ch in channels_sorted]
1199 ax4.plot(
1200 channels_sorted,
1201 deviations,
1202 "o",
1203 color=color_dict[gain],
1204 label=f"{gain} Gain",
1205 alpha=0.7,
1206 markersize=6,
1207 )
1209 ax4.set_title(
1210 helper.plot_summary_string(
1211 board_id=board_id,
1212 run_numbers=run_number,
1213 channels=helper.list_to_text_ranges(list(range(min(channels_sorted), max(channels_sorted) + 1))),
1214 name="Maximum Outlier Rate Deviation vs Channel Number, 5σ Gaussian Fit Cut",
1215 attenuation=attenuation,
1216 pas_mode=pas_mode,
1217 info=info,
1218 )
1219 )
1221 ax4.set_ylabel("Maximum Outlier Rate Deviation (%)")
1222 ax4.set_xlabel("Channel Number")
1223 ax4.legend()
1224 fig4.tight_layout()
1225 fig4.savefig(plot_dir / "max_outlier_rate_deviation_vs_channel.png", dpi=300)
1226 plt.close(fig4)
1228 # Create summary plots: Average outlier rate vs channel
1229 for gain in gains:
1230 if avg_outlier_rates[gain]:
1231 fig5, ax5 = plt.subplots(figsize=(12, 8))
1232 ax5.grid(visible=True, zorder=0)
1234 channels_sorted = sorted(avg_outlier_rates[gain].keys())
1235 avg_rates = [avg_outlier_rates[gain][ch] for ch in channels_sorted]
1236 max_rates = [max_outlier_rates[gain][ch] for ch in channels_sorted]
1238 # Calculate error bars: upper = max - avg, lower = 0
1239 upper_errors = [max_rate - avg_rate for max_rate, avg_rate in zip(max_rates, avg_rates)]
1240 lower_errors = [0] * len(avg_rates) # No lower error bars
1242 ax5.errorbar(
1243 channels_sorted,
1244 avg_rates,
1245 yerr=np.array([lower_errors, upper_errors]),
1246 fmt="o",
1247 color=color_dict[gain],
1248 label=f"{gain} Gain",
1249 alpha=0.7,
1250 capsize=3,
1251 )
1253 ax5.set_title(
1254 helper.plot_summary_string(
1255 board_id=board_id,
1256 run_numbers=run_number,
1257 channels=helper.list_to_text_ranges(list(range(min(channels_sorted), max(channels_sorted) + 1))),
1258 name=f"Average Outlier Rate vs Channel Number - {gain.upper()} Gain, 5σ Gaussian Fit Cut",
1259 attenuation=attenuation,
1260 pas_mode=pas_mode,
1261 info=info,
1262 )
1263 )
1265 ax5.set_ylabel("Average Outlier Rate (%)")
1266 ax5.set_xlabel("Channel Number")
1267 ax5.legend()
1268 fig5.tight_layout()
1269 fig5.savefig(plot_dir / f"avg_outlier_rate_vs_channel_{gain}.png", dpi=300)
1270 ax5.set_yscale("log")
1271 fig5.savefig(plot_dir / f"avg_outlier_rate_vs_channel_{gain}_log.png", dpi=300)
1272 plt.close(fig5)
1275def plot_avg_sample_range(
1276 df: pl.DataFrame,
1277 plot_dir: Path,
1278 info: Optional[Metadata] = None,
1279) -> None:
1280 board_id = df["board_id"][0] if "board_id" in df.columns else None
1281 run_number = df["run_number"][0]
1282 pas_mode = df["pas_mode"][0] if "pas_mode" in df.columns else None
1283 attenuation = df["att_val"][0] if "att_val" in df.columns else None
1285 plt.figure(figsize=(10, 5))
1286 n_channels = df["channel"].n_unique()
1287 channels = df["channel"].unique().to_list()
1288 color_dict = {"lo": "b", "hi": "r"}
1289 title_dict = {"lo": "LG", "hi": "HG"}
1291 fig, ax = plt.subplots(figsize=(8, 6))
1292 plt.xticks(np.arange(0, n_channels, 4), rotation=70)
1293 ax.xaxis.set_tick_params(pad=0.1)
1294 _, ax2 = plt.subplots(1, figsize=(8, 6))
1295 ax.grid(visible=True)
1296 ax2.grid(visible=True)
1297 plt.xticks(np.arange(0, n_channels, 4), rotation=70)
1299 for gain in ["hi", "lo"]:
1300 gain_filtered_df = df.filter(pl.col("gain") == gain)
1301 first_channel = True
1302 for channel in gain_filtered_df["channel"].unique().to_list():
1303 channel_df = gain_filtered_df.select("channel", "gain", "samples", "timestamp", "mean", "std").filter(
1304 pl.col("channel") == channel
1305 )
1306 color = color_dict[gain]
1307 title = title_dict[gain]
1308 ax.grid(visible=True, zorder=0)
1310 avg_filter_df = channel_df.select(
1311 [
1312 pl.col("samples").list.max().alias("max_per_list"),
1313 pl.col("samples").list.min().alias("min_per_list"),
1314 (pl.col("samples").list.max() - pl.col("samples").list.min()).alias("range_per_list"),
1315 (pl.col("samples").list.max() - pl.col("samples").list.min()).mean().alias("avg_range"),
1316 ]
1317 )
1318 error = avg_filter_df["range_per_list"].std() / np.sqrt(len(avg_filter_df["range_per_list"]))
1319 avg_range = avg_filter_df["avg_range"][0]
1320 label = title if first_channel else None # Only label first channel per gain
1321 first_channel = False
1323 ax.errorbar(channel, avg_range, error, fmt=f"{color}o", label=label)
1325 ax.set_title(
1326 helper.plot_summary_string(
1327 board_id=board_id,
1328 run_numbers=run_number,
1329 channels=helper.list_to_text_ranges(channels),
1330 name="Avg. Sample Range vs. Channel #",
1331 attenuation=attenuation,
1332 pas_mode=pas_mode,
1333 info=info,
1334 )
1335 )
1336 ax.set_ylabel("Avg. Sample Range [ADC Counts]")
1337 ax.set_xlabel("Channel")
1338 fig.legend()
1339 fig.tight_layout()
1340 fig.savefig(plot_dir / "avg_sample_range.png")
1341 fig.clf()