Coverage for polars_analysis / frame.py: 59%

179 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-13 13:37 -0400

1import concurrent.futures 

2import logging 

3import multiprocessing as mp 

4import os 

5import sys 

6import traceback 

7from concurrent.futures import ProcessPoolExecutor 

8from copy import deepcopy 

9from pathlib import Path 

10from typing import List, Literal, Optional 

11 

12import matplotlib 

13import numpy as np 

14import polars as pl 

15 

16matplotlib.use("agg") 

17import scipy.signal as sps 

18from matplotlib import pyplot as plt 

19 

20import polars_analysis.plotting.frame_plotting as frame_plotting 

21import polars_analysis.plotting.pedestal_plotting as pedestal_plotting 

22from polars_analysis import frame_utils 

23from polars_analysis.analysis import constants, pedestal_analysis 

24from polars_analysis.data_sources import DataSource 

25from polars_analysis.plotting.helper import Metadata 

26 

27""" 

28High level functions for frame analysis 

29""" 

30 

31# Instantiate logger 

32log = logging.getLogger(__name__) 

33 

34 

def scan_for_misalignment(
    loader: DataSource,
    min_run_number: int,
    max_run_number: int,
    plot_dir: Path,
    reject_single_adc: bool = True,
    test_channel: int = 7,
) -> None:
    """
    Function to scan over all runs in the range of min to max run number for
    misaligned frames. Produces plots of misaligned channels per board.
    Previously scan_misalignment.py

    Parameters
    ----------
    loader:
        Data source used to list runs and load frame data.
    min_run_number, max_run_number:
        Inclusive run-number range to scan. The range must not straddle the
        run-2046 frame-data bugfix boundary; exits with status 1 if it does.
    plot_dir:
        Directory where the per-board misalignment histograms are saved.
    reject_single_adc:
        Forwarded to ``loader.load_frame_data``. When False, runs whose
        measurements all share a single ADC are filtered out afterwards.
    test_channel:
        Channel whose misaligned runs are printed to stdout.

    Side effects
    ------------
    Prints progress and summaries to stdout, logs errors, writes PNG files
    into ``plot_dir``, and calls ``sys.exit(1)`` on invalid run ranges.
    """
    # Runs before this number predate a frame-data bugfix and need separate
    # handling (their frame data is byte-swapped — see old_runs below)
    old_run_cutoff = 2046

    if min_run_number > max_run_number:
        log.error("Min run number is larger than max run number")
        sys.exit(1)

    # Refuse ranges that mix pre- and post-bugfix runs
    if min_run_number < old_run_cutoff and max_run_number > old_run_cutoff:
        log.error(
            "Run number range stradles 2046 cutoff between bugfix of frame data. These need to be run over and handled separately."  # noqa: E501
        )
        sys.exit(1)

    # old runs have swapped frame data; flag is forwarded to unpack_frame_data
    old_runs = max_run_number < old_run_cutoff

    df_run_numbers = loader.get_runs_list()

    # Pedestal runs inside the requested range, as a plain Python list
    run_numbers = (
        df_run_numbers.filter(
            pl.col("meas_type") == "pedestal",
            pl.col("run_number") <= max_run_number,
            pl.col("run_number") >= min_run_number,
        )
        .select(pl.col("run_number"))
        .to_series()
        .to_list()
    )

    df = loader.load_frame_data(*run_numbers, reject_single_adc=reject_single_adc, non_empty=True)
    # Filter away trigger runs if needed: keep only runs that carry more than
    # one distinct measurement (single-measurement runs are single-ADC)
    if not reject_single_adc:
        run_numbers = (
            df.group_by(pl.col("run_number"))
            .agg(pl.col("measurement").unique().len() > 1)
            .filter(pl.col("measurement"))["run_number"]
            .to_list()
        )
        df = df.filter(pl.col("run_number").is_in(run_numbers))

    runs_to_test = np.unique(df.select(pl.col("run_number")).to_series().to_list())

    # One (run_number, board_id, misaligned-channel array) tuple per run
    data = []

    for rn in runs_to_test:
        print(f"Processing run: {rn}")
        df_temp = df.filter(pl.col("run_number") == rn)
        # 32 rows expected per run (presumably one per ADC/gain combination —
        # TODO confirm); anything else is skipped as incomplete
        if len(df_temp) != 32:
            print(f"Skipping run {rn} as it has {len(df_temp)} != 32 rows of ADC data")
            continue

        # Need to loop over measurements to handle single ADC runs.
        # `a` accumulates check_bcid_alignment output (3 rows) across
        # measurements along axis 1; row 0 holds the misaligned channels.
        a = np.array([[], [], []], dtype=np.int32)
        for meas in df_temp["measurement"].unique().to_list():
            df_temp_meas = df_temp.filter(pl.col("measurement") == meas)
            df_temp_meas = frame_utils.trim_df_to_shortest_array(df_temp_meas)
            df_temp_meas = frame_utils.unpack_frame_data(df_temp_meas, old_runs)
            a = np.concatenate([a, frame_utils.check_bcid_alignment(df_temp_meas)], axis=1)

        data.append((int(rn), int(df_temp["board_id"][0]), a[0].astype(np.int64)))

    # Transpose the list of tuples into per-column lists for pl.from_dict
    data_dict = dict()
    data_dict["run_number"], data_dict["board_id"], data_dict["misalign_channels"] = list(zip(*data))

    df_alignment_check = pl.from_dict(
        data_dict,
        schema={"run_number": pl.Int64, "board_id": pl.Int64, "misalign_channels": pl.List(pl.Int64)},
        strict=False,
    )

    # Per-board list of all misaligned channels, only for runs that had any
    df_gb = (
        df_alignment_check.filter(pl.col("misalign_channels").list.len() > 0)
        .group_by(pl.col("board_id"))
        .agg(pl.col("misalign_channels").explode())
    )

    # print out some info for runs that have misaligned channel X
    print(f"Runs with ch {test_channel} misaligned")
    for r in (
        df_alignment_check.filter(pl.col("misalign_channels").list.contains(test_channel))
        .select(pl.col("board_id"), pl.col("run_number").cast(pl.Int32))
        .join(df, on="run_number", how="left")
        .select(pl.col(["run_number", "board_id", "timestamp"]))
        .group_by(pl.col("run_number"))
        .first()
        .sort(pl.col("run_number"))
        .iter_rows()
    ):
        # r = (run_number, board_id, timestamp)
        print(r[0], r[1], r[2].strftime("%Y-%m-%d %H:%M:%S"))

    # One histogram per board: how many runs had each channel misaligned
    for board in df_gb["board_id"].to_list():
        print(f"plotting {board}")
        # 128 channels -> 128 bins over [0, 128]
        n, _ = np.histogram(df_gb.filter(pl.col("board_id") == board)["misalign_channels"].to_list(), bins=range(129))

        n_runs = len(df_alignment_check.filter(pl.col("board_id") == board))
        # denom = np.ones(len(n)) * n_runs  # noqa: F841

        plt.figure(figsize=(10, 8))
        plt.bar(range(128), n, fill=False)  # /denom
        plt.title(
            f"Channels with misalignment\nBoard ID = {board}, Runs in {min_run_number}--{max_run_number}, N Runs Total = {n_runs}"  # noqa: E501
        )
        plt.ylabel("N Misaligned Runs")
        plt.xlabel("Channel")
        plt.xticks(np.arange(0, 128, 4), rotation=70)
        plt.tight_layout()
        plt.grid(True)
        # Old runs get a distinct filename suffix so the two eras don't clash
        if old_runs:
            plt.savefig(plot_dir / f"ch_misalign_freq_{board}_oldruns.png")
        else:
            plt.savefig(plot_dir / f"ch_misalign_freq_{board}.png")
        plt.cla()
        plt.clf()

    # todo make plots that are just empty instead
    print(f"Aligned boards: {np.setdiff1d(np.unique(data_dict['board_id']), df_gb['board_id'].to_list())}")

163 

164 

def plot_extended_readout(
    raw_data: pl.DataFrame,
    run_number: int,
    plot_dir_base: Path,
    trigger_window: int = 128,
    trigger_rate: float = constants.FELIX_TRIGGER_RATE[1],  # Hz
    skip_channels_lo: Optional[List[int]] = None,
    skip_channels_hi: Optional[List[int]] = None,
    bnl_data: bool = False,
):
    """
    Produce extended-readout diagnostic plots for a single run, per gain and
    per channel.

    For each gain ("lo", "hi") this computes per-trigger-window mean/std of
    the samples, their Welch PSD vs time, mean vs full BCID, a channel
    correlation matrix, and 2D FFT summaries. Per-channel figures are
    rendered in parallel via a spawn-context ProcessPoolExecutor.

    Parameters
    ----------
    raw_data:
        Frame data, one row per (channel, gain); must contain the columns
        selected below (board_id, gain, channel, samples, measurement,
        felix_event_count, awg_amp, att_val, pas_mode, run_number,
        adc_full_bcid, and optionally trigger_window / trigger_rate /
        adc_bcid metadata columns).
    run_number:
        Run number; used for the output directory and plot labels.
    plot_dir_base:
        Base plot directory; output goes to
        ``plot_dir_base/run{run_number}/extended_readout/``.
    trigger_window:
        Samples per trigger window. Overridden by the ``trigger_window``
        metadata column when present and positive.
    trigger_rate:
        Trigger rate in Hz. Overridden by the ``trigger_rate`` metadata
        column (resolved via ``constants.felix_trigger_rate``) when present
        and positive.
    skip_channels_lo, skip_channels_hi:
        Channels to exclude for the respective gain.
    bnl_data:
        Forwarded to ``constants.felix_trigger_rate`` when resolving the
        metadata trigger rate.

    Side effects
    ------------
    Writes PNG files under the plot directory, adjusts their permissions on
    first fill, and logs warnings/errors.
    """
    plot_adc_bcid = False  # check columns_to_drop (need to keep adc_bcid) in check_and_align_frames_wrapper
    # when setting this to True

    board_id = raw_data.select(pl.col("board_id").first()).item()
    pas_mode = raw_data.select(pl.col("pas_mode").first()).item()

    # Overwrite input arguments with values from metadata if it exists
    if "trigger_window" in raw_data.columns:
        trigger_window_meta = raw_data.select(pl.col("trigger_window").first()).item()
        if trigger_window_meta is not None and trigger_window_meta > 0 and trigger_window_meta != trigger_window:
            log.warning(f"Trigger window {trigger_window} overridden by window in metadata {trigger_window_meta}")
            # Bug fix: this was previously `trigger_window = trigger_window`
            # (a no-op), so the metadata value was logged but never applied.
            trigger_window = trigger_window_meta
    if "trigger_rate" in raw_data.columns:
        trigger_rate_int = raw_data.select(pl.col("trigger_rate").first()).item()
        if trigger_rate_int is not None and trigger_rate_int > 0:
            trigger_rate_meta = constants.felix_trigger_rate(run_number, trigger_rate_int, bnl_data)

            if trigger_rate_meta != trigger_rate:
                log.warning(f"Trigger rate {trigger_rate} overridden by rate in metadata {trigger_rate_meta}")
                trigger_rate = trigger_rate_meta

    plot_dir = Path(plot_dir_base / f"run{run_number}/extended_readout/")
    if not plot_dir.exists():
        plot_dir.mkdir(parents=True, exist_ok=True)
        os.chmod(plot_dir, 0o775)
    # Remember whether the directory already had plots: permissions are only
    # (re)set on the first fill, at the bottom of this function.
    plot_dir_filled = any(plot_dir.glob("*png"))

    # Pare down to the columns actually used below
    raw_data = raw_data.select(
        pl.col(
            [
                "board_id",
                "gain",
                "channel",
                "samples",
                "measurement",
                "felix_event_count",
                "awg_amp",
                "att_val",
                "pas_mode",
                "run_number",
                "adc_full_bcid",
            ]
            + (["adc_bcid"] if plot_adc_bcid else [])  # needed to plot vs adc_bcid below
        )
    )

    # Hacking in ADC_BCID when BCR is held high
    # if plot_adc_bcid:
    #     raw_data = raw_data.with_columns( pl.lit([i%32 for i in range(1957246)]).alias("adc_bcid"))

    if skip_channels_lo:
        raw_data = raw_data.filter(~((pl.col("gain") == "lo") & (pl.col("channel").is_in(skip_channels_lo))))
    if skip_channels_hi:
        raw_data = raw_data.filter(~((pl.col("gain") == "hi") & (pl.col("channel").is_in(skip_channels_hi))))

    # Sort by channels to preserve order.
    # Bug fix: polars DataFrame.sort returns a new frame; the result was
    # previously discarded, making this a no-op.
    raw_data = raw_data.sort(pl.col("channel"))

    gains: List[Literal["lo", "hi"]] = ["lo", "hi"]
    for gain in gains:
        raw_data_g = raw_data.filter(pl.col("gain") == gain)
        info_g = Metadata.fill_from_dataframe(raw_data_g)
        channels = raw_data_g["channel"].unique().sort().to_list()

        # Truncate each sample array to a whole number of trigger windows
        cutoff = np.floor(len(raw_data_g["felix_event_count"][0]) / trigger_window).astype(np.int32) * trigger_window

        # Per-window mean and std of the samples for every channel
        raw_data_g = raw_data_g.with_columns(
            pl.col("samples")
            .map_elements(
                lambda x: x[:cutoff].to_numpy().reshape(-1, trigger_window).mean(axis=1),
                return_dtype=pl.List(pl.Float64),
            )
            .alias("trig_window_mean"),
            pl.col("samples")
            .map_elements(
                lambda x: x[:cutoff].to_numpy().reshape(-1, trigger_window).std(axis=1),
                return_dtype=pl.List(pl.Float64),
            )
            .alias("trig_window_std"),
        )

        # Mean and std vs time (rows = channels, cols = trigger windows)
        mean_y = np.array([i for i in raw_data_g["trig_window_mean"]])
        std_y = np.array([i for i in raw_data_g["trig_window_std"]])
        time_x = np.arange(len(mean_y[0])) / trigger_rate
        std_samples = raw_data_g["samples"].list.std().to_numpy()

        # FFT of mean vs time (Welch PSD per channel along axis 1)
        freq, psd = sps.welch(mean_y, fs=trigger_rate, axis=1, nperseg=len(mean_y[0]) // 4)

        adc_full_bcid = raw_data_g["adc_full_bcid"]
        x_bcid = np.arange(constants.N_BCID)

        # Correlation Matrix
        measurements = raw_data_g["measurement"].unique().to_list()
        matrix = pedestal_analysis.calc_correlation_matrix(raw_data_g, measurements, gain, "trig_window_mean")
        min_channel: int = raw_data_g.select(pl.col("channel").min()).item()
        n_channels: int = raw_data_g.select(pl.col("channel").unique().count()).item()
        pedestal_plotting.plot_correlation_matrix(
            matrix, gain, min_channel, n_channels, plot_dir, board_id=board_id, pas_mode=pas_mode
        )

        log.info(f"Plotting {gain} gain extended readout figures")
        frame_plotting.plot_mean_v_time(time_x, mean_y, plot_dir, info=info_g)

        frame_plotting.plot_60hz_power(freq, psd, plot_dir, info=info_g, channels=channels)

        pedestal_plotting.plot_fft2d(
            freq.tolist(),
            psd,
            plot_dir,
            channels=channels,
            gain=gain,
            run_number=run_number,
            board_id=str(board_id),
            pas_mode=pas_mode,
            unit="Hz",
        )

        # only used if plot_adc_bcid == True
        x_adc_bcid = np.arange(32)
        freq_adc_all = np.array([])
        psd_adc_all = []
        h_adc_bcid_sum = np.zeros(32)
        h_adc_bcid_weighted_sum = np.zeros(32)

        # Spawn context so matplotlib state is not inherited by workers
        with ProcessPoolExecutor(mp_context=mp.get_context("spawn")) as executor:
            job_handles = dict()
            for index, channel in enumerate(channels):
                # Need to copy info for each channel or else parallel plotting might have wrong labels
                info_ch = deepcopy(info_g)
                info_ch.channels = channel

                # Occupancy and sample-weighted occupancy per full BCID
                h_bcid = np.histogram(adc_full_bcid[index], bins=np.arange(constants.N_BCID + 1))

                h_weighted = np.histogram(
                    adc_full_bcid[index],
                    bins=np.arange(constants.N_BCID + 1),
                    weights=raw_data_g["samples"][index],
                )

                freq_bcid, psd_bcid = sps.welch(h_weighted[0] / h_bcid[0], fs=1, nperseg=constants.N_BCID)

                # Internal ADC BCID
                if plot_adc_bcid:
                    adc_internal_bcid = raw_data_g["adc_bcid"]
                    h_adc_bcid = np.histogram(adc_internal_bcid[index], bins=np.arange(33))
                    h_adc_weighted = np.histogram(
                        adc_internal_bcid[index],
                        bins=np.arange(33),
                        weights=raw_data_g["samples"][index],
                    )
                    h_adc_bcid_sum += h_adc_bcid[0]
                    h_adc_bcid_weighted_sum += h_adc_weighted[0]

                    # Bug fix: the signal has 32 bins, so nperseg=33 made
                    # scipy warn and clip to 32; use 32 directly.
                    freq_adc_bcid, psd_adc_bcid = sps.welch(h_adc_weighted[0] / h_adc_bcid[0], fs=1, nperseg=32)
                    # Bug fix: was `freq_adc_bcid.astype(np.ndarray)`, which
                    # passes a class where a dtype belongs (object-dtype copy).
                    freq_adc_all = freq_adc_bcid
                    psd_adc_all.append(psd_adc_bcid)

                    job_handles[
                        executor.submit(
                            frame_plotting.plot_mean_v_bcid,
                            channel,
                            x_adc_bcid,
                            h_adc_bcid[0],
                            h_adc_weighted[0],
                            plot_dir,
                            info_ch,
                        )
                    ] = "plot_mean_v_bcid"

                    job_handles[
                        executor.submit(
                            frame_plotting.plot_fft_mean_v_bcid,
                            channel,
                            freq_adc_bcid,
                            psd_adc_bcid,
                            plot_dir,
                            info_ch,
                        )
                    ] = "plot_fft_mean_v_bcid"
                else:
                    job_handles[
                        executor.submit(
                            frame_plotting.plot_mean_v_bcid,
                            channel,
                            x_bcid,
                            h_bcid[0],
                            h_weighted[0],
                            plot_dir,
                            info_ch,
                        )
                    ] = "plot_mean_v_bcid"

                    job_handles[
                        executor.submit(
                            frame_plotting.plot_fft_mean_v_bcid,
                            channel,
                            freq_bcid,
                            psd_bcid,
                            plot_dir,
                            info_ch,
                        )
                    ] = "plot_fft_mean_v_bcid"

                # job_handles[
                #     executor.submit(
                #         frame_plotting.plot_adc_full_bcid,
                #         channel,
                #         h_bcid,
                #         plot_dir,
                #         info_ch,
                #     )
                # ] = "plot_adc_full_bcid"

                job_handles[
                    executor.submit(
                        frame_plotting.plot_mean_v_time_ch,
                        channel,
                        time_x,
                        mean_y[index],
                        plot_dir,
                        info_ch,
                    )
                ] = "plot_mean_v_time_ch"

                # job_handles[
                #     executor.submit(
                #         frame_plotting.plot_mean_v_time_ch_zoom,
                #         channel,
                #         time_x,
                #         mean_y[index],
                #         plot_dir,
                #         info_ch,
                #     )
                # ] = "plot_mean_v_time_ch_zoom"

                # job_handles[
                #     executor.submit(
                #         frame_plotting.plot_std_v_time_ch,
                #         channel,
                #         time_x,
                #         std_y[index],
                #         plot_dir,
                #         info_ch,
                #     )
                # ] = "plot_std_v_time_ch"

                # job_handles[
                #     executor.submit(
                #         frame_plotting.plot_std_v_time_ch_zoom,
                #         channel,
                #         time_x,
                #         std_y[index],
                #         plot_dir,
                #         info_ch,
                #     )
                # ] = "plot_std_v_time_ch_zoom"

                job_handles[
                    executor.submit(
                        frame_plotting.plot_fft_mean_v_time,
                        channel,
                        freq,
                        psd[index],
                        plot_dir,
                        info_ch,
                    )
                ] = "plot_fft_mean_v_time"

                # job_handles[
                #     executor.submit(
                #         frame_plotting.plot_window_stds,
                #         channel,
                #         plot_dir,
                #         std_y[index],
                #         std_samples[index],
                #         info_ch
                #     )
                # ] = "plot_window_stds"

                job_handles[
                    executor.submit(
                        frame_plotting.plot_window_means,
                        channel,
                        plot_dir,
                        mean_y[index],
                        std_y[index],
                        std_samples[index],
                        trigger_window,
                        info_ch,
                    )
                ] = "plot_window_means_sample_mean"

            if plot_adc_bcid:
                # Summed-over-channels versions, labelled as pseudo-channel 200
                job_handles[
                    executor.submit(
                        frame_plotting.plot_mean_v_bcid,
                        200,
                        x_adc_bcid,
                        h_adc_bcid_sum,
                        h_adc_bcid_weighted_sum,
                        plot_dir,
                        info_g,
                    )
                ] = "plot_mean_v_bcid_sum"

                # Bug fix: nperseg=33 > 32-bin signal; use 32 directly (see above)
                freq_adc_bcid, psd_adc_bcid = sps.welch(h_adc_bcid_weighted_sum / h_adc_bcid_sum, fs=1, nperseg=32)
                job_handles[
                    executor.submit(
                        frame_plotting.plot_fft_mean_v_bcid,
                        200,
                        freq_adc_bcid,
                        psd_adc_bcid,
                        plot_dir,
                        info_g,
                    )
                ] = "plot_fft_mean_v_bcid"

            # Check for exceptions raised in the worker processes
            for future in concurrent.futures.as_completed(job_handles):
                job = job_handles[future]
                try:
                    future.result()
                except Exception as exc:
                    log.error(f"{job} generated an exception: {exc}")
                    print(traceback.format_exc())

        if plot_adc_bcid:
            pedestal_plotting.plot_fft2d(
                freq_adc_all.tolist(),
                np.concatenate([[np.array(i)] for i in psd_adc_all]),
                plot_dir,
                channels=channels,
                gain=gain,
                run_number=run_number,
                board_id=str(board_id),
                pas_mode=pas_mode,
                unit="1/(ADC BCID)",
                extra_filename="adc_bcid",
            )

    # Make freshly-written plots group-readable (only on first fill)
    if not plot_dir_filled:
        for f in plot_dir.glob("*png"):
            os.chmod(f, 0o664)
        for f in plot_dir.glob("*json"):
            os.chmod(f, 0o664)

523 

524 

def plot_extended_readout_from_loader(
    loader: DataSource,
    run_number: int,
    plot_dir_base: Path,
    trigger_window: int = 128,
    trigger_rate: float = constants.FELIX_TRIGGER_RATE[1],  # Hz
    skip_channels_lo: Optional[List[int]] = None,
    skip_channels_hi: Optional[List[int]] = None,
    swap_frame18: bool = False,
    baseline_corr_integration_period: Optional[float] = None,
):
    """
    Convenience entry point for standalone running: load and align the frame
    data for ``run_number`` via ``frame_utils.check_and_align_frames_wrapper``,
    then hand it off to ``plot_extended_readout``.
    """
    run_plot_dir = Path(plot_dir_base / f"run{run_number}")

    # Second return value (alignment diagnostics) is not needed here
    aligned_data, _ = frame_utils.check_and_align_frames_wrapper(
        loader,
        run_number,
        swap_frame18,
        baseline_corr_integration_period=baseline_corr_integration_period,
        plot_dir=run_plot_dir,
    )

    plot_extended_readout(
        aligned_data,
        run_number,
        plot_dir_base,
        trigger_window=trigger_window,
        trigger_rate=trigger_rate,
        skip_channels_lo=skip_channels_lo,
        skip_channels_hi=skip_channels_hi,
    )