Coverage for polars_analysis / noise_stability.py: 94%

126 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-03-16 15:00 -0400

1import concurrent.futures 

2import datetime 

3import logging 

4import multiprocessing as mp 

5import os 

6import subprocess as sp 

7import traceback 

8from concurrent.futures import ProcessPoolExecutor 

9from pathlib import Path 

10from typing import Optional, cast 

11from zoneinfo import ZoneInfo 

12 

13import polars as pl 

14 

15import polars_analysis.plotting.noise_stability_plotting as plotting 

16import polars_analysis.plotting.pedestal_plotting as pedestal_plotting 

17from polars_analysis import utils 

18from polars_analysis.analysis import constants 

19from polars_analysis.data_sources import DataSource 

20from polars_analysis.plotting.helper import Metadata 

21from polars_analysis.utils import get_columns_or_exit 

22 

# Module-level logger named after this module; handler/level configuration
# is left to the application entry point.
log = logging.getLogger(__name__)

25 

26 

def calc_all(raw_data: pl.DataFrame) -> pl.DataFrame:
    """Validate and derive per-row statistics for a noise_stability run.

    Args:
        raw_data: raw measurement rows; must contain a ``meas_type`` column.

    Returns:
        The derived dataframe produced by :func:`calc_derived`.

    Raises:
        ValueError: if no row has ``meas_type == "noise_stability"``.
    """
    if raw_data.filter(pl.col("meas_type") == "noise_stability").is_empty():
        log.critical("No rows in the dataframe correspond to a noise_stability run. Aborting.")
        # ValueError (a subclass of Exception) keeps existing broad handlers working
        # while letting new callers catch the specific failure.
        raise ValueError("Empty dataframe")

    return calc_derived(raw_data)

34 

35 

def calc_derived(df: pl.DataFrame) -> pl.DataFrame:
    """Compute per-readout mean and std of the raw sample lists.

    Keeps only noise_stability rows and the columns needed downstream, then
    appends ``mean`` and ``std`` columns derived from the ``samples`` lists.
    """
    keep_cols = (
        "run_number",
        "samples",
        "measurement",
        "board_id",
        "timestamp",
        "gain",
        "channel",
        "pas_mode",
        "att_val",
    )
    ns_rows = df.filter(pl.col("meas_type") == "noise_stability").select(*keep_cols)
    samples = pl.col("samples")
    return ns_rows.with_columns(
        samples.list.mean().alias("mean"),
        samples.list.std().alias("std"),
    )

48 

49 

def plot_all(
    raw_data: pl.DataFrame,
    monitoring_df: pl.DataFrame,
    derived_data: pl.DataFrame,
    lab_env_data: pl.DataFrame,
    plot_dir: Path,
    plot_all_temp_sources: Optional[bool] = False,
):
    """Produce every noise-stability plot serially (single process).

    Args:
        raw_data: raw per-readout samples; must contain the columns checked below.
        monitoring_df: board monitoring time series.
        derived_data: output of :func:`calc_derived`.
        lab_env_data: lab environment (temperature) readings.
        plot_dir: destination directory for the plot files.
        plot_all_temp_sources: if True, correlate against every known
            temperature source rather than the default subset.
    """
    ### Raw Samples Plots ###
    columns_to_get = [
        "run_number",
        "measurement",
        "channel",
        "gain",
        "samples",
        "board_id",
        "pas_mode",
        "trigger_rate",
    ]
    raw_data = get_columns_or_exit(raw_data, columns_to_get)

    pas_mode = raw_data["pas_mode"].unique().to_list()[0]

    # NaN is the only value unequal to itself: a NaN pas_mode means the column
    # carries no usable information, so drop it before aggregating.
    # (The previous `pas_mode = -1` sentinel was never read and has been removed.)
    if pas_mode != pas_mode:
        raw_data.drop_in_place("pas_mode")

    # One row per (channel, gain) with all samples concatenated, so each channel
    # can be plotted from a single flat list.
    aggregated_df = (
        raw_data.sort(["channel", "gain", "measurement"])  # Sort by all relevant columns
        .group_by("channel", "gain", maintain_order=True)
        .agg(
            pl.col("run_number").first(),
            pl.col("samples").explode(),
            pl.col("board_id").first(),
        )
    )

    temp_sources = constants.ALL_TEMPERATURE_SOURCES if plot_all_temp_sources else constants.TEMPERATURE_SOURCES
    for temp_source in temp_sources:
        log.debug(f"Processing temperature source: {temp_source}")
        try:
            plotting.plot_temp_correlation(
                derived_data, monitoring_df, lab_env_data, plot_dir, temp_source=temp_source, settling_time=None
            )
        except ValueError:
            # plot_temp_correlation raises ValueError when the overlap between
            # the datasets is too small to compute a correlation; best effort.
            log.warning(f"Not enough data to calculate correlation for {temp_source}")
    for channel_df in aggregated_df.iter_rows(named=True):
        all_samples = channel_df["samples"]
        channel_info = Metadata.fill_from_dataframe(pl.DataFrame(channel_df))
        pedestal_plotting.plot_raw(channel_info, all_samples, plot_dir)

    info = Metadata.fill_from_dataframe(pl.DataFrame(aggregated_df))
    plotting.plot_monitoring(monitoring_df, lab_env_data, plot_dir)
    plotting.avg_rms_mean_vs_channel(derived_data, plot_dir)
    plotting.plot_outliers(derived_data, plot_dir, info)
    plotting.plot_avg_sample_range(derived_data, plot_dir)
    plotting.plot_monitor_channel_correlation(derived_data, monitoring_df, lab_env_data, plot_dir)
    plotting.plot_monitor_monitor_correlation(monitoring_df, lab_env_data, plot_dir)
    for gain in ["lo", "hi"]:
        gain_filtered_df: pl.DataFrame = derived_data.filter(pl.col("gain") == gain)
        plotting.plot_mean_rms_vs_time(gain_filtered_df, plot_dir)
        plotting.plot_sample_range_vs_time(gain_filtered_df, plot_dir)

113 

114 

def parallel_plot_all(
    raw_data: pl.DataFrame,
    monitoring_df: pl.DataFrame,
    derived_data: pl.DataFrame,
    lab_env_data: pl.DataFrame,
    plot_dir: Path,
    plot_all_temp_sources: bool = False,
):
    """Produce every noise-stability plot in parallel worker processes.

    Mirrors :func:`plot_all` but fans each plotting call out to a
    ``ProcessPoolExecutor`` (spawn context, so worker state is clean).
    Exceptions from workers are logged rather than propagated, so one failing
    plot does not abort the rest.

    Args:
        raw_data: raw per-readout samples; must contain the columns checked below.
        monitoring_df: board monitoring time series.
        derived_data: output of :func:`calc_derived`.
        lab_env_data: lab environment (temperature) readings.
        plot_dir: destination directory for the plot files.
        plot_all_temp_sources: if True, correlate against every known
            temperature source rather than the default subset.
    """
    # Remember whether plot_dir already held plots so we only relax file
    # permissions on a fresh population (see the chmod loop at the end).
    plot_dir_filled = any(plot_dir.glob("*png"))
    githash = sp.check_output(["git", "rev-parse", "HEAD"]).decode("ascii").strip()
    columns_to_get = [
        "run_number",
        "measurement",
        "channel",
        "gain",
        "samples",
        "board_id",
        "pas_mode",
    ]
    raw_data = get_columns_or_exit(raw_data, columns_to_get)
    # Attach board_id to the derived rows; the plotting helpers need it.
    derived_data = derived_data.join(
        raw_data.select("run_number", "measurement", "channel", "gain", "board_id"),
        on=["run_number", "measurement", "channel", "gain"],
    )

    info_derived = Metadata.fill_from_dataframe(derived_data)

    with ProcessPoolExecutor(mp_context=mp.get_context("spawn")) as executor:
        # future -> job name, used for error reporting below.
        job_handles: dict = {}

        # One row per (channel, gain) with all samples concatenated.
        aggregated_df = (
            raw_data.sort(["channel", "gain", "measurement"])  # Sort by all relevant columns
            .group_by("channel", "gain", maintain_order=True)
            .agg(
                pl.col("run_number").first(),
                pl.col("samples").explode(),
                pl.col("board_id").first(),
                pl.col("pas_mode").first(),
            )
        )
        for channel_df in aggregated_df.iter_rows(named=True):
            all_samples = channel_df["samples"]
            info = Metadata.fill_from_dataframe(pl.DataFrame(channel_df))
            job_handles[
                executor.submit(
                    pedestal_plotting.plot_raw,
                    info,
                    all_samples,
                    plot_dir,
                )
            ] = "plot_raw"

        temp_sources = constants.ALL_TEMPERATURE_SOURCES if plot_all_temp_sources else constants.TEMPERATURE_SOURCES
        for temp_source in temp_sources:
            job_handles[
                executor.submit(
                    plotting.plot_temp_correlation,
                    derived_data.clone(),  # clone: avoid sharing one frame across pickled submissions
                    monitoring_df,
                    lab_env_data,
                    plot_dir,
                    temp_source=temp_source,
                )
            ] = "plot_temp_correlation"
        job_handles[
            executor.submit(
                plotting.plot_monitor_channel_correlation,
                derived_data.clone(),
                monitoring_df,
                lab_env_data,
                plot_dir,
            )
        ] = "plot_monitor_channel_correlation"

        job_handles[
            executor.submit(
                plotting.plot_monitor_monitor_correlation,
                monitoring_df,
                lab_env_data,
                plot_dir,
            )
        ] = "plot_monitor_monitor_correlation"
        job_handles[
            executor.submit(
                plotting.plot_outliers,
                derived_data.clone(),
                plot_dir,
                info_derived,
            )
        ] = "plot_outliers"
        job_handles[
            executor.submit(
                plotting.plot_avg_sample_range,
                derived_data.clone(),
                plot_dir,
            )
        ] = "plot_avg_sample_range"

        job_handles[
            executor.submit(
                plotting.plot_monitoring,
                monitoring_df,
                lab_env_data,
                plot_dir,
            )
        ] = "plot_monitoring"
        job_handles[
            executor.submit(
                plotting.avg_rms_mean_vs_channel,
                derived_data.clone(),
                plot_dir,
            )
        ] = "avg_rms_mean_vs_channel"
        for gain in ["lo", "hi"]:
            # filter() already returns a fresh frame; no extra clone needed.
            gain_df = derived_data.filter(pl.col("gain") == gain)
            info_g = Metadata.fill_from_dataframe(gain_df)
            info_g.githash = githash

            job_handles[
                executor.submit(
                    plotting.plot_mean_rms_vs_time,
                    gain_df,
                    plot_dir,
                )
            ] = "plot_mean_rms_vs_time"

            job_handles[
                executor.submit(
                    plotting.plot_sample_range_vs_time,
                    gain_df,
                    plot_dir,
                )
            ] = "plot_sample_range_vs_time"
        # Check for exceptions: log failures per job instead of aborting the pool.
        for future in concurrent.futures.as_completed(job_handles):
            job = job_handles[future]
            try:
                future.result()
            except Exception as exc:
                log.error(f"{job} generated an exception: {exc}")
                print(traceback.format_exc())

    if not plot_dir_filled:
        # Freshly created outputs: make plots/metadata group-writable.
        for f in plot_dir.glob("*png"):
            os.chmod(f, 0o664)
        for f in plot_dir.glob("*json"):
            os.chmod(f, 0o664)

263 

264 

def calc_plot_all(
    loader: DataSource,
    run_number: int,
    plot_dir: Path,
    plot_all_temp_sources: bool = False,
):
    """Load a run, compute derived noise-stability data, and make all plots.

    Args:
        loader: data source used to load raw/monitoring/lab-environment data
            and to persist the derived dataframe.
        run_number: run to process.
        plot_dir: output directory; created (group-writable) if missing.
        plot_all_temp_sources: forwarded to the plotting entry points.
    """
    if not plot_dir.exists():
        plot_dir.mkdir(parents=True, exist_ok=True)
        os.chmod(plot_dir, 0o775)

    raw_data = loader.load_raw_data(run_number)
    monitoring_df = loader.load_monitoring_data(run_number)

    derived_data = calc_all(raw_data)
    loader.save_derived_data(derived_data, run_number=run_number, meas_type="noise_stability")

    n_readouts = cast(int, raw_data["measurement"].max())
    start_time = cast(datetime.datetime, raw_data["timestamp"].min())
    end_time = cast(datetime.datetime, raw_data["timestamp"].max())
    start_time_NY = start_time.astimezone(ZoneInfo("America/New_York"))
    end_time_NY = end_time.astimezone(ZoneInfo("America/New_York"))
    duration_per_readout = (end_time - start_time) / n_readouts

    board_ids = loader.get_boards_list(run_number)["board_id"].to_list()
    for board_id in board_ids:
        # measurement is 0-indexed, so the readout count is max + 1.
        utils.add_run_info("n_readouts", n_readouts + 1, board_id, plot_dir)
        utils.add_run_info(
            "duration_between_readouts", f"{round(duration_per_readout.total_seconds())} s", board_id, plot_dir
        )
        utils.add_run_info("start_time", start_time_NY.strftime("%Y-%m-%d %H:%M:%S %Z"), board_id, plot_dir)
        utils.add_run_info("end_time", end_time_NY.strftime("%Y-%m-%d %H:%M:%S %Z"), board_id, plot_dir)

    # Restrict lab environment data to this run's time window and lab.
    lab_env_data_all = loader.load_lab_environment_data()
    lab_env_data = (
        lab_env_data_all.filter(pl.col("timestamp") >= start_time)
        .filter(pl.col("timestamp") <= end_time)
        .filter(pl.col("lab_name") == "crate_lab")
    )

    log.info("Making noise stability plots")
    # At debug level run serially so tracebacks and pdb are usable;
    # otherwise fan out to worker processes.
    if log.getEffectiveLevel() == logging.DEBUG:
        plot_all(
            raw_data, monitoring_df, derived_data, lab_env_data, plot_dir, plot_all_temp_sources=plot_all_temp_sources
        )
    else:
        parallel_plot_all(
            raw_data, monitoring_df, derived_data, lab_env_data, plot_dir, plot_all_temp_sources=plot_all_temp_sources
        )