Skip to content

Commit edf743b

Browse files
committed
Removed the unnecessary outlier_board_evaluation script and moved its functionality into outlier_scan. Also made a few fixes to outlier_scan.
1 parent ac90e43 commit edf743b

2 files changed

Lines changed: 143 additions & 38 deletions

File tree

ana/charge/outlier_board_evaluation.py

Lines changed: 0 additions & 28 deletions
This file was deleted.

ana/charge/outlier_scan.py

Lines changed: 143 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@
1212

1313
HR_thresholds = {"CALIB_0":10, "CALIB_32":24, "CALIB_64":42, "CALIB_256":100,
1414
"CALIB_512":200, "CALIB_1024":200, "CALIB_2048":200, "CALIB_4096":200}
15-
plot_types = ['CLUSTER', 'SINGULAR']
15+
plot_types = ['CLUSTER', 'SINGULAR', 'EVALUATION']
1616

1717
parser = argparse.ArgumentParser()
1818
parser.add_argument('dataset', type=Path, nargs='+', help='Parameter timescan of one channel, one CALIB, n samples per time-point')
1919
parser.add_argument('-s', '--samples', type=int, help='Number of samples per phase')
2020
parser.add_argument('-cs', '--cluster_scan', action='store_true', help='Perform an outliers cluster-scan (default)')
21-
parser.add_argument('-wh', '--wrong_header', action='store_true', help='Lund board-testing specific - some HR data have the wrong header')
2221
parser.add_argument('-ts', '--threshold_scan', action='store_true', help='Perform a threshold outliers-scan with default thresholds (only HR works)')
2322
parser.add_argument('-ph', '--phase_analysis', action='store_true', help='Perform and plot phase analysis of the outliers (include -pd for plot directory path)')
23+
parser.add_argument('-be', '--bulk_evaluation', action='store_true', help='Perform analysis to evaluate multiple channels and CALIBs in terms of outliers found')
2424
parser.add_argument('-p', '--plot', choices=plot_types, type=str, help=f'Plot results. Available types: {", ".join(plot_types)}')
2525
parser.add_argument('-pd', '--plot_directory', type=Path, help='Figures directory path. If not provided, figures are not saved automatically')
2626
parser.add_argument('-csv', '--csv', type=Path, help='Save the scan results to a csv with the given directory path')
@@ -50,6 +50,9 @@ def __init__(self, dataset, sample, parameters):
5050
self.sample = sample
5151
self.parameters = parameters
5252

53+
if parameters["preCC"]: self.scan_type = "preCC"
54+
else: self.scan_type = "highrange"
55+
5356
self.outliers_time = []
5457
self.outliers_adc = []
5558
self.outliers_number = 0
@@ -82,7 +85,11 @@ def read_data(path: Path):
8285
if has_pflib_header:
8386
data = pd.read_csv(path, skiprows=1, dtype=np.float64) # will probably add **kwargs later
8487

85-
run_params["samples"] = args.samples
88+
if args.samples: run_params["samples"] = args.samples
89+
else:
90+
time = data["time"].to_list()
91+
time_unique = list(np.unique(data["time"].to_numpy()))
92+
run_params["samples"] = time.count(time_unique[0])
8693

8794
return data, run_params
8895

@@ -92,12 +99,11 @@ def sort_data(raw_data, raw_data_parameters):
9299
raw_adc = raw_data["adc"].to_numpy()
93100
raw_time = raw_data["time"].to_numpy()
94101
raw_channels = raw_data["channel"].to_numpy()
95-
link = round(raw_channels[0]/72)
102+
link = round(raw_channels[0]/71)
96103

97-
if raw_data_parameters["preCC"]: raw_calibs = raw_data[f"REFERENCEVOLTAGE_0.CALIB_2V5"] # temporary change from {link} to 0, I think I accidentally made all preCC have link 0
104+
if raw_data_parameters["preCC"]: raw_calibs = raw_data[f"REFERENCEVOLTAGE_{link}.CALIB_2V5"]
98105
else:
99-
if args.wrong_header: raw_calibs = raw_data[f"REFERENCEVOLTAGE_0.CALIB_2V5"]
100-
else: raw_calibs = raw_data[f"REFERENCEVOLTAGE_{link}.CALIB"]
106+
raw_calibs = raw_data[f"REFERENCEVOLTAGE_{link}.CALIB"]
101107

102108

103109
scan_length = len(np.unique(raw_time))
@@ -145,7 +151,12 @@ def threshold_outlier_search(dataset : list, threshold : int):
145151

146152
def cluster_outlier_search(dataset : list):
147153

148-
if args.samples < 10:
154+
if not args.samples :
155+
if dataset[0].parameters['samples'] < 10:
156+
print("Please ensure at least 10 samples of the pulse are available. test")
157+
return
158+
159+
elif args.samples < 10:
149160
print("Please ensure at least 10 samples of the pulse are available.")
150161
return
151162

@@ -333,6 +344,31 @@ def outlier_phase_analysis(dataset : list):
333344
else:
334345
plt.show()
335346

347+
def outlier_bulk_evaluation(full_dataset):
    """Summarise the outliers found in each scan of *full_dataset*.

    Every element of *full_dataset* is one scan: a sequence of sample objects.
    The scan's ``calib``, ``channel`` and ``scan_type`` are read from its first
    sample; ``outliers_number`` and ``potential_outliers_number`` are read from
    every sample.

    Returns a ``pandas.DataFrame`` with one row per non-empty scan and the
    columns ``outlier_fraction`` (share of samples with at least one outlier),
    ``outlier_sum``, ``pot_outlier_sum``, ``calib``, ``channel``, ``samples``
    and ``scan_type``.

    NOTE: every value in the frame is text. Each row mixes numbers with the
    scan-type string, so numpy coerces the whole row to a string dtype.
    Consumers must convert the numeric columns back explicitly.
    """
    columns = ("outlier_fraction", "outlier_sum", "pot_outlier_sum", "calib",
               "channel", "samples", "scan_type")

    rows = []
    for dataset in full_dataset:
        samples = len(dataset)
        if samples == 0:
            # No first sample to read calib/channel/scan_type from, and the
            # fraction below would divide by zero: skip the empty scan.
            continue

        first = dataset[0]
        outlier_count = sum(1 for sample in dataset if sample.outliers_number != 0)
        outlier_sum = sum(sample.outliers_number for sample in dataset)
        potential_outlier_sum = sum(sample.potential_outliers_number for sample in dataset)

        rows.append(np.array((outlier_count / samples, outlier_sum, potential_outlier_sum,
                              first.calib, first.channel, samples, first.scan_type)))

    if not rows:
        # np.array([]) is one-dimensional, so column slicing would raise;
        # return an empty frame that still carries the expected columns.
        return pd.DataFrame(columns=list(columns))

    outlier_data = np.array(rows)
    return pd.DataFrame({name: outlier_data[:, index] for index, name in enumerate(columns)})
371+
336372
# ------- PLOTTING DATA -------
337373

338374
def plot_outliers(dataset : list, plot_type : str):
@@ -358,6 +394,7 @@ def plot_outliers(dataset : list, plot_type : str):
358394
plt.xlabel('time [ns]')
359395
plt.title(f"highrange = {dataset[i].parameters['highrange']}, preCC = {dataset[i].parameters['preCC']}, channel {dataset[i].channel}, CALIB {dataset[i].calib}; {dataset[i].parameters['samples']} samples")
360396
plt.legend()
397+
plt.grid()
361398
if args.plot_directory:
362399
plt.savefig(os.path.join(args.plot_directory,f'clustered_outliers.png'), dpi=400)
363400
plt.close()
@@ -377,13 +414,102 @@ def plot_outliers(dataset : list, plot_type : str):
377414
plt.xlabel('time [ns]')
378415
plt.title(f"highrange = {dataset[i].parameters['highrange']}, preCC = {dataset[i].parameters['preCC']}, channel {dataset[i].channel}, CALIB {dataset[i].calib}")
379416
plt.legend()
417+
plt.grid()
380418
if args.plot_directory:
381419
plt.savefig(os.path.join(args.plot_directory,f'sample_{i}_outliers.png'), dpi=300)
382420
plt.close()
383421
else:
384422
plt.show()
385423
plt.close()
386424

425+
def _plot_evaluation_metric(dataframe, types, calibs, column, dtype, kind, ylabel, title, filename):
    """Draw one evaluation figure for *column*, with one subplot per scan type.

    *kind* selects the artist: ``"bar"`` draws bars, anything else a scatter.
    The figure is saved as *filename* inside ``args.plot_directory`` when that
    option is set; otherwise it is shown interactively.
    """
    multi_type = len(types) > 1
    # squeeze=False always yields a 2-D axes array, so one loop serves both layouts.
    fig, axes = plt.subplots(len(types), 1, squeeze=False,
                             figsize=(12, 8) if multi_type else None)

    for ax, scan_type in zip(axes[:, 0], types):
        of_type = dataframe[dataframe.scan_type == scan_type]
        for calib in calibs:
            # The calib column is compared as text, hence str(calib).
            rows = of_type[of_type.calib == str(calib)]
            label = f"CALIB{calib}, {scan_type}" if multi_type else f"CALIB{calib}"
            draw = ax.bar if kind == "bar" else ax.scatter
            draw(rows.channel.to_numpy(dtype=np.int64), rows[column].to_numpy(dtype=dtype),
                 alpha=0.7, label=label)
        ax.set_xticks(np.arange(72))
        ax.set_xlabel("Channels")
        ax.set_ylabel(ylabel)
        ax.grid()
        ax.legend()
        if multi_type:
            ax.set_title(f"{scan_type}")

    if multi_type:
        fig.suptitle(f"{title}, {' and '.join(str(scan_type) for scan_type in types)}")
    else:
        axes[0, 0].set_title(f"{title}, {types[0]}")

    if args.plot_directory:
        fig.savefig(os.path.join(args.plot_directory, filename), dpi=400)
        plt.close(fig)
    else:
        plt.show()


def plot_evaluation(dataframe):
    """Plot the bulk outlier evaluation held in *dataframe*.

    Two figures are produced: the total number of outliers per channel
    (scatter) and the fraction of samples containing outliers per channel
    (bars). Each CALIB value gets its own series. When the frame holds more
    than one scan type, each type is drawn in its own subplot.

    *dataframe* must provide the columns ``scan_type``, ``calib``, ``channel``,
    ``outlier_sum`` and ``outlier_fraction``. The numeric columns are converted
    on read, so text values are accepted. Figures are written to
    ``args.plot_directory`` as ``outlier_evaluation_sum.png`` and
    ``outlier_evaluation_freq.png`` when that option is set; otherwise they are
    shown interactively.
    """
    types = np.unique(dataframe.scan_type.to_numpy())
    if len(types) == 0:
        # An empty frame has nothing to plot and no scan type for the title.
        print("No evaluation data available to plot.")
        return

    calibs = np.unique(dataframe.calib.to_numpy(dtype=np.int64))

    _plot_evaluation_metric(dataframe, types, calibs,
                            column="outlier_sum", dtype=np.int64, kind="scatter",
                            ylabel="Outlier sum",
                            title="Total number of outliers per channel",
                            filename="outlier_evaluation_sum.png")

    # Fractions are non-integer, so they must be read as floats (not int64),
    # and the figure gets its own file name so it does not overwrite the sum plot.
    _plot_evaluation_metric(dataframe, types, calibs,
                            column="outlier_fraction", dtype=np.float64, kind="bar",
                            ylabel="Outlier-sample frequency",
                            title="Fraction of samples with outliers per channel",
                            filename="outlier_evaluation_freq.png")
511+
512+
387513
# ------- SAVING DATA -------
388514

389515
def write_to_csv(dataset : list, save_path : Path):
@@ -431,10 +557,17 @@ def write_to_csv(dataset : list, save_path : Path):
431557
for data in working_data:
432558
outlier_phase_analysis(data)
433559

560+
if args.bulk_evaluation:
561+
outlier_dataframe = outlier_bulk_evaluation(working_data)
562+
434563
if args.csv:
435564
for data in working_data:
436565
write_to_csv(data, args.csv)
437566

438567
if args.plot:
439-
for data in working_data:
440-
plot_outliers(data, args.plot)
568+
if args.plot == "EVALUATION":
569+
plot_evaluation(outlier_dataframe)
570+
else:
571+
for data in working_data:
572+
plot_outliers(data, args.plot)
573+

0 commit comments

Comments
 (0)