EDCR_PyReason_AirSim/utils.py at main · lab-v2/EDCR_PyReason_AirSim · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
import ast
import json
import typing
import datetime
import math
import pandas as pd
from openpyxl import Workbook
from collections import defaultdict


def format_output(dict_output):
    filtered_dict = {}
    pfca_dict = {}
    for k, v in dict_output[0].items():
        if v and not (len(v) == 1 and 'ic' in v and v['ic'] in [(1.0, 1.0), (0.0, 0.0)]):
            filtered_dict[k] = v
    for k, v in filtered_dict.items():
        if 'pfca' in v:
            pfca_dict[k] = v

    return [filtered_dict, pfca_dict]


def colored_text(color: str):
    index = {'red': 1,
             'green': 2,
             'blue': 4}[color]

    return lambda s: f"\033[9{index}m{s}\033[0m"


def green_text(s: typing.Union[str, float]) -> str:
    return colored_text('green')(s)


def red_text(s: typing.Union[str, float]) -> str:
    return colored_text('red')(s)


def blue_text(s: typing.Union[str, float]) -> str:
    return colored_text('blue')(s)


def read_file_as_dict(path):
    try:
        with open(path, 'r') as file:
            print(blue_text(f"\nReading file from {path}..."))
            content = file.read()
            # Convert the text into a Python dictionary
            data = ast.literal_eval(content)
            print(blue_text(f"File read successfully"))
            return data
    except (FileNotFoundError, ValueError) as e:
        print(f"Error reading the file: {e}")
        return None


def format_seconds(seconds: float):
    """
    Formats a given number of seconds into a human-readable string indicating the largest
    unit of time (hours, minutes, seconds, milliseconds, microseconds, or nanoseconds) and
    the corresponding value. This function utilizes a `datetime.timedelta` object for precise
    time arithmetic and makes use of Python's built-in methods for conversion to readable format.

    This function helps in converting raw time durations into user-friendly textual
    representations, such as "2 hours", "45 minutes", "30 seconds", "500 milliseconds",
    "300 microseconds", or "100 nanoseconds", depending on the input duration.

    :param seconds: The time duration in seconds to format
    :return: A string representation of the time duration in a human-readable form, such as
             "x hours", "x minutes", "x seconds", "x milliseconds", "x microseconds", or "x nanoseconds"
    """
    # Create a timedelta object with the given seconds
    time_delta = datetime.timedelta(seconds=seconds)

    # Use the total_seconds() method to get the total number of seconds
    total_seconds = time_delta.total_seconds()

    # Use divmod to get the hours and minutes
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)

    # Create the formatted string
    if hours > 0:
        return f"{math.floor(hours)} hour{'s' if hours > 1 else ''}"
    elif minutes > 0:
        return f"{math.floor(minutes)} minute{'s' if minutes > 1 else ''}"
    elif seconds >= 1:
        return f"{math.floor(seconds)} second{'s' if seconds > 1 else ''}"
    else:
        milliseconds = total_seconds * 1000
        if milliseconds >= 1:
            return f"{math.floor(milliseconds)} millisecond{'s' if milliseconds > 1 else ''}"
        else:
            microseconds = milliseconds * 1000
            if microseconds >= 1:
                return f"{math.floor(microseconds)} microsecond{'s' if microseconds > 1 else ''}"
            else:
                nanoseconds = microseconds * 1000
                return f"{math.floor(nanoseconds)} nanosecond{'s' if nanoseconds > 1 else ''}"


def compute_incon(assignments: typing.List[typing.Tuple[str, int]]):
    assignments = set(assignments)
    processed_objects = {}
    inconsistent_objects = set()
    inconsistencies = 0

    for object_and_prediction in assignments:
        current_object, current_prediction = object_and_prediction

        if current_object in processed_objects:
            # If the object already has a prediction and it's different from the current one,
            # count as inconsistent, but only once

            if processed_objects[current_object] != current_prediction and current_object not in inconsistent_objects:
                inconsistent_objects.add(current_object)
                inconsistencies += 1
        else:
            processed_objects[current_object] = current_prediction

    inconsistencies_percentage = inconsistencies / len(processed_objects) if len(processed_objects) > 0 else 0

    assert 1 >= inconsistencies_percentage >= 0

    return inconsistencies_percentage


def resolve_inconsistencies_TB_with_avg_diff(assignments):
    best_class_for_obj = defaultdict(lambda: (None, -1))
    obj_all_confs = defaultdict(list)

    # Recopilar todas las confianzas por objeto
    for obj, cls, conf in assignments:
        obj_all_confs[obj].append(conf)

    # Resolver inconsistencias
    for obj, cls, conf in assignments:
        if conf > best_class_for_obj[obj][1]:
            best_class_for_obj[obj] = (cls, conf)

    resolved_tuples = [(obj, cls) for obj, (cls, conf) in best_class_for_obj.items()]

    # Calcular diferencias promedio para objetos con múltiples predicciones
    diffs_sum = 0
    diffs_count = 0

    for obj, confs in obj_all_confs.items():
        if len(confs) > 1:
            # Ordenar confianzas de mayor a menor
            confs.sort(reverse=True)
            # Calcular diferencias entre confianzas consecutivas
            for i in range(len(confs) - 1):
                diffs_sum += confs[i] - confs[i + 1]
                diffs_count += 1

    avg_diff = diffs_sum / diffs_count if diffs_count > 0 else 0

    return resolved_tuples, avg_diff


def compute_performance_metrics(gt, preds):
    """
        Calculate performance metrics (precision, recall, F1 score, and accuracy)
        based on the provided predictions and ground truth.

        Args:
            gt (set): Set of tuples (obj, class) representing the ground truth.
            preds (list): List of tuples (obj, class) representing the predictions.

        Returns:
            dict: A dictionary with the calculated performance metrics:
                - "precision" (float): The precision of the predictions.
                - "recall" (float): The recall of the predictions.
                - "f1" (float): The F1 score of the predictions.
                - "accuracy" (float): The accuracy of the predictions.
        """
    # Convert predictions to a set
    predictions = set(preds)

    # True positives: intersection between ground truth and predictions
    tp = gt & predictions

    # False negatives: ground truth minus predictions
    fn = gt - predictions

    # False positives: predictions minus ground truth
    fp = predictions - gt

    # Calculate precision
    precision = len(tp) / (len(tp) + len(fp)) if (len(tp) + len(fp)) > 0 else 0

    # Calculate recall
    recall = len(tp) / (len(tp) + len(fn)) if (len(tp) + len(fn)) > 0 else 0

    # Calculate accuracy
    # Total cases: TP + FP + FN
    total = len(tp) + len(fp) + len(fn)
    accuracy = len(tp) / total if total > 0 else 0

    # Calculate F1 score
    f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    # Return the metrics rounded to two decimal places
    return {
        "precision": round(precision, 2),
        "recall": round(recall, 2),
        "f1": round(f1, 2),
        "accuracy": round(accuracy, 2)
    }


def get_ground_truth(df):
    filtered_df = df[df['ground_truth_category_id'].notnull()]
    tuples_list = list(zip(filtered_df['object'], filtered_df['ground_truth_category_id']))
    ground_truth = set(tuples_list)
    return ground_truth


def compute_before_and_after_EDCR(predictions):
    models = predictions['model'].unique()
    output = []
    global_metrics = []

    for model in models:
        data_model = predictions[predictions['model'] == model]
        gt = get_ground_truth(data_model)
        before_preds = [(obj, pred) for obj, pred in zip(data_model['object'], data_model['prediction_category_id']) if
                        pd.notnull(pred)]
        after_preds = [(obj, pred) for obj, pred in zip(data_model['object'], data_model['after_edcr']) if
                       pd.notnull(pred)]

        before = compute_performance_metrics(gt, before_preds)
        after = compute_performance_metrics(gt, after_preds)

        output.append([model, "Before", before['precision'], before['recall'], before['f1'], before['accuracy']])
        output.append([model, "After", after['precision'], after['recall'], after['f1'], after['accuracy']])

    # Global metrics
    gt = get_ground_truth(predictions)
    before_preds = [(obj, pred) for obj, pred in zip(predictions['object'], predictions['prediction_category_id']) if
                    pd.notnull(pred)]
    after_preds = [(obj, pred) for obj, pred in zip(predictions['object'], predictions['after_edcr']) if
                   pd.notnull(pred)]
    before = compute_performance_metrics(gt, before_preds)
    after = compute_performance_metrics(gt, after_preds)
    inc_before = compute_incon(before_preds)
    inc_after = compute_incon(after_preds)

    global_metrics.append(
        ["Global", "Before", before['precision'], before['recall'], before['f1'], before['accuracy'], inc_before])
    global_metrics.append(
        ["Global", "After", after['precision'], after['recall'], after['f1'], after['accuracy'], inc_after])

    return output, global_metrics


def show_metrics(test_set_path):
    epsilon_values = [0.01, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

    # Initialize dictionaries to store results
    metrics_summary_global = {
        "%_Incon": {"Before": [], "After": []},
        "Precision": {"Before": [], "After": []},
        "Recall": {"Before": [], "After": []},
        "F1": {"Before": [], "After": []},
        "Accuracy": {"Before": [], "After": []}
    }

    metrics_summary_local = {}

    for epsilon in epsilon_values:
        print(f"Calculating Epsilon metrics = {epsilon}")
        predictions = pd.read_excel(f'{test_set_path}/epsilon-{epsilon}.xlsx')
        model_results, global_metrics = compute_before_and_after_EDCR(predictions)

        # Save global metrics
        for metric in global_metrics:
            phase = metric[1]  # Before o After
            metrics_summary_global["Precision"][phase].append(metric[2])
            metrics_summary_global["Recall"][phase].append(metric[3])
            metrics_summary_global["F1"][phase].append(metric[4])
            metrics_summary_global["Accuracy"][phase].append(metric[5])
            metrics_summary_global["%_Incon"][phase].append(metric[6])

        # Save local metrics by model
        for result in model_results:
            model = result[0]
            phase = result[1]

            if model not in metrics_summary_local:
                metrics_summary_local[model] = {
                    "Precision": {"Before": [], "After": []},
                    "Recall": {"Before": [], "After": []},
                    "F1": {"Before": [], "After": []},
                    "Accuracy": {"Before": [], "After": []}
                }

            metrics_summary_local[model]["Precision"][phase].append(result[2])
            metrics_summary_local[model]["Recall"][phase].append(result[3])
            metrics_summary_local[model]["F1"][phase].append(result[4])
            metrics_summary_local[model]["Accuracy"][phase].append(result[5])

    # To save in Excel
    wb = Workbook()

    # Global metric sheet
    ws_global = wb.active
    ws_global.title = "Global Metrics"

    header = [f"ε={e}" for e in epsilon_values]
    ws_global.append(["Metric"] + header)

    for metric_name in metrics_summary_global:
        ws_global.append([f"{metric_name}_base"] + metrics_summary_global[metric_name]["Before"])
        ws_global.append([f"{metric_name}"] + metrics_summary_global[metric_name]["After"])

    # Local metric sheet per model
    ws_local = wb.create_sheet(title="Local Metrics by Model")
    ws_local.append(["Model", "Metric"] + header)

    for model in metrics_summary_local:
        for metric_name in metrics_summary_local[model]:
            ws_local.append([model, f"{metric_name}_base"] + metrics_summary_local[model][metric_name]["Before"])
            ws_local.append([model, f"{metric_name}"] + metrics_summary_local[model][metric_name]["After"])

    # Save file
    wb.save(f"{test_set_path}/before_after_edr_testset_uni1.xlsx")


def format_int_prog_results(results_path):
    epsilon_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    data = []

    for epsilon in epsilon_list:
        file_name = f'{results_path}/epsilon-{epsilon}-metrics.jsonl'

        try:
            with open(file_name, 'r') as file:
                for line in file:
                    line = line.strip()
                    if not line:
                        continue  # Saltar líneas vacías

                    try:
                        record = json.loads(line)
                        entry = {
                            'epsilon': record.get('epsilon', epsilon),
                            'delta': record.get('delta'),
                            'precision': record['metrics'].get('precision'),
                            'recall': record['metrics'].get('recall'),
                            'f1': record['metrics'].get('f1'),
                            'accuracy': record['metrics'].get('accuracy'),
                            'inconsistencies': record['metrics'].get('inconsistencies'),
                            'tb_precision': record['tb_metrics'].get('precision'),
                            'tb_recall': record['tb_metrics'].get('recall'),
                            'tb_f1': record['tb_metrics'].get('f1'),
                            'tb_accuracy': record['tb_metrics'].get('accuracy'),
                            'tb_inconsistencies': record['tb_metrics'].get('inconsistencies')
                        }
                        data.append(entry)
                    except json.JSONDecodeError as e:
                        print(f"JSON decode error in file {file_name}, line: {line[:50]}... Error: {e}")

        except FileNotFoundError:
            print(f"File Not Found: {file_name}")

    df = pd.DataFrame(data)
    df.to_excel(f'{results_path}/metrics_summary_IP.xlsx', index=False)