-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathacroeval.py
145 lines (115 loc) · 5.23 KB
/
acroeval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import argparse
import glob
import os
from typing import List

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# import matplotlib.pyplot as plt
# Get a list of all CSV files
# i want to use and combine test-param/test-values
# files = glob.glob('*replay_id*.csv')
# files = glob.glob('individuals_p8_gen_cols64_gen_rows1_gen_lbacksNone_gen_pmtvs5_ea_offspgs1_ea_trnmtsz1_ea_mutrt0.08_ea_proc_cnt8_tfit0.95_exp_id*_replay_id*')
# Maps each parameter key as it appears inside a result file name to the
# human-readable label used when plotting. Iteration order is the order in
# which get_PUT_name() probes the keys.
props_human = {
    "_p": "Parents",
    "_gen_cols": "CGP columns",
    "_gen_rows": "CGP rows",
    "_gen_lbacks": "CGP L-Backs",
    "_gen_pmtvs": "CGP Primitives (operations)",
    "_ea_offspgs": "Offsprings",
    "_ea_trnmtsz": "Tournament size",
    "_ea_mutrt": "Mutation rate",
    "_ea_proc_cnt": "Parallel processes",
    "_tfit": "Termination fitness",
}
# PUT - property under test
def get_PUT_name(filenames: List[str]) -> str:
    """Return the key of the property under test (PUT) for a set of result files.

    Every file name is scanned for the keys of ``props_human``. The text
    between a key and the next underscore is taken as that property's value.
    The property under test is the first key whose value differs from the
    value recorded for an earlier file.

    Args:
        filenames: Paths (or bare names) of the result files to compare.

    Returns:
        The key from ``props_human`` whose value changes between the files.

    Raises:
        Exception: If ``filenames`` is empty.
        ValueError: If no property value differs between the files.
    """
    if not filenames:
        raise Exception("Filename list is empty")

    # Last value seen for each property key.
    seen_values = {}

    for file_name in filenames:
        # Match against the file's own name only; a directory component such
        # as "ea_proc_cnt" would otherwise be mistaken for the "_p" token.
        base_name = os.path.basename(file_name)

        for prop in props_human:
            start_index = base_name.find(prop)
            if start_index == -1:
                continue

            value_start = start_index + len(prop)
            # The value ends at the next underscore; the search starts one
            # character into the value, so the value is never empty.
            end_index = base_name.find("_", value_start + 1)
            if end_index == -1:
                end_index = len(base_name)
            prop_value = base_name[value_start:end_index]

            if prop in seen_values and seen_values[prop] != prop_value:
                return prop
            seen_values[prop] = prop_value

    raise ValueError('No property under test detected from file names')
# PUT values - the distinct values taken by the property under test
def get_PUT_values(filenames: List[str], queried_property: str) -> List[float]:
    """Collect the distinct numeric values of one property across file names.

    For each file name, the text between ``queried_property`` and the next
    underscore (or the end of the name) is parsed as a float. File names that
    do not contain the property are skipped.

    Args:
        filenames: Paths (or bare names) of the result files to inspect.
        queried_property: Property key to look for, e.g. ``"_ea_mutrt"``.

    Returns:
        The distinct values found, sorted in ascending order.

    Raises:
        Exception: If ``filenames`` is empty.
        ValueError: If a value is not numeric, or if the property does not
            occur in any of the file names.
    """
    if not filenames:
        raise Exception("Filename list is empty")

    unique_values = set()

    for file_name in filenames:
        # Match against the file's own name only; a directory component such
        # as "ea_proc_cnt" would otherwise be mistaken for the "_p" token.
        base_name = os.path.basename(file_name)

        start_index = base_name.find(queried_property)
        if start_index == -1:
            continue

        value_start = start_index + len(queried_property)
        # The value ends at the next underscore; the search starts one
        # character into the value, so the value is never empty.
        end_index = base_name.find("_", value_start + 1)
        if end_index == -1:
            end_index = len(base_name)
        prop_value = base_name[value_start:end_index]

        try:
            unique_values.add(float(prop_value))
        except ValueError as err:
            raise ValueError(
                f"Value '{prop_value}' of property '{queried_property}' "
                f"in '{file_name}' is not numeric."
            ) from err

    if not unique_values:
        raise ValueError(f"No values found for the property '{queried_property}' in the provided filenames.")

    return sorted(unique_values)
def main() -> None:
    """Merge per-run result CSVs into one summary CSV and show a boxplot.

    Reads every ``individuals_*`` file below ``--result-path``, reduces each
    file to a single summary row, writes the rows to
    ``merged<property>.csv`` in the same directory and plots the time to
    completion grouped by the value of the property under test.
    """
    parser = argparse.ArgumentParser(description='Benchmark an external program.')
    parser.add_argument('--result-path', type=str, required=True, help='Path to the results for evaluation e.g. "results/gen_cols/"')
    args = parser.parse_args()

    # Sorted so that the merged CSV has a reproducible row order; glob itself
    # returns matches in arbitrary order.
    files = sorted(glob.glob(f'{args.result_path}/individuals_*'))
    test_prop_name = get_PUT_name(files)

    frames = []
    for path in files:
        print(path)
        df = pd.read_csv(path)

        # Aggregate the per-row records of this run.
        total_time = df['Time To Completion [seconds]'].sum()
        max_fitness = df['Fitness'].max()
        num_generations = df['Individual ID'].nunique()

        # Taken from the first row, assuming all rows of a file share the
        # same experiment ID and replay ID.
        experiment_id = df['Experiment ID'].iloc[0]
        replay_id = df['Replay ID'].iloc[0]

        # Read the tested value from this file's own name rather than
        # indexing a sorted value list by experiment ID, which is only right
        # when IDs are 0-based and follow the ascending order of the values.
        test_prop_value = get_PUT_values([path], test_prop_name)[0]

        frames.append(pd.DataFrame({
            'Experiment ID': [experiment_id],
            'Replay ID': [replay_id],
            'Test Property Value': [test_prop_value],
            'Experiment\'s Time To Completion [seconds]': [total_time],
            'Experiment Max Fitness': [max_fitness],
            'Number of Generations': [num_generations],
        }))

    result = pd.concat(frames, ignore_index=True)
    result.to_csv(f'{args.result_path}/merged{test_prop_name}.csv', index=False)

    # Boxplot of the run time per value of the property under test.
    plt.figure(figsize=(10, 6))
    sns.boxplot(x='Test Property Value', y='Experiment\'s Time To Completion [seconds]', data=result).set(xlabel=f'{props_human[test_prop_name]}')
    plt.title('Boxplot of Experiment\'s Time To Completion [seconds] for Each Experiment ID')
    plt.show()


if __name__ == "__main__":
    main()