Skip to content

Commit

Permalink
progress bar shows pairs
Browse files: browse the repository at this point in the history
  • Loading branch information
incaseoftrouble committed Apr 6, 2024
1 parent 7522fde commit 86bdfce
Showing 1 changed file with 20 additions and 24 deletions.
44 changes: 20 additions & 24 deletions copydetect/detector.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -420,32 +420,28 @@ def _comparison_loop(self):

test_indices = {f: i for i, f in enumerate(self.test_files)}
ref_indices = {f: i for i, f in enumerate(self.ref_files)}
comparison_by_file = defaultdict(list)
for test, ref in self.get_comparison_pairs():
comparison_by_file[test].append(ref)

for test_f, references in tqdm(comparison_by_file.items(),
bar_format=' {l_bar}{bar}{r_bar}',
disable=self.conf.silent
):

for test_f, ref_f in tqdm(self.get_comparison_pairs(),
bar_format=' {l_bar}{bar}{r_bar}',
disable=self.conf.silent
):
i = test_indices[test_f]
for ref_f in references:
j = ref_indices[ref_f]
j = ref_indices[ref_f]

if (ref_f, test_f) in comparisons:
ref_idx, test_idx = comparisons[(ref_f, test_f)]
overlap = self.token_overlap_matrix[ref_idx, test_idx]
sim2, sim1 = self.similarity_matrix[ref_idx, test_idx]
else:
overlap, (sim1, sim2), (slices1, slices2) = compare_files(
self.file_data[test_f], self.file_data[ref_f]
)
comparisons[(test_f, ref_f)] = (i, j)
if slices1.shape[0] != 0:
self.slice_matrix[(test_f, ref_f)] = [slices1, slices2]

self.similarity_matrix[i, j] = np.array([sim1, sim2])
self.token_overlap_matrix[i, j] = overlap
if (ref_f, test_f) in comparisons:
ref_idx, test_idx = comparisons[(ref_f, test_f)]
overlap = self.token_overlap_matrix[ref_idx, test_idx]
sim2, sim1 = self.similarity_matrix[ref_idx, test_idx]
else:
overlap, (sim1, sim2), (slices1, slices2) = compare_files(
self.file_data[test_f], self.file_data[ref_f]
)
comparisons[(test_f, ref_f)] = (i, j)
if slices1.shape[0] != 0:
self.slice_matrix[(test_f, ref_f)] = [slices1, slices2]

self.similarity_matrix[i, j] = np.array([sim1, sim2])
self.token_overlap_matrix[i, j] = overlap

if not self.conf.silent:
print(f"{time.time()-start_time:6.2f}: Code comparison completed")
Expand Down

0 comments on commit 86bdfce

Please sign in to comment.