Skip to content

Commit

Permalink
Ignore n levels instead of just leaves
Browse files Browse the repository at this point in the history
  • Loading branch information
incaseoftrouble committed Mar 31, 2024
1 parent 12a80cf commit c294390
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 21 deletions.
14 changes: 10 additions & 4 deletions copydetect/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,16 @@ def main():
parser.add_argument("-s", '--same-name', dest='same_name',
action='store_true', default=False,
help="only compare files which have the same name")
parser.add_argument("-l", '--ignore-leaf', dest='ignore_leaf',
action='store_true', default=False,
help="don't compare files located in the same "
"leaf directory")

grouping = parser.add_mutually_exclusive_group()
grouping.add_argument("-l", '--ignore-leaf', dest='ignore_depth',
action='store_const', const=1,
help="don't compare files located in the same "
"leaf directory")
grouping.add_argument('--ignore-depth', dest='ignore_depth',
type=int, help="don't compare files whose n'th parent "
"is the same folder", default=0)

parser.add_argument("-f", '--disable-filter', dest='filter',
action='store_true', default=False,
help="disable code tokenization and filtering")
Expand Down
6 changes: 3 additions & 3 deletions copydetect/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class CopydetectConfig:
guarantee_t: int = defaults.GUARANTEE_THRESHOLD
display_t: float = defaults.DISPLAY_THRESHOLD
same_name_only: bool = False
ignore_leaf: bool = False
ignore_depth: int = 0
autoopen: bool = True
disable_filtering: bool = False
force_language: Optional[str] = None
Expand Down Expand Up @@ -52,8 +52,8 @@ def _check_arguments(self):
raise TypeError("Boilerplate directories must be a list")
if not isinstance(self.same_name_only, bool):
raise TypeError("same_name_only must be true or false")
if not isinstance(self.ignore_leaf, bool):
raise TypeError("ignore_leaf must be true or false")
if not isinstance(self.ignore_depth, int):
raise TypeError("ignore_depth must be an integer")
if not isinstance(self.disable_filtering, bool):
raise TypeError("disable_filtering must be true or false")
if not isinstance(self.autoopen, bool):
Expand Down
36 changes: 22 additions & 14 deletions copydetect/detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,9 +221,9 @@ class CopyDetector:
same_name_only : bool
If true, the detector will only compare files that have the
same name
ignore_leaf : bool
If true, the detector will not compare files located in the
same leaf directory.
ignore_depth : int
If nonzero, the detector will not compare files whose n'th
parent directories are the same (1 means the same leaf
directory). 0 disables this check.
autoopen : bool
If true, the detector will automatically open a webbrowser to
display the results of generate_html_report
Expand Down Expand Up @@ -252,7 +252,7 @@ def __init__(self, test_dirs=None, ref_dirs=None,
noise_t=defaults.NOISE_THRESHOLD,
guarantee_t=defaults.GUARANTEE_THRESHOLD,
display_t=defaults.DISPLAY_THRESHOLD,
same_name_only=False, ignore_leaf=False, autoopen=True,
same_name_only=False, ignore_depth=0, autoopen=True,
disable_filtering=False, force_language=None,
truncate=False, out_file="./report.html", css_files=None,
silent=False, encoding: str = "utf-8"):
Expand Down Expand Up @@ -322,27 +322,29 @@ def _get_file_list(self, dirs, exts):
# convert to a set to remove duplicates, then back to a list
return list(set(file_list))

def add_file(self, filename, type="testref"):
def add_file(self, filename, file_type="testref"):
"""Adds a file to the list of test files, reference files, or
boilerplate files.
Parameters
----------
filename : str
Name of file to add.
type : {"testref", "test", "ref", "boilerplate"}
file_type : {"testref", "test", "ref", "boilerplate"}
Type of file to add. "testref" will add the file as both a
test and reference file. Any other value raises ValueError.
"""
if type == "testref":
if file_type == "testref":
self.test_files.append(filename)
self.ref_files.append(filename)
elif type == "test":
elif file_type == "test":
self.test_files.append(filename)
elif type == "ref":
elif file_type == "ref":
self.ref_files.append(filename)
elif type == "boilerplate":
elif file_type == "boilerplate":
self.boilerplate_files.append(filename)
else:
raise ValueError(file_type)

def _get_boilerplate_hashes(self):
"""Generates a list of hashes of the boilerplate text. Returns
Expand Down Expand Up @@ -418,18 +420,24 @@ def _comparison_loop(self):

for i, test_f in enumerate(
tqdm(self.test_files,
bar_format= ' {l_bar}{bar}{r_bar}',
bar_format=' {l_bar}{bar}{r_bar}',
disable=self.conf.silent)
):
test_path = Path(test_f).resolve()
for j, ref_f in enumerate(self.ref_files):
ref_path = Path(ref_f).resolve()
if (test_f not in self.file_data
or ref_f not in self.file_data
or test_f == ref_f
or (self.conf.same_name_only
and (Path(test_f).name != Path(ref_f).name))
or (self.conf.ignore_leaf
and (Path(test_f).parent == Path(ref_f).parent))):
and (test_path.name != ref_path.name))):
continue
if self.conf.ignore_depth:
depth = self.conf.ignore_depth - 1
ref_parents, test_parents = ref_path.parents, test_path.parents
if (len(test_parents) >= depth and len(ref_parents) >= depth
and test_parents[depth] == ref_parents[depth]):
continue

if (ref_f, test_f) in comparisons:
ref_idx, test_idx = comparisons[(ref_f, test_f)]
Expand Down

0 comments on commit c294390

Please sign in to comment.