From 4add25ae3364e4c8fd3b611566aaa898b8af2815 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Fri, 13 Jan 2023 12:07:13 +0100 Subject: [PATCH 1/8] New version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 015d52ba..d4642b64 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ setup( name="pytest-workflow", - version="2.0.1", + version="2.1.0-dev", description="A pytest plugin for configuring workflow/pipeline tests " "using YAML files", author="Leiden University Medical Center", From f81a588f634cd9fe048ea2e79c7934e1cde210d2 Mon Sep 17 00:00:00 2001 From: Will Holtz Date: Fri, 20 Jan 2023 00:49:35 -0800 Subject: [PATCH 2/8] Document test discovery naming conventions (#172) * docs: conventions for test discovery * docs: update HISTORY.rst * add back end of file newline * use version independent URL --- HISTORY.rst | 4 ++++ docs/writing_tests.rst | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index d05d7d92..3a71c2b1 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -7,6 +7,10 @@ Changelog .. This document is user facing. Please word the changes in such a way .. that users understand how the changes affect the new version. +version 2.1.0-dev +--------------------------- +* Document naming conventions for Python test discovery + version 2.0.1 --------------------------- + Fixed a bug where pytest-workflow would crash on logs that used non-ASCII diff --git a/docs/writing_tests.rst b/docs/writing_tests.rst index 9d75d435..db09e66f 100644 --- a/docs/writing_tests.rst +++ b/docs/writing_tests.rst @@ -160,6 +160,10 @@ Multiple workflows can use the same custom test like this: points to the folder where the named workflow was executed. This allows writing of advanced python tests for each file produced by the workflow. +Custom tests must follow the `conventions for Python test discovery +<https://docs.pytest.org/en/stable/explanation/goodpractices.html#conventions-for-python-test-discovery>`_, +which constrain the names of files and functions containing custom tests. + ..
note:: stdout and stderr are available as files in the root of the From ec8be3c6f83720047fafd7691bad924e225d9aac Mon Sep 17 00:00:00 2001 From: Will Holtz Date: Wed, 21 Jun 2023 17:07:12 -0700 Subject: [PATCH 3/8] Add ungzip_md5sum check --- HISTORY.rst | 5 ++++- docs/writing_tests.rst | 1 + src/pytest_workflow/file_tests.py | 30 ++++++++++++++++++++++-------- src/pytest_workflow/schema.py | 3 +++ src/pytest_workflow/util.py | 31 +++++++++++++++++++++++++++---- tests/test_schema.py | 1 + tests/test_utils.py | 11 ++++++++++- 7 files changed, 68 insertions(+), 14 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 3a71c2b1..8b2b8ef0 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -9,7 +9,10 @@ Changelog version 2.1.0-dev --------------------------- -* Document naming conventions for Python test discovery ++ Add md5sum checking on unzipped contents of gzipped output files. Gzipped + files contain a timestamp which makes it hard to directly compare the md5sums + of gzipped files. ++ Document naming conventions for Python test discovery version 2.0.1 --------------------------- diff --git a/docs/writing_tests.rst b/docs/writing_tests.rst index db09e66f..0e11a7d7 100644 --- a/docs/writing_tests.rst +++ b/docs/writing_tests.rst @@ -64,6 +64,7 @@ Test options - path: "TomCruise.txt.gz" # Gzipped files can also be searched, provided their extension is '.gz' contains: - "starring" + ungzip_md5sum: e27c52f6b5f8152aa3ef58be7bdacc4d # Md5sum of the ungzipped file (optional) stderr: # Options for testing stderr (optional) contains: # A list of strings which should be in stderr (optional) - "BSOD error, please contact the IT crowd" diff --git a/src/pytest_workflow/file_tests.py b/src/pytest_workflow/file_tests.py index 17642fc8..5503154b 100644 --- a/src/pytest_workflow/file_tests.py +++ b/src/pytest_workflow/file_tests.py @@ -22,7 +22,7 @@ from .content_tests import ContentTestCollector from .schema import FileTest -from .util import file_md5sum +from .util import 
file_md5sum, gzip_md5sum from .workflow import Workflow @@ -76,7 +76,16 @@ def collect(self): parent=self, filepath=filepath, md5sum=self.filetest.md5sum, - workflow=self.workflow)] + workflow=self.workflow, + ungzip=False)] + + if self.filetest.ungzip_md5sum: + tests += [FileMd5.from_parent( + parent=self, + filepath=filepath, + md5sum=self.filetest.ungzip_md5sum, + workflow=self.workflow, + ungzip=True)] return tests @@ -119,20 +128,22 @@ def repr_failure(self, excinfo, style=None): class FileMd5(pytest.Item): def __init__(self, parent: pytest.Collector, filepath: Path, - md5sum: str, workflow: Workflow): + md5sum: str, workflow: Workflow, ungzip: bool): """ Create a tests for the file md5sum. :param parent: The collector that started this item :param filepath: The path to the file :param md5sum: The expected md5sum :param workflow: The workflow running to generate the file + :param ungzip: Whether the file should be ungzipped before calculating """ - name = "md5sum" + name = "unzip_md5sum" if ungzip else "md5sum" super().__init__(name, parent) self.filepath = filepath self.expected_md5sum = md5sum self.observed_md5sum = None self.workflow = workflow + self.ungzip = ungzip def runtest(self): # Wait for the workflow to finish before we check the md5sum of a file. 
@@ -140,11 +151,14 @@ def runtest(self): if not self.workflow.matching_exitcode(): pytest.skip(f"'{self.parent.workflow.name}' did not exit with" f"desired exit code.") - self.observed_md5sum = file_md5sum(self.filepath) + sum_func = gzip_md5sum if self.ungzip else file_md5sum + self.observed_md5sum = sum_func(self.filepath) assert self.observed_md5sum == self.expected_md5sum def repr_failure(self, excinfo, style=None): + metric = "ungzip_md5sum" if self.ungzip else "md5sum" return ( - f"Observed md5sum '{self.observed_md5sum}' not equal to expected " - f"md5sum '{self.expected_md5sum}' for file '{self.filepath}'" - ) + f"Observed {metric} '{self.observed_md5sum}' not equal to " + f"expected {metric} '{self.expected_md5sum}' for file " + f"'{self.filepath}'" + ) diff --git a/src/pytest_workflow/schema.py b/src/pytest_workflow/schema.py index c8c3e40f..ed74b6c3 100644 --- a/src/pytest_workflow/schema.py +++ b/src/pytest_workflow/schema.py @@ -125,6 +125,7 @@ def __init__(self, contains: Optional[List[str]] = None, class FileTest(ContentTest): """A class that contains all the properties of a to be tested file.""" def __init__(self, path: str, md5sum: Optional[str] = None, + ungzip_md5sum: Optional[str] = None, should_exist: bool = DEFAULT_FILE_SHOULD_EXIST, contains: Optional[List[str]] = None, must_not_contain: Optional[List[str]] = None, @@ -135,6 +136,7 @@ def __init__(self, path: str, md5sum: Optional[str] = None, A container object :param path: the path to the file :param md5sum: md5sum of the file contents + :param unzip_md5sum: md5sum of the unzipped file contents :param should_exist: whether the file should exist or not :param contains: a list of strings that should be present in the file :param must_not_contain: a list of strings that should not be present @@ -150,6 +152,7 @@ def __init__(self, path: str, md5sum: Optional[str] = None, encoding=encoding) self.path = Path(path) self.md5sum = md5sum + self.ungzip_md5sum = ungzip_md5sum self.should_exist = 
should_exist diff --git a/src/pytest_workflow/util.py b/src/pytest_workflow/util.py index a7c91bc0..a91c9b5c 100644 --- a/src/pytest_workflow/util.py +++ b/src/pytest_workflow/util.py @@ -1,4 +1,5 @@ import functools +import gzip import hashlib import os import re @@ -7,7 +8,7 @@ import sys import warnings from pathlib import Path -from typing import Callable, Iterator, List, Optional, Set, Tuple, Union +from typing import BinaryIO, Callable, Iterator, List, Optional, Set, Tuple, Union Filepath = Union[str, os.PathLike] @@ -204,10 +205,32 @@ def file_md5sum(filepath: Path, block_size=64 * 1024) -> str: :param block_size: Block size in bytes :return: a md5sum as hexadecimal string. """ - hasher = hashlib.md5() with filepath.open('rb') as file_handler: # Read the file in bytes - for block in iter(lambda: file_handler.read(block_size), b''): - hasher.update(block) + return file_handle_md5sum(file_handler, block_size) + + +def gzip_md5sum(filepath: Path, block_size=64 * 1024) -> str: + """ + Generates a md5sum for the uncompressed contents of gzipped file. + Reads file in blocks to save memory. + :param filepath: a pathlib. Path to the gzipped file + :param block_size: Block size in bytes + :return: a md5sum as hexadecimal string. + """ + with gzip.open(filepath) as file_handler: # Read the file in bytes + return file_handle_md5sum(file_handler, block_size) + + +def file_handle_md5sum(file_handler: BinaryIO, block_size) -> str: + """ + Generates a md5sum for a file handle. Reads file in blocks to save memory. + :param file_handler: a readable binary file handler + :param block_size: Block size in bytes + :return: a md5sum as hexadecimal string. 
+ """ + hasher = hashlib.md5() + for block in iter(lambda: file_handler.read(block_size), b''): + hasher.update(block) return hasher.hexdigest() diff --git a/tests/test_schema.py b/tests/test_schema.py index 378be288..98edfc95 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -171,6 +171,7 @@ def test_filetest_defaults(): assert file_test.contains_regex == [] assert file_test.must_not_contain_regex == [] assert file_test.md5sum is None + assert file_test.ungzip_md5sum is None assert file_test.should_exist diff --git a/tests/test_utils.py b/tests/test_utils.py index 45f789c3..b2f9f376 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -13,6 +13,7 @@ # # You should have received a copy of the GNU Affero General Public License # along with pytest-workflow. If not, see Date: Wed, 21 Jun 2023 17:36:15 -0700 Subject: [PATCH 4/8] add ungzip_md5sum to schema.json --- src/pytest_workflow/schema/schema.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pytest_workflow/schema/schema.json b/src/pytest_workflow/schema/schema.json index 9ead66bb..82ec80c1 100644 --- a/src/pytest_workflow/schema/schema.json +++ b/src/pytest_workflow/schema/schema.json @@ -123,6 +123,10 @@ "should_exist": { "type": "boolean" }, + "ungzip_md5sum": { + "type": "string", + "pattern": "^[a-f0-9]{32}$" + }, "contains": { "type": "array", "items": { From 684f314de1b3da2234e53c9de923eb037652169d Mon Sep 17 00:00:00 2001 From: Will Holtz Date: Wed, 21 Jun 2023 17:54:59 -0700 Subject: [PATCH 5/8] fix unzip_md5sum to ungzip_md5sum --- src/pytest_workflow/file_tests.py | 2 +- src/pytest_workflow/schema.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pytest_workflow/file_tests.py b/src/pytest_workflow/file_tests.py index 5503154b..99030c79 100644 --- a/src/pytest_workflow/file_tests.py +++ b/src/pytest_workflow/file_tests.py @@ -137,7 +137,7 @@ def __init__(self, parent: pytest.Collector, filepath: Path, :param workflow: The workflow running to 
generate the file :param ungzip: Whether the file should be ungzipped before calculating """ - name = "unzip_md5sum" if ungzip else "md5sum" + name = "ungzip_md5sum" if ungzip else "md5sum" super().__init__(name, parent) self.filepath = filepath self.expected_md5sum = md5sum diff --git a/src/pytest_workflow/schema.py b/src/pytest_workflow/schema.py index ed74b6c3..d4eada2f 100644 --- a/src/pytest_workflow/schema.py +++ b/src/pytest_workflow/schema.py @@ -136,7 +136,7 @@ def __init__(self, path: str, md5sum: Optional[str] = None, A container object :param path: the path to the file :param md5sum: md5sum of the file contents - :param unzip_md5sum: md5sum of the unzipped file contents + :param ungzip_md5sum: md5sum of the unzipped file contents :param should_exist: whether the file should exist or not :param contains: a list of strings that should be present in the file :param must_not_contain: a list of strings that should not be present From a22950cf58a531d7db6a5cc29d8d032f0634f98f Mon Sep 17 00:00:00 2001 From: Will Holtz Date: Wed, 21 Jun 2023 18:16:28 -0700 Subject: [PATCH 6/8] fix typing for gzip file handles --- src/pytest_workflow/util.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/pytest_workflow/util.py b/src/pytest_workflow/util.py index a91c9b5c..491033d0 100644 --- a/src/pytest_workflow/util.py +++ b/src/pytest_workflow/util.py @@ -8,7 +8,8 @@ import sys import warnings from pathlib import Path -from typing import BinaryIO, Callable, Iterator, List, Optional, Set, Tuple, Union +from typing import Callable, IO, Iterator, List, Optional, Set, Tuple, Union, \ + cast Filepath = Union[str, os.PathLike] @@ -218,10 +219,10 @@ def gzip_md5sum(filepath: Path, block_size=64 * 1024) -> str: :return: a md5sum as hexadecimal string. 
""" with gzip.open(filepath) as file_handler: # Read the file in bytes - return file_handle_md5sum(file_handler, block_size) + return file_handle_md5sum(cast(IO[bytes], file_handler), block_size) -def file_handle_md5sum(file_handler: BinaryIO, block_size) -> str: +def file_handle_md5sum(file_handler: IO[bytes], block_size) -> str: """ Generates a md5sum for a file handle. Reads file in blocks to save memory. :param file_handler: a readable binary file handler From 2f583bf633750556f65fd89242f1e077382d4632 Mon Sep 17 00:00:00 2001 From: Will Holtz Date: Fri, 23 Jun 2023 12:52:41 -0700 Subject: [PATCH 7/8] move to extract_md5sum using xopen --- HISTORY.rst | 6 +++--- docs/writing_tests.rst | 8 +++++++- requirements.txt | 4 +++- setup.py | 4 +++- src/pytest_workflow/file_tests.py | 22 +++++++++++----------- src/pytest_workflow/schema.py | 6 +++--- src/pytest_workflow/schema/schema.json | 2 +- src/pytest_workflow/util.py | 11 ++++++----- tests/test_schema.py | 2 +- tests/test_utils.py | 8 ++++---- 10 files changed, 42 insertions(+), 31 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 8b2b8ef0..455177f1 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -9,9 +9,9 @@ Changelog version 2.1.0-dev --------------------------- -+ Add md5sum checking on unzipped contents of gzipped output files. Gzipped - files contain a timestamp which makes it hard to directly compare the md5sums - of gzipped files. ++ Add extract_md5sum check on uncompressed contents of compressed output files. + Gzipped files contain a timestamp which makes it hard to directly compare the + md5sums of gzipped files. 
+ Document naming conventions for Python test discovery version 2.0.1 diff --git a/docs/writing_tests.rst b/docs/writing_tests.rst index 0e11a7d7..0a3896d1 100644 --- a/docs/writing_tests.rst +++ b/docs/writing_tests.rst @@ -64,7 +64,7 @@ Test options - path: "TomCruise.txt.gz" # Gzipped files can also be searched, provided their extension is '.gz' contains: - "starring" - ungzip_md5sum: e27c52f6b5f8152aa3ef58be7bdacc4d # Md5sum of the ungzipped file (optional) + extract_md5sum: e27c52f6b5f8152aa3ef58be7bdacc4d # Md5sum of the uncompressed file (optional) stderr: # Options for testing stderr (optional) contains: # A list of strings which should be in stderr (optional) - "BSOD error, please contact the IT crowd" @@ -90,6 +90,12 @@ Please see the `Python documentation on regular expressions `_ to see how Python handles escape sequences. +The ``extract_md5sum`` option is used to uncompress a file and then compare +the md5sum of the uncompressed file with the supplied md5sum. This option is +particularly useful when testing gzipped files, which may contain a file +creation timestamp in the gzip header. The supported compressed file +formats for this option are gzip, bzip2, xz and Zstandard. + .. note:: Workflow names must be unique. Pytest workflow will crash when multiple workflows have the same name, even if they are in different files. diff --git a/requirements.txt b/requirements.txt index 2c5d3bff..884f5ec3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ pyyaml pytest>=7.0.0 -jsonschema \ No newline at end of file +jsonschema +xopen>=1.7.0 +zstandard diff --git a/setup.py b/setup.py index d4642b64..e4de914f 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,9 @@ install_requires=[ "pytest>=7.0.0", # To use pathlib Path's in pytest "pyyaml", - "jsonschema" + "jsonschema", + "xopen>=1.7.0", + "zstandard", ], # This line makes sure the plugin is automatically loaded when it is # installed in the same environment as pytest.
No need to configure diff --git a/src/pytest_workflow/file_tests.py b/src/pytest_workflow/file_tests.py index 99030c79..1f98b1c2 100644 --- a/src/pytest_workflow/file_tests.py +++ b/src/pytest_workflow/file_tests.py @@ -22,7 +22,7 @@ from .content_tests import ContentTestCollector from .schema import FileTest -from .util import file_md5sum, gzip_md5sum +from .util import extract_md5sum, file_md5sum from .workflow import Workflow @@ -77,15 +77,15 @@ def collect(self): filepath=filepath, md5sum=self.filetest.md5sum, workflow=self.workflow, - ungzip=False)] + extract=False)] - if self.filetest.ungzip_md5sum: + if self.filetest.extract_md5sum: tests += [FileMd5.from_parent( parent=self, filepath=filepath, - md5sum=self.filetest.ungzip_md5sum, + md5sum=self.filetest.extract_md5sum, workflow=self.workflow, - ungzip=True)] + extract=True)] return tests @@ -128,22 +128,22 @@ def repr_failure(self, excinfo, style=None): class FileMd5(pytest.Item): def __init__(self, parent: pytest.Collector, filepath: Path, - md5sum: str, workflow: Workflow, ungzip: bool): + md5sum: str, workflow: Workflow, extract: bool): """ Create a tests for the file md5sum. :param parent: The collector that started this item :param filepath: The path to the file :param md5sum: The expected md5sum :param workflow: The workflow running to generate the file - :param ungzip: Whether the file should be ungzipped before calculating + :param extract: Whether the file should be extracted before calculating """ - name = "ungzip_md5sum" if ungzip else "md5sum" + name = "extract_md5sum" if extract else "md5sum" super().__init__(name, parent) self.filepath = filepath self.expected_md5sum = md5sum self.observed_md5sum = None self.workflow = workflow - self.ungzip = ungzip + self.extract = extract def runtest(self): # Wait for the workflow to finish before we check the md5sum of a file. 
@@ -151,12 +151,12 @@ def runtest(self): if not self.workflow.matching_exitcode(): pytest.skip(f"'{self.parent.workflow.name}' did not exit with" f"desired exit code.") - sum_func = gzip_md5sum if self.ungzip else file_md5sum + sum_func = extract_md5sum if self.extract else file_md5sum self.observed_md5sum = sum_func(self.filepath) assert self.observed_md5sum == self.expected_md5sum def repr_failure(self, excinfo, style=None): - metric = "ungzip_md5sum" if self.ungzip else "md5sum" + metric = "extract_md5sum" if self.extract else "md5sum" return ( f"Observed {metric} '{self.observed_md5sum}' not equal to " f"expected {metric} '{self.expected_md5sum}' for file " diff --git a/src/pytest_workflow/schema.py b/src/pytest_workflow/schema.py index d4eada2f..499dd416 100644 --- a/src/pytest_workflow/schema.py +++ b/src/pytest_workflow/schema.py @@ -125,7 +125,7 @@ def __init__(self, contains: Optional[List[str]] = None, class FileTest(ContentTest): """A class that contains all the properties of a to be tested file.""" def __init__(self, path: str, md5sum: Optional[str] = None, - ungzip_md5sum: Optional[str] = None, + extract_md5sum: Optional[str] = None, should_exist: bool = DEFAULT_FILE_SHOULD_EXIST, contains: Optional[List[str]] = None, must_not_contain: Optional[List[str]] = None, @@ -136,7 +136,7 @@ def __init__(self, path: str, md5sum: Optional[str] = None, A container object :param path: the path to the file :param md5sum: md5sum of the file contents - :param ungzip_md5sum: md5sum of the unzipped file contents + :param extract_md5sum: md5sum of the extracted file contents :param should_exist: whether the file should exist or not :param contains: a list of strings that should be present in the file :param must_not_contain: a list of strings that should not be present @@ -152,7 +152,7 @@ def __init__(self, path: str, md5sum: Optional[str] = None, encoding=encoding) self.path = Path(path) self.md5sum = md5sum - self.ungzip_md5sum = ungzip_md5sum + self.extract_md5sum = 
extract_md5sum self.should_exist = should_exist diff --git a/src/pytest_workflow/schema/schema.json b/src/pytest_workflow/schema/schema.json index 82ec80c1..718b6c25 100644 --- a/src/pytest_workflow/schema/schema.json +++ b/src/pytest_workflow/schema/schema.json @@ -123,7 +123,7 @@ "should_exist": { "type": "boolean" }, - "ungzip_md5sum": { + "extract_md5sum": { "type": "string", "pattern": "^[a-f0-9]{32}$" }, diff --git a/src/pytest_workflow/util.py b/src/pytest_workflow/util.py index 491033d0..beed52af 100644 --- a/src/pytest_workflow/util.py +++ b/src/pytest_workflow/util.py @@ -1,5 +1,4 @@ import functools -import gzip import hashlib import os import re @@ -11,6 +10,8 @@ from typing import Callable, IO, Iterator, List, Optional, Set, Tuple, Union, \ cast +from xopen import xopen + Filepath = Union[str, os.PathLike] @@ -210,15 +211,15 @@ def file_md5sum(filepath: Path, block_size=64 * 1024) -> str: return file_handle_md5sum(file_handler, block_size) -def gzip_md5sum(filepath: Path, block_size=64 * 1024) -> str: +def extract_md5sum(filepath: Path, block_size=64 * 1024) -> str: """ - Generates a md5sum for the uncompressed contents of gzipped file. + Generates a md5sum for the uncompressed contents of a compressed file. Reads file in blocks to save memory. - :param filepath: a pathlib. Path to the gzipped file + :param filepath: a pathlib.Path to the compressed file :param block_size: Block size in bytes :return: a md5sum as hexadecimal string.
""" - with gzip.open(filepath) as file_handler: # Read the file in bytes + with xopen(filepath, 'rb') as file_handler: # Read the file in bytes return file_handle_md5sum(cast(IO[bytes], file_handler), block_size) diff --git a/tests/test_schema.py b/tests/test_schema.py index 98edfc95..8defda21 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -171,7 +171,7 @@ def test_filetest_defaults(): assert file_test.contains_regex == [] assert file_test.must_not_contain_regex == [] assert file_test.md5sum is None - assert file_test.ungzip_md5sum is None + assert file_test.extract_md5sum is None assert file_test.should_exist diff --git a/tests/test_utils.py b/tests/test_utils.py index b2f9f376..574225bd 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -26,8 +26,8 @@ import pytest from pytest_workflow.util import decode_unaligned, duplicate_tree, \ - file_md5sum, git_check_submodules_cloned, git_root, \ - gzip_md5sum, is_in_dir, link_tree, replace_whitespace + extract_md5sum, file_md5sum, git_check_submodules_cloned, git_root, \ + is_in_dir, link_tree, replace_whitespace WHITESPACE_TESTS = [ ("bla\nbla", "bla_bla"), @@ -164,11 +164,11 @@ def test_file_md5sum(hash_file: Path): assert whole_file_md5 == per_line_md5 -def test_gzip_md5sum(): +def test_extract_md5sum(): hash_file = HASH_FILE_DIR / "LICENSE.gz" with gzip.open(hash_file, "rb") as contents_fh: whole_file_md5 = hashlib.md5(contents_fh.read()).hexdigest() - per_line_md5 = gzip_md5sum(hash_file) + per_line_md5 = extract_md5sum(hash_file) assert whole_file_md5 == per_line_md5 From dd69ee7153313631b09ebf178eedddf537820cfe Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 26 Jun 2023 08:51:27 +0200 Subject: [PATCH 8/8] Also include extract_md5sum keyword on the readme --- README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/README.rst b/README.rst index c53eb866..3e19015c 100644 --- a/README.rst +++ b/README.rst @@ -127,6 +127,7 @@ predefined tests as well as custom tests are 
possible. - path: "TomCruise.txt.gz" # Gzipped files can also be searched, provided their extension is '.gz' contains: - "starring" + extract_md5sum: e27c52f6b5f8152aa3ef58be7bdacc4d # Md5sum of the uncompressed file (optional) stderr: # Options for testing stderr (optional) contains: # A list of strings which should be in stderr (optional) - "BSOD error, please contact the IT crowd"