From cb7ba18cb542eea6effaedeef9a6f1a55dd52e59 Mon Sep 17 00:00:00 2001 From: Panos Mavrogiorgos Date: Tue, 23 Apr 2024 11:59:28 +0300 Subject: [PATCH 1/2] ci: Replace `black --check` with `black --diff` Makes it easier to debug failures. --- .github/workflows/conda_pip.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/conda_pip.yml b/.github/workflows/conda_pip.yml index ed67d642..0fdeb9e1 100644 --- a/.github/workflows/conda_pip.yml +++ b/.github/workflows/conda_pip.yml @@ -44,7 +44,7 @@ jobs: run: "pip install 'black'" - name: "Run linters" - run: "black --check ./" + run: "black --diff ./" run-tests: needs: "run-linters" From f74b63d1c3acaa59b44b4d1101b43bc595e114a6 Mon Sep 17 00:00:00 2001 From: Panos Mavrogiorgos Date: Tue, 23 Apr 2024 11:50:52 +0300 Subject: [PATCH 2/2] tools: Add `get_zarr_encoding()` and `get_netcdf_encoding()` --- dependencies/main.yml | 2 + environments/base-p3.10.yml | 2 + environments/base-p3.11.yml | 2 + environments/base-p3.12.yml | 2 + environments/base-p3.9.yml | 2 + environments/full-p3.10.yml | 2 + environments/full-p3.11.yml | 2 + environments/full-p3.12.yml | 2 + environments/full-p3.9.yml | 2 + environments/viz-p3.10.yml | 2 + environments/viz-p3.11.yml | 2 + environments/viz-p3.12.yml | 2 + environments/viz-p3.9.yml | 2 + locks/requirements-ci.txt | 1 + locks/requirements-full.txt | 1 + locks/requirements-viz.txt | 1 + locks/requirements.txt | 1 + poetry.lock | 60 +++++++++++++++- pyposeidon/tools.py | 132 ++++++++++++++++++++++++++++++++---- pyproject.toml | 6 +- tests/test_tools.py | 94 +++++++++++++++++++++++++ 21 files changed, 305 insertions(+), 17 deletions(-) diff --git a/dependencies/main.yml b/dependencies/main.yml index 184fa9b8..a284afc6 100644 --- a/dependencies/main.yml +++ b/dependencies/main.yml @@ -5,6 +5,7 @@ channels: - "gbrey" dependencies: + - "bottleneck" - "cartopy" - "cfgrib" - "colorlog" @@ -13,6 +14,7 @@ dependencies: - "geopandas" - "jinja2" - "netCDF4" + - "numcodecs" - "numpy" - "pandas<2" - "psutil" diff --git a/environments/base-p3.10.yml b/environments/base-p3.10.yml index ec264a47..63cd0d1e 100644 --- a/environments/base-p3.10.yml +++ b/environments/base-p3.10.yml @@ -2,6 +2,7 @@ channels: - conda-forge - gbrey dependencies: + - bottleneck - cartopy - cfgrib - colorlog @@ -17,6 +18,7 @@ dependencies: - jigsaw>=0.9.12 - jinja2 - netCDF4 + - numcodecs - numpy - pandas<2 - pip diff --git a/environments/base-p3.11.yml b/environments/base-p3.11.yml index 0413cbac..5841b918 100644 --- a/environments/base-p3.11.yml +++ b/environments/base-p3.11.yml @@ -2,6 +2,7 @@ channels: - conda-forge - gbrey dependencies: + - bottleneck - cartopy - cfgrib - colorlog @@ -17,6 +18,7 @@ dependencies: - jigsaw>=0.9.12 - jinja2 - netCDF4 + - numcodecs - numpy - pandas<2 - pip diff --git a/environments/base-p3.12.yml b/environments/base-p3.12.yml index 2b201c38..45b18ff3 100644 --- a/environments/base-p3.12.yml +++ b/environments/base-p3.12.yml @@ -2,6 +2,7 @@ channels: - conda-forge - gbrey dependencies: + - bottleneck - cartopy - cfgrib - colorlog @@ -17,6 +18,7 @@ dependencies: - jigsaw>=0.9.12 - jinja2 - netCDF4 + - numcodecs - numpy - pandas<2 - pip diff --git a/environments/base-p3.9.yml b/environments/base-p3.9.yml index e2339bc1..0ab462e3 100644 --- a/environments/base-p3.9.yml +++ b/environments/base-p3.9.yml @@ -2,6 +2,7 @@ channels: - conda-forge - gbrey dependencies: + - bottleneck - cartopy - cfgrib - colorlog @@ -17,6 +18,7 @@ dependencies: - jigsaw>=0.9.12 - jinja2 - netCDF4 + - numcodecs - numpy - pandas<2 - pip diff --git a/environments/full-p3.10.yml b/environments/full-p3.10.yml index 22beb23a..c759e7e0 100644 --- a/environments/full-p3.10.yml +++ b/environments/full-p3.10.yml @@ -3,6 +3,7 @@ channels: - gbrey dependencies: - black<24.0 + - bottleneck - bump2version - cartopy - cfgrib @@ -29,6 +30,7 @@ dependencies: - mkdocs-material - mkdocstrings - netCDF4 + - numcodecs - numpy - pandas<2 - pip diff --git a/environments/full-p3.11.yml b/environments/full-p3.11.yml index e93f1521..6599d426 100644 --- a/environments/full-p3.11.yml +++ b/environments/full-p3.11.yml @@ -3,6 +3,7 @@ channels: - gbrey dependencies: - black<24.0 + - bottleneck - bump2version - cartopy - cfgrib @@ -29,6 +30,7 @@ dependencies: - mkdocs-material - mkdocstrings - netCDF4 + - numcodecs - numpy - pandas<2 - pip diff --git a/environments/full-p3.12.yml b/environments/full-p3.12.yml index d9c38fce..d68f838b 100644 --- a/environments/full-p3.12.yml +++ b/environments/full-p3.12.yml @@ -3,6 +3,7 @@ channels: - gbrey dependencies: - black<24.0 + - bottleneck - bump2version - cartopy - cfgrib @@ -29,6 +30,7 @@ dependencies: - mkdocs-material - mkdocstrings - netCDF4 + - numcodecs - numpy - pandas<2 - pip diff --git a/environments/full-p3.9.yml b/environments/full-p3.9.yml index d8fd6f77..37a19e90 100644 --- a/environments/full-p3.9.yml +++ b/environments/full-p3.9.yml @@ -3,6 +3,7 @@ channels: - gbrey dependencies: - black<24.0 + - bottleneck - bump2version - cartopy - cfgrib @@ -29,6 +30,7 @@ dependencies: - mkdocs-material - mkdocstrings - netCDF4 + - numcodecs - numpy - pandas<2 - pip diff --git a/environments/viz-p3.10.yml b/environments/viz-p3.10.yml index 7cfa8ac6..2d43b84b 100644 --- a/environments/viz-p3.10.yml +++ b/environments/viz-p3.10.yml @@ -2,6 +2,7 @@ channels: - conda-forge - gbrey dependencies: + - bottleneck - cartopy - cfgrib - colorlog @@ -23,6 +24,7 @@ dependencies: - matplotlib>=3.3 - mayavi - netCDF4 + - numcodecs - numpy - pandas<2 - pip diff --git a/environments/viz-p3.11.yml b/environments/viz-p3.11.yml index 9993c5b4..cdb6f2b1 100644 --- a/environments/viz-p3.11.yml +++ b/environments/viz-p3.11.yml @@ -2,6 +2,7 @@ channels: - conda-forge - gbrey dependencies: + - bottleneck - cartopy - cfgrib - colorlog @@ -23,6 +24,7 @@ dependencies: - matplotlib>=3.3 - mayavi - netCDF4 + - numcodecs - numpy - pandas<2 - pip diff --git a/environments/viz-p3.12.yml b/environments/viz-p3.12.yml index b4a02551..efb6ea3d 100644 --- a/environments/viz-p3.12.yml +++ b/environments/viz-p3.12.yml @@ -2,6 +2,7 @@ channels: - conda-forge - gbrey dependencies: + - bottleneck - cartopy - cfgrib - colorlog @@ -23,6 +24,7 @@ dependencies: - matplotlib>=3.3 - mayavi - netCDF4 + - numcodecs - numpy - pandas<2 - pip diff --git a/environments/viz-p3.9.yml b/environments/viz-p3.9.yml index 85cc33e8..2211fb26 100644 --- a/environments/viz-p3.9.yml +++ b/environments/viz-p3.9.yml @@ -2,6 +2,7 @@ channels: - conda-forge - gbrey dependencies: + - bottleneck - cartopy - cfgrib - colorlog @@ -23,6 +24,7 @@ dependencies: - matplotlib>=3.3 - mayavi - netCDF4 + - numcodecs - numpy - pandas<2 - pip diff --git a/locks/requirements-ci.txt b/locks/requirements-ci.txt index 22cec9b1..46c154f4 100644 --- a/locks/requirements-ci.txt +++ b/locks/requirements-ci.txt @@ -5,6 +5,7 @@ asciitree==0.3.3 ; python_version >= "3.9" and python_version < "4" attrs==23.2.0 ; python_version >= "3.9" and python_version < "4" beautifulsoup4==4.12.3 ; python_version >= "3.9" and python_version < "4" black==24.3.0 ; python_version >= "3.9" and python_version < "4" +bottleneck==1.3.8 ; python_version >= "3.9" and python_version < "4" bump2version==1.0.1 ; python_version >= "3.9" and python_version < "4" cartopy==0.22.0 ; python_version >= "3.9" and python_version < "4" catalogue==2.0.10 ; python_version >= "3.9" and python_version < "4" diff --git a/locks/requirements-full.txt b/locks/requirements-full.txt index 2bcb9285..97bc820c 100644 --- a/locks/requirements-full.txt +++ b/locks/requirements-full.txt @@ -13,6 +13,7 @@ beautifulsoup4==4.12.3 ; python_version >= "3.9" and python_version < "4" black==24.3.0 ; python_version >= "3.9" and python_version < "4" bleach==6.1.0 ; python_version >= "3.9" and python_version < "4" bokeh==3.4.0 ; python_version >= "3.9" and python_version < "4" +bottleneck==1.3.8 ; python_version >= "3.9" and python_version < "4" bump2version==1.0.1 ; python_version >= "3.9" and python_version < "4" cartopy==0.22.0 ; python_version >= "3.9" and python_version < "4" catalogue==2.0.10 ; python_version >= "3.9" and python_version < "4" diff --git a/locks/requirements-viz.txt b/locks/requirements-viz.txt index 1f3af139..734ab644 100644 --- a/locks/requirements-viz.txt +++ b/locks/requirements-viz.txt @@ -11,6 +11,7 @@ attrs==23.2.0 ; python_version >= "3.9" and python_version < "4" beautifulsoup4==4.12.3 ; python_version >= "3.9" and python_version < "4" bleach==6.1.0 ; python_version >= "3.9" and python_version < "4" bokeh==3.4.0 ; python_version >= "3.9" and python_version < "4" +bottleneck==1.3.8 ; python_version >= "3.9" and python_version < "4" cartopy==0.22.0 ; python_version >= "3.9" and python_version < "4" certifi==2024.2.2 ; python_version >= "3.9" and python_version < "4" cffi==1.16.0 ; python_version >= "3.9" and python_version < "4" diff --git a/locks/requirements.txt b/locks/requirements.txt index 0640aaf6..fcdfc618 100644 --- a/locks/requirements.txt +++ b/locks/requirements.txt @@ -4,6 +4,7 @@ anyio==4.3.0 ; python_version >= "3.9" and python_version < "4.0" asciitree==0.3.3 ; python_version >= "3.9" and python_version < "4" attrs==23.2.0 ; python_version >= "3.9" and python_version < "4" beautifulsoup4==4.12.3 ; python_version >= "3.9" and python_version < "4" +bottleneck==1.3.8 ; python_version >= "3.9" and python_version < "4" cartopy==0.22.0 ; python_version >= "3.9" and python_version < "4" certifi==2024.2.2 ; python_version >= "3.9" and python_version < "4" cffi==1.16.0 ; python_version >= "3.9" and python_version < "4" diff --git a/poetry.lock b/poetry.lock index 0d50f936..46f6037f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -307,6 +307,64 @@ PyYAML = ">=3.10" tornado = ">=6.2" xyzservices = ">=2021.09.1" +[[package]] +name = "bottleneck" +version = "1.3.8" +description = "Fast NumPy array functions written in C" +optional = false +python-versions = "*" +files = [ + {file = "Bottleneck-1.3.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:865c8ed5b798c0198b0b80553e09cc0d890c4f5feb3d81d31661517ca7819fa3"}, + {file = "Bottleneck-1.3.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d073a31e259d40b25e29dbba80f73abf38afe98fd730c79dad7edd9a0ad6cff5"}, + {file = "Bottleneck-1.3.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b806b277ab47495032822f55f43b8d336e4b7e73f8506ed34d3ea3da6d644abc"}, + {file = "Bottleneck-1.3.8-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:770b517609916adeb39d3b1a386a29bc316da03dd61e7ee6e8a38325b80cc327"}, + {file = "Bottleneck-1.3.8-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2948502b0394ee419945b55b092585222a505c61d41a874c741be49f2cac056f"}, + {file = "Bottleneck-1.3.8-cp310-cp310-win32.whl", hash = "sha256:271b6333522beb8aee32e640ba49a2064491d2c10317baa58a5996be3dd443e4"}, + {file = "Bottleneck-1.3.8-cp310-cp310-win_amd64.whl", hash = "sha256:d41000ea7ca196b5fd39d6fccd34bf0704c8831731cedd2da2dcae3c6ac49c42"}, + {file = "Bottleneck-1.3.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d0a7f454394cd3642498b6e077e70f4a6b9fd46a8eb908c83ac737fdc9f9a98c"}, + {file = "Bottleneck-1.3.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2c4ea8b9024dcb4e83b5c118a3c8faa863ace2ad572849da548a74a8ee4e8f2a"}, + {file = "Bottleneck-1.3.8-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f40724b6e965ff5b88b333d4a10097b1629e60c0db21bb3d08c24d7b1a904a16"}, + {file = "Bottleneck-1.3.8-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:4bd7183b8dcca89d0e65abe4507c19667dd31dacfbcc8ed705bad642f26a46e1"}, + {file = "Bottleneck-1.3.8-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:20aa31a7d9d747c499ace1610a6e1f7aba6e3d4a9923e0312f6b4b6d68a59af3"}, + {file = "Bottleneck-1.3.8-cp311-cp311-win32.whl", hash = "sha256:350520105d9449e6565b3f0c4ce1f80a0b3e4d63695ebbf29db41f62e13f6461"}, + {file = "Bottleneck-1.3.8-cp311-cp311-win_amd64.whl", hash = "sha256:167a278902775defde7dfded6e98e3707dfe54971ffd9aec25c43bc74e4e381a"}, + {file = "Bottleneck-1.3.8-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c6e93ed45c6c83392f73d0333b310b38772df7eb78c120c1447245691bdedaf4"}, + {file = "Bottleneck-1.3.8-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3400f47dda0196b5af50b0b0678e33cc8c42e52e55ae0a63cdfed60725659bc"}, + {file = "Bottleneck-1.3.8-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fba5fd1805c71b2eeea50bea93d59be449c4af23ebd8da5f75fd74fd0331e314"}, + {file = "Bottleneck-1.3.8-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:60139c5c3d2a9c1454a04af5ee981a9f56548d27fa36f264069b149a6e9b01ed"}, + {file = "Bottleneck-1.3.8-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:99fab17fa26c811ccad63e208314726e718ae6605314329eca09641954550523"}, + {file = "Bottleneck-1.3.8-cp312-cp312-win32.whl", hash = "sha256:d3ae2bb5d4168912e438e377cc1301fa01df949ba59cd86317b3e00404fd4a97"}, + {file = "Bottleneck-1.3.8-cp312-cp312-win_amd64.whl", hash = "sha256:bcba1d5d5328c50f94852ab521fcb26f35d9e0ccd928d120d56455d1a5bb743f"}, + {file = "Bottleneck-1.3.8-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:8d01fd5389d3160d54619119987ac24b020fa6810b7b398fff4945892237b3da"}, + {file = "Bottleneck-1.3.8-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ca25f0003ef65264942f6306d793e0f270ece8b406c5a293dfc7d878146e9f8"}, + {file = "Bottleneck-1.3.8-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf7763cf1516fa388c3587d12182fc1bc1c8089eab1a0a1bf09761f4c41af73c"}, + {file = "Bottleneck-1.3.8-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:38837c022350e2a656453f0e448416b7108cf67baccf11d04a0b3b70a48074dd"}, + {file = "Bottleneck-1.3.8-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:84ca5e741fae1c1796744dbdd0d2c1789cb74dd79c12ea8ec5834f83430f8520"}, + {file = "Bottleneck-1.3.8-cp37-cp37m-win32.whl", hash = "sha256:f4dfc22a3450227e692ef2ff4657639c33eec88ad04ee3ce29d1a23a4942da24"}, + {file = "Bottleneck-1.3.8-cp37-cp37m-win_amd64.whl", hash = "sha256:90b87eed152bbd760c4eb11473c2cf036abdb26e2f84caeb00787da74fb08c40"}, + {file = "Bottleneck-1.3.8-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:54a1b5d9d63b2d9f2955f8542eea26c418f97873e0abf86ca52beea0208c9306"}, + {file = "Bottleneck-1.3.8-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:019dd142d1e870388fb0b649213a0d8e569cce784326e183deba8f17826edd9f"}, + {file = "Bottleneck-1.3.8-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b5ed34a540eb7df59f45da659af9f792306637de1c69c95f020294f3b9fc4a8"}, + {file = "Bottleneck-1.3.8-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:b69fcd4d818bcf9d53497d8accd0d5f852a447728baaa33b9b7168f8c4221d06"}, + {file = "Bottleneck-1.3.8-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:02616a830bd477f5ba51103396092da4b9d83cea2e88f5b8069e3f4f7b796704"}, + {file = "Bottleneck-1.3.8-cp38-cp38-win32.whl", hash = "sha256:93d359fb83eb3bdd6635ef6e64835c38ffdc211441fc190549f286e6af98b5f6"}, + {file = "Bottleneck-1.3.8-cp38-cp38-win_amd64.whl", hash = "sha256:51c8bb3dffeb72c14f0382b80de76eabac6726d316babbd48f7e4056267d7910"}, + {file = "Bottleneck-1.3.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:84453548b0f722c3be912ce3c6b685917fea842bf1252eeb63714a2c1fd1ffc9"}, + {file = "Bottleneck-1.3.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:92700867504a213cafa9b8d9be529bd6e18dc83366b2ba00e86e80769b93f678"}, + {file = "Bottleneck-1.3.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fadfd2f3931fdff42f4b9867eb02ed7c662d01e6099ff6b347b6ced791450651"}, + {file = "Bottleneck-1.3.8-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:cfbc4a3a934b677bfbc37ac8757c4e1264a76262b774259bd3fa8a265dbd668b"}, + {file = "Bottleneck-1.3.8-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3c74c18f86a1ffac22280b005df8bb8a58505ac6663c4d6807f39873c17dc347"}, + {file = "Bottleneck-1.3.8-cp39-cp39-win32.whl", hash = "sha256:211f881159e8adb3a57df2263028ae6dc89ec4328bfd43f3421e507406c28654"}, + {file = "Bottleneck-1.3.8-cp39-cp39-win_amd64.whl", hash = "sha256:8615eeb75009ba7c0a112a5a6a5154ed3d61fd6b0879631778b3e42e2d9a6d65"}, + {file = "Bottleneck-1.3.8.tar.gz", hash = "sha256:6780d896969ba7f53c8995ba90c87c548beb3db435dc90c60b9a10ed1ab4d868"}, +] + +[package.dependencies] +numpy = "*" + +[package.extras] +doc = ["gitpython", "numpydoc", "sphinx"] + [[package]] name = "bump2version" version = "1.0.1" @@ -5679,4 +5737,4 @@ viz = ["geoviews", "holoviews", "hvplot", "ipykernel", "itkwidgets", "matplotlib [metadata] lock-version = "2.0" python-versions = ">=3.9, <4" -content-hash = "3b82313d68e68c02a7b42b03ee5678adaec056d90fb8bfc773ecc1858c069f9e" +content-hash = "ccefdc029726419ea610067524982095bb9082c3e4b47e08e3015930cce28787" diff --git a/pyposeidon/tools.py b/pyposeidon/tools.py index db02354f..3a4918cc 100644 --- a/pyposeidon/tools.py +++ b/pyposeidon/tools.py @@ -4,32 +4,34 @@ # Unless required by applicable law or agreed to in writing, software distributed under the Licence is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the Licence for the specific language governing permissions and limitations under the Licence. +from __future__ import annotations + import itertools import logging import os -import shlex import pathlib import re -import subprocess -from collections.abc import Iterable -import time +import shlex import shutil +import subprocess import sys +import time +import typing as T +from collections import abc +from typing import TypeVar +import bottleneck as bn import cartopy.feature import colorlog import geopandas as gpd import jinja2 +import numcodecs import numpy as np +import numpy.typing as npt import psutil import rioxarray import xarray as xr -from typing import Iterator -from typing import Optional -from typing import Tuple -from typing import TypeVar -from typing import Union from pyposeidon.utils.get_value import get_value @@ -121,7 +123,7 @@ def setup_logging( - min_level: int = logging.DEBUG, color: bool = True, log_file: Optional[os.PathLike[str]] = "pyposeidon.log" + min_level: int = logging.DEBUG, color: bool = True, log_file: os.PathLike[str] | str | None = "pyposeidon.log" ) -> None: # The purpose is to have a function that will allow us to easily setup some pyposeidon logging # and that will allow us to also dynamically change the log levels @@ -360,7 +362,7 @@ def open_dataset(source: os.PathLike, **kwargs) -> xr.Dataset: def is_iterable(obj): - return isinstance(obj, Iterable) + return isinstance(obj, abc.Iterable) def cast_path_to_str(path: os.PathLike) -> str: @@ -436,12 +438,12 @@ def get_coastlines(resolution: str, category="physical", name="land") -> gpd.Geo def grouper( - iterable: Iterable[_T], + iterable: abc.Iterable[_T], n: int, *, incomplete: str = "fill", - fillvalue: Union[_U, None] = None, -) -> Iterator[tuple[Union[_T, _U], ...]]: + fillvalue: _U | None = None, +) -> abc.Iterator[tuple[_T | _U, ...]]: """Collect data into non-overlapping fixed-length chunks or blocks""" # grouper('ABCDEFG', 3, fillvalue='x') --> ABC DEF Gxx # grouper('ABCDEFG', 3, incomplete='strict') --> ABC DEF ValueError @@ -476,3 +478,105 @@ def resolve_schism_path(instance, kwargs) -> str: # ------------------------------------------------------------------------------ bin_path = "schism" return bin_path + + +class QuantizationParams(T.TypedDict): + scale_factor: float + add_offset: float + missing_value: int + dtype: npt.DTypeLike + + +def calc_quantization_params( + data_min: float, + data_max: float, + dtype: npt.DTypeLike, +) -> QuantizationParams: + bits = np.iinfo(dtype).bits + missing_value = (2**bits - 2) // 2 + scale_factor = (data_max - data_min) / (2**bits - 2) + add_offset = data_min + 2 ** (bits - 1) * scale_factor + return { + "scale_factor": scale_factor, + "add_offset": add_offset, + "dtype": dtype, + "missing_value": missing_value, + } + + +def quantize( + array: npt.NDArray[np.float_], + *, + add_offset: float, + scale_factor: float, + dtype: npt.DTypeLike, + missing_value: int, +) -> npt.NDArray[np.int_]: + nans = np.isnan(array) + quantized: npt.NDArray[np.int_] = np.round((array - add_offset) / scale_factor, 0) + quantized[nans] = missing_value + return quantized.astype(dtype) + + +def dequantize( + array: npt.NDArray[np.int_], + *, + add_offset: float, + scale_factor: float, + missing_value: int, + dtype: npt.DTypeLike, +) -> npt.NDArray[np.float_]: + array = bn.replace(array.astype(np.float_), missing_value, np.nan) + dequantized = (array * scale_factor) + add_offset + return dequantized + + +def update_or_add(outer: dict[str, T.Any], key: str, inner: abc.Mapping[str, T.Any]) -> None: + if key in outer: + outer[key].update(inner) + else: + outer[key] = inner + + +def get_zarr_encoding( + ds: xr.Dataset, + *, + compressor: numcodecs.Blosc | None = numcodecs.Zstd(level=3), + quantized_vars: abc.Mapping[str, npt.DTypeLike] | None = None, +) -> dict[str, dict[str, T.Any]]: + encoding = ds.encoding.copy() + for var in [str(var) for var in ds.variables]: + update_or_add(encoding, var, {"compressor": compressor}) + if quantized_vars: + for var, dtype in quantized_vars.items(): + params = calc_quantization_params( + data_min=float(ds[var].min()), + data_max=float(ds[var].max()), + dtype=dtype, + ) + update_or_add(encoding, var, params) + return encoding + + +def get_netcdf_encoding( + ds: xr.Dataset, + *, + quantized_vars: abc.Mapping[str, npt.DTypeLike] | None = None, + compression_level: int = 1, +) -> dict[str, dict[str, T.Any]]: + encoding = ds.encoding.copy() + for var in [str(var) for var in ds.variables]: + update_or_add(encoding, var, {"zlib": compression_level > 0, "complevel": compression_level}) + # Use chunks if they are defined + if ds[var].chunks: + chunksizes = [values[0] for values in ds[var].chunksizes.values()] + update_or_add(encoding, var, {"chunksizes": chunksizes}) + if quantized_vars: + for var, dtype in quantized_vars.items(): + params = calc_quantization_params( + data_min=float(ds[var].min()), + data_max=float(ds[var].max()), + dtype=dtype, + ) + update_or_add(encoding, var, params) + return encoding diff --git a/pyproject.toml b/pyproject.toml index adfd79ca..341b4339 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,7 +34,7 @@ classifiers = [ [tool.poetry.dependencies] # python = ">=3.10, <3.11" python = ">=3.9, <4" -setuptools = "*" +bottleneck = "*" Cartopy = "*" cfgrib = "*" colorlog = "*" @@ -46,16 +46,19 @@ jinja2 = "*" joblib = "*" llvmlite = "*" netCDF4 = "*" +numcodecs = "*" numpy = ">=1.20, <2.0" pandas = "*" psutil = "*" pydap = ">=3.4.0" +pyogrio = "*" pyresample = "*" rasterio = "*" requests = "*" rioxarray = "*" scipy = "*" searvey = "*" +setuptools = "*" Shapely = ">=2" #Shapely = { url = "https://github.com/shapely/shapely/archive/refs/tags/1.7.1.zip" } tqdm = "*" @@ -72,7 +75,6 @@ mayavi = { version = "*", optional = true } numba = { version = ">=0.51", optional = true } spatialpandas = { version = "*", optional = true } vtk = {version = "*", optional = true} -pyogrio = "*" [tool.poetry.extras] viz = [ diff --git a/tests/test_tools.py b/tests/test_tools.py index bfb7a694..1812ba9c 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -1,8 +1,14 @@ +from __future__ import annotations + import logging import os import pathlib import shutil +import bottleneck as bn +import numcodecs.abc +import numpy as np +import pandas as pd import psutil import pytest import xarray as xr @@ -112,3 +118,91 @@ def test_setup_logging(): # we should be able to change the min_level tools.setup_logging(min_level=logging.INFO) assert logger.level == logging.INFO + + +def test_calc_quantization_params() -> None: + params = tools.calc_quantization_params(0, 10, dtype=np.int8) + assert pytest.approx(params["scale_factor"], abs=1e-3) == 0.039 + assert pytest.approx(params["add_offset"], abs=1e-3) == 5.039 + assert params["missing_value"] == 127 + assert params["dtype"] == np.int8 + # + params = tools.calc_quantization_params(0, 10, dtype=np.int16) + assert pytest.approx(params["scale_factor"], abs=1e-4) == 0.0001 + assert pytest.approx(params["add_offset"], abs=1e-4) == 5.0001 + assert params["missing_value"] == 32767 + assert params["dtype"] == np.int16 + # + params = tools.calc_quantization_params(0, 10, dtype=np.int32) + assert pytest.approx(params["scale_factor"], abs=1e-8) == 0 + assert pytest.approx(params["add_offset"], abs=1e-8) == 5 + assert params["missing_value"] == 2147483647 + assert params["dtype"] == np.int32 + + +def test_quantization_roundtrip_with_nans() -> None: + original = np.array([0, 4.9, np.nan, 10, np.nan]) + params = tools.calc_quantization_params(bn.nanmin(original), bn.nanmax(original), dtype=np.int8) + expected = np.array([-128, -4, 127, 126, 127], dtype=np.int8) + quantized = tools.quantize(original, **params) + assert np.allclose(expected, quantized) + dequantized = tools.dequantize(quantized, **params) + assert np.allclose(original, dequantized, atol=1e-1, equal_nan=True) + + +def test_zarr_encoding_compressor_default(): + ds = pd.DataFrame({"a": [1, 5, 10, np.nan]}).to_xarray() + default_compressor = numcodecs.Zstd(level=3) + encoding = tools.get_zarr_encoding(ds) + expected = { + "a": {"compressor": default_compressor}, + "index": {"compressor": default_compressor}, + } + assert encoding == expected + + +@pytest.mark.parametrize("compressor", [None, pytest.param(numcodecs.Blosc(cname="zstd", clevel=1), id="blosc")]) +def test_zarr_encoding_compressor_custom(compressor): + ds = pd.DataFrame({"a": [1, 5, 10, np.nan]}).to_xarray() + encoding = tools.get_zarr_encoding(ds, compressor=compressor) + expected = { + "a": {"compressor": compressor}, + "index": {"compressor": compressor}, + } + assert encoding == expected + + +def test_zarr_encoding_quantized_vars(): + ds = pd.DataFrame({"a": [1, 5, 10, np.nan]}).to_xarray() + default_compressor = numcodecs.Zstd(level=3) + encoding = tools.get_zarr_encoding(ds, quantized_vars={"a": np.int8}) + expected = { + "a": { + "compressor": default_compressor, + "scale_factor": 0.03543307086614173, + "add_offset": 5.535433070866142, + "dtype": np.int8, + "missing_value": 127, + }, + "index": {"compressor": default_compressor}, + } + assert encoding == expected + + +def test_netcdfr_encoding_quantized_vars(): + ds = pd.DataFrame({"a": [1, 5, 10, np.nan]}).to_xarray() + ds = ds.chunk(index=1) + encoding = tools.get_netcdf_encoding(ds, quantized_vars={"a": np.int8}) + expected = { + "a": { + "zlib": True, + "complevel": 1, + "chunksizes": [1], + "scale_factor": 0.03543307086614173, + "add_offset": 5.535433070866142, + "dtype": np.int8, + "missing_value": 127, + }, + "index": {"zlib": True, "complevel": 1}, + } + assert encoding == expected