pax_global_header00006660000000000000000000000064147755262570014536gustar00rootroot0000000000000052 comment=ce7b9b73bb1c54ef555f6794ccb67b107f948fe5 flox-0.10.3/000077500000000000000000000000001477552625700125675ustar00rootroot00000000000000flox-0.10.3/.git_archival.txt000066400000000000000000000001571477552625700160450ustar00rootroot00000000000000node: $Format:%H$ node-date: $Format:%cI$ describe-name: $Format:%(describe:tags=true)$ ref-names: $Format:%D$ flox-0.10.3/.github/000077500000000000000000000000001477552625700141275ustar00rootroot00000000000000flox-0.10.3/.github/dependabot.yml000066400000000000000000000001671477552625700167630ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: "github-actions" directory: "/" schedule: interval: "monthly" flox-0.10.3/.github/release.yml000066400000000000000000000001141477552625700162660ustar00rootroot00000000000000changelog: exclude: authors: - dependabot - pre-commit-ci flox-0.10.3/.github/workflows/000077500000000000000000000000001477552625700161645ustar00rootroot00000000000000flox-0.10.3/.github/workflows/benchmarks.yml000066400000000000000000000051571477552625700210340ustar00rootroot00000000000000name: Benchmark on: pull_request: types: [opened, reopened, synchronize, labeled] workflow_dispatch: jobs: benchmark: # if: ${{ contains( github.event.pull_request.labels.*.name, 'run-benchmark') && github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' }} # Run if the PR has been labelled correctly. if: ${{ github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' }} # Always run. name: Linux runs-on: ubuntu-20.04 env: ASV_DIR: "./asv_bench" steps: # We need the full repo to avoid this issue # https://github.com/actions/checkout/issues/23 - uses: actions/checkout@v4 with: fetch-depth: 0 - name: Set up conda environment uses: mamba-org/setup-micromamba@v2 with: environment-name: flox-bench create-args: >- python=3.12 asv mamba libmambapy<2.0 conda-build init-shell: bash cache-environment: true - name: Run benchmarks shell: bash -l {0} id: benchmark env: OPENBLAS_NUM_THREADS: 1 MKL_NUM_THREADS: 1 OMP_NUM_THREADS: 1 ASV_FACTOR: 1.5 ASV_SKIP_SLOW: 1 BASE_SHA: ${{ github.event.pull_request.base.sha }} LAST_HEAD_SHA: ${{ github.event.pull_request.head.sha }} HEAD_LABEL: ${{ github.event.pull_request.head.label }} BASE_LABEL: ${{ github.event.pull_request.base.label }} run: | # set -x # ID this runner asv machine --yes echo "Baseline: $LAST_HEAD_SHA ($BASE_LABEL)" echo "Contender: ${GITHUB_SHA} ($HEAD_LABEL)" # Run benchmarks for current commit against base ASV_OPTIONS="--split --show-stderr --factor $ASV_FACTOR" asv continuous $ASV_OPTIONS $BASE_SHA ${GITHUB_SHA} \ | sed "/Traceback \|failed$\|PERFORMANCE DECREASED/ s/^/::error::/" \ | tee benchmarks.log # Report and export results for subsequent steps if grep "Traceback \|failed\|PERFORMANCE DECREASED" benchmarks.log > /dev/null ; then exit 1 fi working-directory: ${{ env.ASV_DIR }} - name: Add instructions to artifact if: always() run: | cp benchmarks/README_CI.md benchmarks.log .asv/results/ working-directory: ${{ env.ASV_DIR }} - uses: actions/upload-artifact@v4 if: always() with: name: asv-benchmark-results-${{ runner.os }} path: ${{ env.ASV_DIR }}/.asv/results flox-0.10.3/.github/workflows/ci-additional.yaml000066400000000000000000000077511477552625700215630ustar00rootroot00000000000000name: CI Additional on: push: branches: - "main" pull_request: branches: - "*" workflow_dispatch: # allows you to trigger manually concurrency: group: 
${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: detect-ci-trigger: name: detect ci trigger runs-on: ubuntu-latest if: | github.repository == 'xarray-contrib/flox' && (github.event_name == 'push' || github.event_name == 'pull_request') outputs: triggered: ${{ steps.detect-trigger.outputs.trigger-found }} steps: - uses: actions/checkout@v4 with: fetch-depth: 2 - uses: xarray-contrib/ci-trigger@v1.2 id: detect-trigger with: keyword: "[skip-ci]" doctest: name: Doctests runs-on: "ubuntu-latest" needs: detect-ci-trigger if: needs.detect-ci-trigger.outputs.triggered == 'false' defaults: run: shell: bash -l {0} env: CONDA_ENV_FILE: ci/environment.yml PYTHON_VERSION: "3.13" steps: - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: set environment variables run: | echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV - name: Setup micromamba uses: mamba-org/setup-micromamba@v2 with: environment-file: ${{env.CONDA_ENV_FILE}} environment-name: flox-tests init-shell: bash cache-environment: true cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" create-args: | python=${{ env.PYTHON_VERSION }} - name: Install flox run: | python -m pip install --no-deps -e . - name: Version info run: | conda info -a conda list - name: Run doctests run: | python -m pytest --doctest-modules \ flox/aggregations.py flox/core.py flox/xarray.py \ --ignore flox/tests \ --cov=./ --cov-report=xml - name: Upload code coverage to Codecov uses: codecov/codecov-action@v5.4.0 with: file: ./coverage.xml flags: unittests env_vars: RUNNER_OS name: codecov-umbrella fail_ci_if_error: false mypy: name: Mypy runs-on: "ubuntu-latest" needs: detect-ci-trigger if: needs.detect-ci-trigger.outputs.triggered == 'false' defaults: run: shell: bash -l {0} env: CONDA_ENV_FILE: ci/environment.yml PYTHON_VERSION: "3.13" steps: - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: set environment variables run: | echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV - name: Setup micromamba uses: mamba-org/setup-micromamba@v2 with: environment-file: ${{env.CONDA_ENV_FILE}} environment-name: flox-tests init-shell: bash cache-environment: true cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" create-args: | python=${{ env.PYTHON_VERSION }} - name: Install flox run: | python -m pip install --no-deps -e . 
- name: Version info run: | conda info -a conda list - name: Install mypy run: | python -m pip install mypy - name: Run mypy run: | mkdir .mypy_cache python -m mypy --install-types --non-interactive --cache-dir=.mypy_cache/ --cobertura-xml-report mypy_report - name: Upload mypy coverage to Codecov uses: codecov/codecov-action@v5.4.0 with: file: mypy_report/cobertura.xml flags: mypy env_vars: PYTHON_VERSION name: codecov-umbrella fail_ci_if_error: false flox-0.10.3/.github/workflows/ci.yaml000066400000000000000000000114601477552625700174450ustar00rootroot00000000000000name: CI on: push: branches: - "main" pull_request: branches: - "*" schedule: - cron: "0 0 * * *" # Daily “At 00:00” workflow_dispatch: # allows you to trigger manually concurrency: group: ${{ github.ref }} cancel-in-progress: true jobs: test: name: Test (${{matrix.env}}, ${{ matrix.python-version }}, ${{ matrix.os }}) runs-on: ${{ matrix.os }} defaults: run: shell: bash -l {0} strategy: fail-fast: false matrix: os: ["ubuntu-latest"] env: ["environment"] python-version: ["3.10", "3.13"] include: - os: "windows-latest" env: "environment" python-version: "3.13" - os: "ubuntu-latest" env: "no-dask" # "no-xarray", "no-numba" python-version: "3.13" - os: "ubuntu-latest" env: "minimal-requirements" python-version: "3.10" - os: "windows-latest" env: "env-numpy1" python-version: "3.10" steps: - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: Set environment variables run: | echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV - name: Set up conda environment uses: mamba-org/setup-micromamba@v2 with: environment-file: ci/${{ matrix.env }}.yml environment-name: flox-tests init-shell: bash cache-environment: true create-args: | python=${{ matrix.python-version }} - name: Install flox run: | python -m pip install --no-deps -e . # https://github.com/actions/cache/blob/main/tips-and-workarounds.md#update-a-cache - name: Restore cached hypothesis directory id: restore-hypothesis-cache uses: actions/cache/restore@v4 with: path: .hypothesis/ key: cache-hypothesis-${{ runner.os }}-${{ matrix.python-version }}-${{ github.run_id }} restore-keys: | cache-hypothesis-${{ runner.os }}-${{ matrix.python-version }}- - name: Run Tests id: status run: | python -c "import xarray; xarray.show_versions()" pytest --durations=20 --durations-min=0.5 -n auto --cov=./ --cov-report=xml --hypothesis-profile ci - name: Upload code coverage to Codecov uses: codecov/codecov-action@v5.4.0 with: file: ./coverage.xml flags: unittests env_vars: RUNNER_OS,PYTHON_VERSION name: codecov-umbrella fail_ci_if_error: false # explicitly save the cache so it gets updated, also do this even if it fails. - name: Save cached hypothesis directory id: save-hypothesis-cache if: always() && steps.status.outcome != 'skipped' uses: actions/cache/save@v4 with: path: .hypothesis/ key: cache-hypothesis-${{ runner.os }}-${{ matrix.python-version }}-${{ github.run_id }} xarray-groupby: name: xarray-groupby runs-on: ubuntu-latest defaults: run: shell: bash -l {0} steps: - uses: actions/checkout@v4 with: repository: "pydata/xarray" fetch-depth: 0 # Fetch all history for all branches and tags. - name: Set up conda environment uses: mamba-org/setup-micromamba@v2 with: environment-file: ci/requirements/environment.yml environment-name: xarray-tests init-shell: bash cache-environment: true create-args: >- python=3.11 pint>=0.22 - name: Install xarray run: | python -m pip install --no-deps . 
- name: Install upstream flox run: | python -m pip install --no-deps \ git+https://github.com/dcherian/flox.git@${{ github.ref }} - name: Version info run: | conda info -a conda list python xarray/util/print_versions.py - name: import xarray run: | python -c 'import xarray' - name: import flox run: | python -c 'import flox' - name: Run Tests if: success() id: status run: | set -euo pipefail python -m pytest -n auto \ xarray/tests/test_groupby.py \ xarray/tests/test_units.py::TestDataArray::test_computation_objects \ xarray/tests/test_units.py::TestDataArray::test_grouped_operations \ xarray/tests/test_units.py::TestDataArray::test_resample \ xarray/tests/test_units.py::TestDataset::test_computation_objects \ xarray/tests/test_units.py::TestDataset::test_grouped_operations \ xarray/tests/test_units.py::TestDataset::test_resample flox-0.10.3/.github/workflows/pypi.yaml000066400000000000000000000012171477552625700200320ustar00rootroot00000000000000name: Upload Package to PyPI on: release: types: [created] jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: "3.x" - name: Install dependencies run: | python -m pip install --upgrade pip pip install build setuptools setuptools-scm wheel twine - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python -m build twine upload dist/* flox-0.10.3/.github/workflows/testpypi-release.yaml000066400000000000000000000044121477552625700223500ustar00rootroot00000000000000name: Build and Upload to TestPyPI on: push: branches: - "main" pull_request: types: [opened, reopened, synchronize, labeled] branches: - "*" workflow_dispatch: # no need for concurrency limits jobs: build-artifacts: if: ${{ contains( github.event.pull_request.labels.*.name, 'test-build') && github.event_name == 'pull_request' || github.event_name == 'workflow_dispatch' }} runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: actions/setup-python@v5 name: Install Python - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install build twine python -m pip install tomli tomli_w # - name: Disable local versions # run: | # python .github/workflows/configure-testpypi-version.py pyproject.toml # git update-index --assume-unchanged pyproject.toml # cat pyproject.toml - name: Build tarball and wheels run: | git clean -xdf python -m build - name: Check built artifacts run: | python -m twine check --strict dist/* if [ -f dist/flox-999.tar.gz ]; then echo "❌ INVALID VERSION NUMBER" exit 1 else echo "✅ Looks good" fi - uses: actions/upload-artifact@v4 with: name: releases path: dist test-built-dist: needs: build-artifacts runs-on: ubuntu-latest steps: - uses: actions/setup-python@v5 name: Install Python - uses: actions/download-artifact@v4 with: name: releases path: dist - name: List contents of built dist run: | ls -ltrh ls -ltrh dist - name: Verify the built dist/wheel is valid run: | python -m pip install --upgrade pip python -m pip install dist/flox*.whl # python -m cf_xarray.scripts.print_versions # - name: Publish package to TestPyPI # uses: pypa/gh-action-pypi-publish@v1.6.4 # with: # password: ${{ secrets.TESTPYPI_TOKEN }} # repository_url: https://test.pypi.org/legacy/ # verbose: true flox-0.10.3/.github/workflows/upstream-dev-ci.yaml000066400000000000000000000064721477552625700220660ustar00rootroot00000000000000name: CI Upstream on: push: branches: - main 
pull_request: types: [opened, reopened, synchronize, labeled] branches: - main paths: - ".github/workflows/upstream-dev-ci.yaml" - "ci/upstream-dev-env.yml" - "flox/*" schedule: - cron: "0 0 * * *" # Daily “At 00:00” UTC workflow_dispatch: # allows you to trigger the workflow run manually concurrency: group: ${{ github.workflow }}-${{ github.ref }} cancel-in-progress: true jobs: upstream-dev: name: upstream-dev runs-on: ubuntu-latest if: ${{ (contains(github.event.pull_request.labels.*.name, 'test-upstream') && github.event_name == 'pull_request') || github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' }} defaults: run: shell: bash -l {0} strategy: fail-fast: false matrix: python-version: ["3.13"] steps: - uses: actions/checkout@v4 with: fetch-depth: 0 # Fetch all history for all branches and tags. - name: Set environment variables run: | echo "PYTHON_VERSION=${{ matrix.python-version }}" >> $GITHUB_ENV - name: Set up conda environment uses: mamba-org/setup-micromamba@v2 with: environment-name: flox-tests init-shell: bash # cache-environment: true # micromamba list does not list pip dependencies, so install mamba create-args: >- mamba pip python=${{ matrix.python-version }} pytest-reportlog - name: Install upstream dev dependencies run: | # install cython for building cftime without build isolation micromamba install -f ci/upstream-dev-env.yml micromamba remove --force numpy scipy pandas cftime python -m pip install \ -i https://pypi.anaconda.org/scientific-python-nightly-wheels/simple \ --no-deps \ --pre \ --upgrade \ numpy \ scipy \ pandas \ xarray # without build isolation for packages compiling against numpy # TODO: remove once there are `numpy>=2.0` builds for cftime python -m pip install \ --no-deps \ --upgrade \ --no-build-isolation \ git+https://github.com/Unidata/cftime python -m pip install \ git+https://github.com/dask/dask \ git+https://github.com/ml31415/numpy-groupies - name: Install flox run: | python -m pip install --no-deps -e . - name: List deps run: | # micromamba list does not list pip dependencies mamba list - name: Run Tests if: success() id: status run: | pytest -rf -n auto --cov=./ --cov-report=xml \ --report-log output-${{ matrix.python-version }}-log.jsonl \ --hypothesis-profile ci - name: Generate and publish the report if: | failure() && steps.status.outcome == 'failure' && github.event_name == 'schedule' && github.repository_owner == 'xarray-contrib' uses: xarray-contrib/issue-from-pytest-log@v1 with: log-path: output-${{ matrix.python-version }}-log.jsonl flox-0.10.3/.gitignore000066400000000000000000000024021477552625700145550ustar00rootroot00000000000000asv_bench/pkgs/ docs/source/generated/ html/ .asv/ asv_bench/pkgs/ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. 
*.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .DS_Store flox-0.10.3/.pre-commit-config.yaml000066400000000000000000000030661477552625700170550ustar00rootroot00000000000000ci: autoupdate_schedule: quarterly repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. rev: "v0.11.4" hooks: - id: ruff args: ["--fix", "--show-fixes"] - id: ruff-format - repo: https://github.com/pre-commit/mirrors-prettier rev: "v4.0.0-alpha.8" hooks: - id: prettier - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - id: check-yaml - id: trailing-whitespace - id: end-of-file-fixer - id: check-docstring-first - repo: https://github.com/executablebooks/mdformat rev: 0.7.22 hooks: - id: mdformat additional_dependencies: - mdformat-black - mdformat-myst - repo: https://github.com/kynan/nbstripout rev: 0.8.1 hooks: - id: nbstripout args: [--extra-keys=metadata.kernelspec metadata.language_info.version] - repo: https://github.com/codespell-project/codespell rev: v2.4.1 hooks: - id: codespell additional_dependencies: - tomli - repo: https://github.com/abravalheri/validate-pyproject rev: v0.24.1 hooks: - id: validate-pyproject - repo: https://github.com/rhysd/actionlint rev: v1.7.7 hooks: - id: actionlint files: ".github/workflows/" args: [ "-ignore", "SC1090", "-ignore", "SC2046", "-ignore", "SC2086", "-ignore", "SC2129", "-ignore", "SC2155", ] flox-0.10.3/.readthedocs.yml000066400000000000000000000003401477552625700156520ustar00rootroot00000000000000version: 2 sphinx: # Path to your Sphinx configuration file. configuration: docs/source/conf.py build: os: "ubuntu-lts-latest" tools: python: "mambaforge-latest" conda: environment: ci/docs.yml formats: [] flox-0.10.3/LICENSE000066400000000000000000000260741477552625700136050ustar00rootroot00000000000000Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 
"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright (c) 2021 onwards, Deepak Cherian Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
flox-0.10.3/README.md000066400000000000000000000072531477552625700140550ustar00rootroot00000000000000[![GitHub Workflow CI Status](https://img.shields.io/github/actions/workflow/status/xarray-contrib/flox/ci.yaml?branch=main&logo=github&style=flat)](https://github.com/xarray-contrib/flox/actions) [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/xarray-contrib/flox/main.svg)](https://results.pre-commit.ci/latest/github/xarray-contrib/flox/main) [![image](https://img.shields.io/codecov/c/github/xarray-contrib/flox.svg?style=flat)](https://codecov.io/gh/xarray-contrib/flox) [![Documentation Status](https://readthedocs.org/projects/flox/badge/?version=latest)](https://flox.readthedocs.io/en/latest/?badge=latest) [![PyPI](https://img.shields.io/pypi/v/flox.svg?style=flat)](https://pypi.org/project/flox/) [![Conda-forge](https://img.shields.io/conda/vn/conda-forge/flox.svg?style=flat)](https://anaconda.org/conda-forge/flox) [![NASA-80NSSC18M0156](https://img.shields.io/badge/NASA-80NSSC18M0156-blue)](https://earthdata.nasa.gov/esds/competitive-programs/access/pangeo-ml) [![NASA-80NSSC22K0345](https://img.shields.io/badge/NASA-80NSSC22K0345-blue)](https://science.nasa.gov/open-science-overview) # flox This project explores strategies for fast GroupBy reductions with dask.array. It used to be called `dask_groupby` It was motivated by 1. Dask Dataframe GroupBy [blogpost](https://blog.dask.org/2019/10/08/df-groupby) 1. [numpy_groupies](https://github.com/ml31415/numpy-groupies) in Xarray [issue](https://github.com/pydata/xarray/issues/4473) (See a [presentation](https://docs.google.com/presentation/d/1YubKrwu9zPHC_CzVBhvORuQBW-z148BvX3Ne8XcvWsQ/edit?usp=sharing) about this package, from the Pangeo Showcase). ## Acknowledgements This work was funded in part by 1. NASA-ACCESS 80NSSC18M0156 "Community tools for analysis of NASA Earth Observing System Data in the Cloud" (PI J. Hamman, NCAR), 1. NASA-OSTFL 80NSSC22K0345 "Enhancing analysis of NASA data with the open-source Python Xarray Library" (PIs Scott Henderson, University of Washington; Deepak Cherian, NCAR; Jessica Scheick, University of New Hampshire), and 1. [NCAR's Earth System Data Science Initiative](https://ncar.github.io/esds/). It was motivated by [very](https://github.com/pangeo-data/pangeo/issues/266) [very](https://github.com/pangeo-data/pangeo/issues/271) [many](https://github.com/dask/distributed/issues/2602) [discussions](https://github.com/pydata/xarray/issues/2237) in the [Pangeo](https://pangeo.io) community. ## API There are two main functions 1. `flox.groupby_reduce(dask_array, by_dask_array, "mean")` "pure" dask array interface 1. `flox.xarray.xarray_reduce(xarray_object, by_dataarray, "mean")` "pure" xarray interface; though [work is ongoing](https://github.com/pydata/xarray/pull/5734) to integrate this package in xarray. ## Implementation See [the documentation](https://flox.readthedocs.io/en/latest/implementation.html) for details on the implementation. ## Custom reductions `flox` implements all common reductions provided by `numpy_groupies` in `aggregations.py`. It also allows you to specify a custom Aggregation (again inspired by dask.dataframe), though this might not be fully functional at the moment. See `aggregations.py` for examples. 
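For orientation, here is a minimal usage sketch of the two entry points listed under API above; the arrays, labels, and dimension names are made up for illustration and are not taken from the flox documentation:

```python
import numpy as np
import xarray as xr

import flox
import flox.xarray

# illustrative data: five values split across three groups
labels = np.array([0, 0, 1, 1, 2])
data = np.array([1.0, 2.0, 3.0, 4.0, 10.0])

# "pure" array interface: returns the reduced values and the unique groups found
result, groups = flox.groupby_reduce(data, labels, func="mean")
# result -> array([1.5, 3.5, 10.0]), groups -> array([0, 1, 2])

# "pure" xarray interface: the grouping variable is a coordinate on the DataArray
da = xr.DataArray(data, dims="x", coords={"labels": ("x", labels)}, name="data")
flox.xarray.xarray_reduce(da, da["labels"], func="mean")
```

A custom `Aggregation` is specified along these lines: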
```python mean = Aggregation( # name used for dask tasks name="mean", # operation to use for pure-numpy inputs numpy="mean", # blockwise reduction chunk=("sum", "count"), # combine intermediate results: sum the sums, sum the counts combine=("sum", "sum"), # generate final result as sum / count finalize=lambda sum_, count: sum_ / count, # Used when "reindexing" at combine-time fill_value=0, # Used when any member of `expected_groups` is not found final_fill_value=np.nan, ) ``` flox-0.10.3/asv_bench/000077500000000000000000000000001477552625700145175ustar00rootroot00000000000000flox-0.10.3/asv_bench/__init__.py000066400000000000000000000000001477552625700166160ustar00rootroot00000000000000flox-0.10.3/asv_bench/asv.conf.json000066400000000000000000000076421477552625700171400ustar00rootroot00000000000000{ // The version of the config file format. Do not change, unless // you know what you are doing. "version": 1, // The name of the project being benchmarked "project": "flox", // The project's homepage "project_url": "http://flox.readthedocs.io/", // The URL or local path of the source code repository for the // project being benchmarked "repo": "..", // The Python project's subdirectory in your repo. If missing or // the empty string, the project is assumed to be located at the root // of the repository. // "repo_subdir": "", // Customizable commands for building, installing, and // uninstalling the project. See asv.conf.json documentation. // // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], // "build_command": [ // "python setup.py build", // "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}" // ], // "build_command": [ "python setup.py build", "python -mpip wheel --no-deps --no-build-isolation --no-index -w {build_cache_dir} {build_dir}" ], "install_command": [ "in-dir={env_dir} python -mpip install {wheel_file} --no-deps" ], // List of branches to benchmark. If not provided, defaults to "master" // (for git) or "default" (for mercurial). "branches": ["main"], // for git "dvcs": "git", // timeout in seconds for installing any dependencies in environment // defaults to 10 min "install_timeout": 600, // the base URL to show a commit for the project. "show_commit_url": "http://github.com/xarray-contrib/flox/commit/", // The Pythons you'd like to test against. If not provided, defaults // to the current version of Python used to run `asv`. // "pythons": ["3.9"], "environment_type": "mamba", "conda_channels": ["conda-forge"], "conda_environment_file": "../ci/benchmark.yml", // The directory (relative to the current directory) that benchmarks are // stored in. If not provided, defaults to "benchmarks" "benchmark_dir": "benchmarks", // The directory (relative to the current directory) to cache the Python // environments in. If not provided, defaults to "env" "env_dir": ".asv/env", // The directory (relative to the current directory) that raw benchmark // results are stored in. If not provided, defaults to "results". "results_dir": ".asv/results", // The directory (relative to the current directory) that the html tree // should be written to. If not provided, defaults to "html". "html_dir": ".asv/html" // The number of characters to retain in the commit hashes. // "hash_length": 8, // `asv` will cache results of the recent builds in each // environment, making them faster to install next time. This is // the number of builds to keep, per environment. 
// "build_cache_size": 2, // The commits after which the regression search in `asv publish` // should start looking for regressions. Dictionary whose keys are // regexps matching to benchmark names, and values corresponding to // the commit (exclusive) after which to start looking for // regressions. The default is to start from the first commit // with results. If the commit is `null`, regression detection is // skipped for the matching benchmark. // // "regressions_first_commits": { // "some_benchmark": "352cdf", // Consider regressions only after this commit // "another_benchmark": null, // Skip regression detection altogether // }, // The thresholds for relative change in results, after which `asv // publish` starts reporting regressions. Dictionary of the same // form as in ``regressions_first_commits``, with values // indicating the thresholds. If multiple entries match, the // maximum is taken. If no entry matches, the default is 5%. // // "regressions_thresholds": { // "some_benchmark": 0.01, // Threshold of 1% // "another_benchmark": 0.5, // Threshold of 50% // }, } flox-0.10.3/asv_bench/benchmarks/000077500000000000000000000000001477552625700166345ustar00rootroot00000000000000flox-0.10.3/asv_bench/benchmarks/README_CI.md000066400000000000000000000174601477552625700204760ustar00rootroot00000000000000# Benchmark CI ## How it works The `asv` suite can be run for any PR on GitHub Actions (check workflow `.github/workflows/benchmarks.yml`) by adding a `run-benchmark` label to said PR. This will trigger a job that will run the benchmarking suite for the current PR head (merged commit) against the PR base (usually `main`). We use `asv continuous` to run the job, which runs a relative performance measurement. This means that there's no state to be saved and that regressions are only caught in terms of performance ratio (absolute numbers are available but they are not useful since we do not use stable hardware over time). `asv continuous` will: - Compile `scikit-image` for _both_ commits. We use `ccache` to speed up the process, and `mamba` is used to create the build environments. - Run the benchmark suite for both commits, _twice_ (since `processes=2` by default). - Generate a report table with performance ratios: - `ratio=1.0` -> performance didn't change. - `ratio<1.0` -> PR made it slower. - `ratio>1.0` -> PR made it faster. Due to the sensitivity of the test, we cannot guarantee that false positives are not produced. In practice, values between `(0.7, 1.5)` are to be considered part of the measurement noise. When in doubt, running the benchmark suite one more time will provide more information about the test being a false positive or not. ## Running the benchmarks on GitHub Actions 1. On a PR, add the label `run-benchmark`. 1. The CI job will be started. Checks will appear in the usual dashboard panel above the comment box. 1. If more commits are added, the label checks will be grouped with the last commit checks _before_ you added the label. 1. Alternatively, you can always go to the `Actions` tab in the repo and [filter for `workflow:Benchmark`](https://github.com/scikit-image/scikit-image/actions?query=workflow%3ABenchmark). Your username will be assigned to the `actor` field, so you can also filter the results with that if you need it. ## The artifacts The CI job will also generate an artifact. This is the `.asv/results` directory compressed in a zip file. Its contents include: - `fv-xxxxx-xx/`. A directory for the machine that ran the suite. 
It contains three files: - `.json`, `.json`: the benchmark results for each commit, with stats. - `machine.json`: details about the hardware. - `benchmarks.json`: metadata about the current benchmark suite. - `benchmarks.log`: the CI logs for this run. - This README. ## Re-running the analysis Although the CI logs should be enough to get an idea of what happened (check the table at the end), one can use `asv` to run the analysis routines again. 1. Uncompress the artifact contents in the repo, under `.asv/results`. This is, you should see `.asv/results/benchmarks.log`, not `.asv/results/something_else/benchmarks.log`. Write down the machine directory name for later. 1. Run `asv show` to see your available results. You will see something like this: ``` $> asv show Commits with results: Machine : Jaimes-MBP Environment: conda-py3.9-cython-numpy1.20-scipy 00875e67 Machine : fv-az95-499 Environment: conda-py3.7-cython-numpy1.17-pooch-scipy 8db28f02 3a305096 ``` 3. We are interested in the commits for `fv-az95-499` (the CI machine for this run). We can compare them with `asv compare` and some extra options. `--sort ratio` will show largest ratios first, instead of alphabetical order. `--split` will produce three tables: improved, worsened, no changes. `--factor 1.5` tells `asv` to only complain if deviations are above a 1.5 ratio. `-m` is used to indicate the machine ID (use the one you wrote down in step 1). Finally, specify your commit hashes: baseline first, then contender! ``` $> asv compare --sort ratio --split --factor 1.5 -m fv-az95-499 8db28f02 3a305096 Benchmarks that have stayed the same: before after ratio [8db28f02] [3a305096] n/a n/a n/a benchmark_restoration.RollingBall.time_rollingball_ndim 1.23±0.04ms 1.37±0.1ms 1.12 benchmark_transform_warp.WarpSuite.time_to_float64(, 128, 3) 5.07±0.1μs 5.59±0.4μs 1.10 benchmark_transform_warp.ResizeLocalMeanSuite.time_resize_local_mean(, (192, 192, 192), (192, 192, 192)) 1.23±0.02ms 1.33±0.1ms 1.08 benchmark_transform_warp.WarpSuite.time_same_type(, 128, 3) 9.45±0.2ms 10.1±0.5ms 1.07 benchmark_rank.Rank3DSuite.time_3d_filters('majority', (32, 32, 32)) 23.0±0.9ms 24.6±1ms 1.07 benchmark_interpolation.InterpolationResize.time_resize((80, 80, 80), 0, 'symmetric', , True) 38.7±1ms 41.1±1ms 1.06 benchmark_transform_warp.ResizeLocalMeanSuite.time_resize_local_mean(, (2048, 2048), (192, 192, 192)) 4.97±0.2μs 5.24±0.2μs 1.05 benchmark_transform_warp.ResizeLocalMeanSuite.time_resize_local_mean(, (2048, 2048), (2048, 2048)) 4.21±0.2ms 4.42±0.3ms 1.05 benchmark_rank.Rank3DSuite.time_3d_filters('gradient', (32, 32, 32)) ... ``` If you want more details on a specific test, you can use `asv show`. 
Use `-b pattern` to filter which tests to show, and then specify a commit hash to inspect: ``` $> asv show -b time_to_float64 8db28f02 Commit: 8db28f02 benchmark_transform_warp.WarpSuite.time_to_float64 [fv-az95-499/conda-py3.7-cython-numpy1.17-pooch-scipy] ok =============== ============= ========== ============= ========== ============ ========== ============ ========== ============ -- N / order --------------- -------------------------------------------------------------------------------------------------------------- dtype_in 128 / 0 128 / 1 128 / 3 1024 / 0 1024 / 1 1024 / 3 4096 / 0 4096 / 1 4096 / 3 =============== ============= ========== ============= ========== ============ ========== ============ ========== ============ numpy.uint8 2.56±0.09ms 523±30μs 1.28±0.05ms 130±3ms 28.7±2ms 81.9±3ms 2.42±0.01s 659±5ms 1.48±0.01s numpy.uint16 2.48±0.03ms 530±10μs 1.28±0.02ms 130±1ms 30.4±0.7ms 81.1±2ms 2.44±0s 653±3ms 1.47±0.02s numpy.float32 2.59±0.1ms 518±20μs 1.27±0.01ms 127±3ms 26.6±1ms 74.8±2ms 2.50±0.01s 546±10ms 1.33±0.02s numpy.float64 2.48±0.04ms 513±50μs 1.23±0.04ms 134±3ms 30.7±2ms 85.4±2ms 2.55±0.01s 632±4ms 1.45±0.01s =============== ============= ========== ============= ========== ============ ========== ============ ========== ============ started: 2021-07-06 06:14:36, duration: 1.99m ``` ## Other details ### Skipping slow or demanding tests To minimize the time required to run the full suite, we trimmed the parameter matrix in some cases and, in others, directly skipped tests that ran for too long or require too much memory. Unlike `pytest`, `asv` does not have a notion of marks. However, you can `raise NotImplementedError` in the setup step to skip a test. In that vein, a new private function is defined at `benchmarks.__init__`: `_skip_slow`. This will check if the `ASV_SKIP_SLOW` environment variable has been defined. If set to `1`, it will raise `NotImplementedError` and skip the test. To implement this behavior in other tests, you can add the following attribute: ```python from . import _skip_slow # this function is defined in benchmarks.__init__ def time_something_slow(): pass time_something.setup = _skip_slow ``` flox-0.10.3/asv_bench/benchmarks/__init__.py000066400000000000000000000011411477552625700207420ustar00rootroot00000000000000import os def parameterized(names, params): def decorator(func): func.param_names = names func.params = params return func return decorator def _skip_slow(): """ Use this function to skip slow or highly demanding tests. Use it as a `Class.setup` method or a `function.setup` attribute. Examples -------- >>> from . import _skip_slow >>> def time_something_slow(): ... 
pass >>> time_something.setup = _skip_slow """ if os.environ.get("ASV_SKIP_SLOW", "0") == "1": raise NotImplementedError("Skipping this test...") flox-0.10.3/asv_bench/benchmarks/cohorts.py000066400000000000000000000200231477552625700206640ustar00rootroot00000000000000from functools import cached_property import dask import numpy as np import pandas as pd import flox from .helpers import codes_for_resampling class Cohorts: """Time the core reduction function.""" def setup(self, *args, **kwargs): raise NotImplementedError @cached_property def result(self): return flox.groupby_reduce(self.array, self.by, func="sum", axis=self.axis)[0] def containment(self): asfloat = self.bitmask().astype(float) chunks_per_label = asfloat.sum(axis=0) containment = (asfloat.T @ asfloat) / chunks_per_label return containment.todense() def chunks_cohorts(self): return flox.core.find_group_cohorts( self.by, [self.array.chunks[ax] for ax in self.axis], expected_groups=self.expected, )[1] def bitmask(self): chunks = [self.array.chunks[ax] for ax in self.axis] return flox.core._compute_label_chunk_bitmask(self.by, chunks, self.expected[-1] + 1) def time_find_group_cohorts(self): flox.core.find_group_cohorts( self.by, [self.array.chunks[ax] for ax in self.axis], expected_groups=self.expected, ) # The cache clear fails dependably in CI # Not sure why try: flox.cache.cache.clear() except AttributeError: pass def track_num_cohorts(self): return len(self.chunks_cohorts()) def time_graph_construct(self): flox.groupby_reduce(self.array, self.by, func="sum", axis=self.axis) def track_num_tasks(self): return len(self.result.dask.to_dict()) def track_num_tasks_optimized(self): (opt,) = dask.optimize(self.result) return len(opt.dask.to_dict()) def track_num_layers(self): return len(self.result.dask.layers) track_num_cohorts.unit = "cohorts" # type: ignore[attr-defined] # Lazy track_num_tasks.unit = "tasks" # type: ignore[attr-defined] # Lazy track_num_tasks_optimized.unit = "tasks" # type: ignore[attr-defined] # Lazy track_num_layers.unit = "layers" # type: ignore[attr-defined] # Lazy for f in [ track_num_tasks, track_num_tasks_optimized, track_num_layers, track_num_cohorts, ]: f.repeat = 1 # type: ignore[attr-defined] # Lazy f.rounds = 1 # type: ignore[attr-defined] # Lazy f.number = 1 # type: ignore[attr-defined] # Lazy class NWMMidwest(Cohorts): """2D labels, ireregular w.r.t chunk size. Mimics National Weather Model, Midwest county groupby.""" def setup(self, *args, **kwargs): x = np.repeat(np.arange(30), 150) y = np.repeat(np.arange(30), 60) by = x[np.newaxis, :] * y[:, np.newaxis] self.by = flox.core._factorize_multiple((by,), expected_groups=(None,), any_by_dask=False)[0][0] self.array = dask.array.ones(self.by.shape, chunks=(350, 350)) self.axis = (-2, -1) self.expected = pd.RangeIndex(self.by.max() + 1) class ERA5Dataset: """ERA5""" def __init__(self, *args, **kwargs): self.time = pd.Series(pd.date_range("2016-01-01", "2018-12-31 23:59", freq="h")) self.axis = (-1,) self.array = dask.array.random.random((721, 1440, len(self.time)), chunks=(-1, -1, 48)) def rechunk(self): self.array = flox.core.rechunk_for_cohorts( self.array, -1, self.by, force_new_chunk_at=[1], chunksize=48, ignore_old_chunks=True, ) class ERA5Resampling(Cohorts): def setup(self, *args, **kwargs): super().__init__() # nyears is number of years, adjust to make bigger, # full dataset is 60-ish years. 
nyears = 5 shape = (37, 721, 1440, nyears * 365 * 24) chunks = (-1, -1, -1, 1) time = pd.date_range("2001-01-01", periods=shape[-1], freq="h") self.array = dask.array.random.random(shape, chunks=chunks) self.by = codes_for_resampling(time, "D") self.axis = (-1,) self.expected = np.unique(self.by) class ERA5DayOfYear(ERA5Dataset, Cohorts): def setup(self, *args, **kwargs): super().__init__() self.by = self.time.dt.dayofyear.values - 1 self.expected = pd.RangeIndex(self.by.max() + 1) # class ERA5DayOfYearRechunked(ERA5DayOfYear, Cohorts): # def setup(self, *args, **kwargs): # super().setup() # self.array = dask.array.random.random((721, 1440, len(self.time)), chunks=(-1, -1, 24)) # self.expected = pd.RangeIndex(self.by.max() + 1) class ERA5MonthHour(ERA5Dataset, Cohorts): def setup(self, *args, **kwargs): super().__init__() by = (self.time.dt.month.values, self.time.dt.hour.values) ret = flox.core._factorize_multiple( by, (pd.Index(np.arange(1, 13)), pd.Index(np.arange(1, 25))), any_by_dask=False, ) # Add one so the rechunk code is simpler and makes sense self.by = ret[0][0] self.expected = pd.RangeIndex(self.by.max() + 1) class ERA5MonthHourRechunked(ERA5MonthHour, Cohorts): def setup(self, *args, **kwargs): super().setup() super().rechunk() class PerfectMonthly(Cohorts): """Perfectly chunked for a "cohorts" monthly mean climatology""" def setup(self, *args, **kwargs): self.time = pd.Series(pd.date_range("1961-01-01", "2018-12-31 23:59", freq="ME")) self.axis = (-1,) self.array = dask.array.random.random((721, 1440, len(self.time)), chunks=(-1, -1, 4)) self.by = self.time.dt.month.values - 1 self.expected = pd.RangeIndex(self.by.max() + 1) def rechunk(self): self.array = flox.core.rechunk_for_cohorts( self.array, -1, self.by, force_new_chunk_at=[1], chunksize=4, ignore_old_chunks=True, ) # class PerfectMonthlyRechunked(PerfectMonthly): # def setup(self, *args, **kwargs): # super().setup() # super().rechunk() class ERA5Google(Cohorts): def setup(self, *args, **kwargs): TIME = 900 # 92044 in Google ARCO ERA5 self.time = pd.Series(pd.date_range("1959-01-01", freq="6h", periods=TIME)) self.axis = (2,) self.array = dask.array.ones((721, 1440, TIME), chunks=(-1, -1, 1)) self.by = self.time.dt.day.values - 1 self.expected = pd.RangeIndex(self.by.max() + 1) class PerfectBlockwiseResampling(Cohorts): """Perfectly chunked for blockwise resampling.""" def setup(self, *args, **kwargs): index = pd.date_range("1959-01-01", freq="D", end="1962-12-31") self.time = pd.Series(index) TIME = len(self.time) self.axis = (2,) self.array = dask.array.ones((721, 1440, TIME), chunks=(-1, -1, 10)) self.by = codes_for_resampling(index, freq="5D") self.expected = pd.RangeIndex(self.by.max() + 1) class SingleChunk(Cohorts): """Single chunk along reduction axis: always blockwise.""" def setup(self, *args, **kwargs): index = pd.date_range("1959-01-01", freq="D", end="1962-12-31") self.time = pd.Series(index) TIME = len(self.time) self.axis = (2,) self.array = dask.array.ones((721, 1440, TIME), chunks=(-1, -1, -1)) self.by = codes_for_resampling(index, freq="5D") self.expected = pd.RangeIndex(self.by.max() + 1) class OISST(Cohorts): def setup(self, *args, **kwargs): self.array = dask.array.ones((1, 14532), chunks=(1, 10)) self.axis = (1,) index = pd.date_range("1981-09-01 12:00", "2021-06-14 12:00", freq="D") self.time = pd.Series(index) self.by = self.time.dt.dayofyear.values - 1 self.expected = pd.RangeIndex(self.by.max() + 1) class RandomBigArray(Cohorts): def setup(self, *args, **kwargs): M, N = 100_000, 20_000 
self.array = dask.array.random.normal(size=(M, N), chunks=(10_000, N // 5)).T self.by = np.random.choice(5_000, size=M) self.expected = pd.RangeIndex(5000) self.axis = (1,) flox-0.10.3/asv_bench/benchmarks/combine.py000066400000000000000000000041571477552625700206310ustar00rootroot00000000000000from functools import partial from typing import Any import numpy as np import flox from . import parameterized N = 1000 def _get_combine(combine): if combine == "grouped": return partial(flox.core._grouped_combine, engine="numpy") else: try: reindex = flox.ReindexStrategy(blockwise=False) except AttributeError: reindex = False return partial(flox.core._simple_combine, reindex=reindex) class Combine: def setup(self, *args, **kwargs): raise NotImplementedError @parameterized(("kind", "combine"), (("reindexed", "not_reindexed"), ("grouped", "simple"))) def time_combine(self, kind, combine): _get_combine(combine)( getattr(self, f"x_chunk_{kind}"), **self.kwargs, keepdims=True, ) @parameterized(("kind", "combine"), (("reindexed", "not_reindexed"), ("grouped", "simple"))) def peakmem_combine(self, kind, combine): _get_combine(combine)( getattr(self, f"x_chunk_{kind}"), **self.kwargs, keepdims=True, ) class Combine1d(Combine): """ Time the combine step for dask reductions, this is for reducing along a single dimension """ def setup(self, *args, **kwargs) -> None: def construct_member(groups) -> dict[str, Any]: return { "groups": groups, "intermediates": [ np.ones((40, 120, 120, 4), dtype=float), np.ones((40, 120, 120, 4), dtype=int), ], } # motivated by self.x_chunk_not_reindexed = [ construct_member(groups) for groups in [ np.array((1, 2, 3, 4)), np.array((5, 6, 7, 8)), np.array((9, 10, 11, 12)), ] * 2 ] self.x_chunk_reindexed = [construct_member(groups) for groups in [np.array((1, 2, 3, 4))] * 4] self.kwargs = { "agg": flox.aggregations._initialize_aggregation("sum", "float64", np.float64, 0, 0, {}), "axis": (3,), } flox-0.10.3/asv_bench/benchmarks/helpers.py000066400000000000000000000005621477552625700206530ustar00rootroot00000000000000import numpy as np import pandas as pd def codes_for_resampling(group_as_index: pd.Index, freq: str) -> np.ndarray: s = pd.Series(np.arange(group_as_index.size), group_as_index) grouped = s.groupby(pd.Grouper(freq=freq)) first_items = grouped.first() counts = grouped.count() codes = np.repeat(np.arange(len(first_items)), counts) return codes flox-0.10.3/asv_bench/benchmarks/reduce.py000066400000000000000000000115001477552625700204520ustar00rootroot00000000000000import numpy as np import pandas as pd import xarray as xr from asv_runner.benchmarks.mark import parameterize, skip_for_params import flox import flox.aggregations import flox.xarray from .helpers import codes_for_resampling N = 3000 funcs = ["sum", "nansum", "mean", "nanmean", "max", "nanmax", "count"] engines = [ None, "flox", "numpy", ] # numbagg is disabled for now since it takes ages in CI expected_groups = { "None": None, "bins": pd.IntervalIndex.from_breaks([1, 2, 4]), } expected_names = tuple(expected_groups) NUMBAGG_FUNCS = ["nansum", "nanmean", "nanmax", "count", "all"] numbagg_skip = [] for name in expected_names: numbagg_skip.extend(list((func, name, "numbagg") for func in funcs if func not in NUMBAGG_FUNCS)) def setup_jit(): # pre-compile jitted funcs labels = np.ones((N), dtype=int) array1 = np.ones((N), dtype=float) array2 = np.ones((N, N), dtype=float) if "numba" in engines: for func in funcs: method = getattr(flox.aggregate_npg, func) method(labels, array1, engine="numba") if "numbagg" in 
engines: for func in set(NUMBAGG_FUNCS) & set(funcs): flox.groupby_reduce(array1, labels, func=func, engine="numbagg") flox.groupby_reduce(array2, labels, func=func, engine="numbagg") class ChunkReduce: """Time the core reduction function.""" min_run_count = 5 warmup_time = 0.5 def setup(self, *args, **kwargs): raise NotImplementedError @skip_for_params(numbagg_skip) @parameterize({"func": funcs, "expected_name": expected_names, "engine": engines}) def time_reduce(self, func, expected_name, engine): flox.groupby_reduce( self.array, self.labels, func=func, engine=engine, axis=self.axis, expected_groups=expected_groups[expected_name], ) # @skip_for_params(numbagg_skip) # @parameterize({"func": funcs, "expected_name": expected_names, "engine": engines}) # def peakmem_reduce(self, func, expected_name, engine): # flox.groupby_reduce( # self.array, # self.labels, # func=func, # engine=engine, # axis=self.axis, # expected_groups=expected_groups[expected_name], # ) class ChunkReduce1D(ChunkReduce): def setup(self, *args, **kwargs): self.array = np.ones((N,)) self.labels = np.repeat(np.arange(5), repeats=N // 5) self.axis = -1 if "numbagg" in args: setup_jit() @parameterize( { "func": ["nansum", "nanmean", "nanmax", "count"], "engine": [e for e in engines if e is not None], } ) def time_reduce_bare(self, func, engine): # TODO: migrate to the other test cases, but we'll have to setup labels # appropriately ;( flox.aggregations.generic_aggregate( self.labels, self.array, axis=self.axis, func=func, engine=engine, fill_value=0, ) class ChunkReduce2D(ChunkReduce): def setup(self, *args, **kwargs): self.array = np.ones((N, N)) self.labels = np.repeat(np.arange(N // 5), repeats=5) self.axis = -1 setup_jit() class ChunkReduce2DAllAxes(ChunkReduce): def setup(self, *args, **kwargs): self.array = np.ones((N, N)) self.labels = np.repeat(np.arange(N // 5), repeats=5)[np.newaxis, :] self.axis = None setup_jit() # class ChunkReduce2DUnsorted(ChunkReduce): # def setup(self, *args, **kwargs): # self.array = np.ones((N, N)) # self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5)) # self.axis = -1 # setup_jit() # class ChunkReduce1DUnsorted(ChunkReduce): # def setup(self, *args, **kwargs): # self.array = np.ones((N,)) # self.labels = np.random.permutation(np.repeat(np.arange(5), repeats=N // 5)) # self.axis = -1 # setup_jit() # class ChunkReduce2DAllAxesUnsorted(ChunkReduce): # def setup(self, *args, **kwargs): # self.array = np.ones((N, N)) # self.labels = np.random.permutation(np.repeat(np.arange(N // 5), repeats=5)) # self.axis = None # setup_jit() class Quantile: def setup(self, *args, **kwargs): shape = (31411, 25, 25, 1) time = pd.date_range("2014-01-01", "2099-12-31", freq="D") self.da = xr.DataArray( np.random.randn(*shape), name="pr", dims=("time", "lat", "lon", "lab"), coords={"time": time}, ) self.codes = xr.DataArray(dims="time", data=codes_for_resampling(time, "YE"), name="time") def time_quantile(self): flox.xarray.xarray_reduce(self.da, self.codes, engine="flox", func="quantile", q=0.9) flox-0.10.3/ci/000077500000000000000000000000001477552625700131625ustar00rootroot00000000000000flox-0.10.3/ci/benchmark.yml000066400000000000000000000003171477552625700156400ustar00rootroot00000000000000name: flox-bench channels: - conda-forge dependencies: - asv - build - cachey - dask-core - numpy<2.1 - mamba - pip - xarray - numpy_groupies>=0.9.19 - numbagg>=0.3 - wheel - scipy flox-0.10.3/ci/docs.yml000066400000000000000000000006031477552625700146340ustar00rootroot00000000000000name: flox-doc 
channels:
  - conda-forge
dependencies:
  - cubed>=0.20.0
  - cubed-xarray
  - dask-core
  - pip
  - xarray
  - numpy>=1.22
  - scipy
  - numpydoc
  - numpy_groupies>=0.9.19
  - toolz
  - matplotlib-base
  - myst-parser
  - myst-nb
  - sparse
  - sphinx
  - sphinx-remove-toctrees
  - furo>=2024.08
  - ipykernel
  - jupyter
  - sphinx-codeautolink
  - pip:
      - -e ..

flox-0.10.3/ci/env-numpy1.yml
name: flox-tests
channels:
  - conda-forge
dependencies:
  - asv
  - cachey
  - cftime
  - codecov
  - cubed>=0.20.0
  - dask-core
  - pandas
  - numpy<2
  - scipy
  - lxml # for mypy coverage report
  - matplotlib
  - pip
  - pytest
  - pytest-cov
  - pytest-pretty
  - pytest-xdist
  - syrupy
  - pre-commit
  - numpy_groupies>=0.9.19
  - pooch
  - toolz
  - numba
  - numbagg>=0.3
  - hypothesis
  - xarray

flox-0.10.3/ci/environment.yml
name: flox-tests
channels:
  - conda-forge
dependencies:
  - asv
  - cachey
  - cftime
  - codecov
  - cubed>=0.20.0
  - dask-core
  - pandas
  - numpy>=1.22
  - scipy
  - lxml # for mypy coverage report
  - matplotlib
  - pip
  - pytest
  - pytest-cov
  - pytest-pretty
  - pytest-xdist
  - syrupy
  - pre-commit
  - numpy_groupies>=0.9.19
  - pooch
  - toolz
  - numba
  - numbagg>=0.3
  - hypothesis
  - xarray
  - zarr

flox-0.10.3/ci/minimal-requirements.yml
name: flox-tests
channels:
  - conda-forge
dependencies:
  - codecov
  - hypothesis
  - pip
  - pytest
  - pytest-cov
  - pytest-pretty
  - pytest-xdist
  - syrupy
  - numpy==1.22
  - scipy==1.9.0
  - numpy_groupies==0.9.19
  - pandas==1.5
  - pooch
  - toolz

flox-0.10.3/ci/no-dask.yml
name: flox-tests
channels:
  - conda-forge
dependencies:
  - codecov
  - pandas
  - hypothesis
  - cftime
  - numpy>=1.22
  - scipy
  - pip
  - pytest
  - pytest-cov
  - pytest-pretty
  - pytest-xdist
  - syrupy
  - numpydoc
  - pre-commit
  - numpy_groupies>=0.9.19
  - pooch
  - toolz
  - numba
  - numbagg>=0.3
  - xarray

flox-0.10.3/ci/no-numba.yml
name: flox-tests
channels:
  - conda-forge
dependencies:
  - asv
  - cachey
  - cftime
  - codecov
  - dask-core
  - hypothesis
  - pandas
  - numpy>=1.22
  - scipy
  - lxml # for mypy coverage report
  - matplotlib
  - pip
  - pytest
  - pytest-cov
  - pytest-pretty
  - pytest-xdist
  - syrupy
  - pre-commit
  - numpy_groupies>=0.9.19
  - pooch
  - toolz
  - xarray

flox-0.10.3/ci/no-xarray.yml
name: flox-tests
channels:
  - conda-forge
dependencies:
  - codecov
  - syrupy
  - pandas
  - numpy>=1.22
  - scipy
  - pip
  - pytest
  - pytest-cov
  - pytest-pretty
  - pytest-xdist
  - syrupy
  - dask-core
  - numpydoc
  - pre-commit
  - numpy_groupies>=0.9.19
  - pooch
  - toolz
  - numba

flox-0.10.3/ci/upstream-dev-env.yml
name: flox-tests
channels:
  - conda-forge
dependencies:
  - asv_runner # for test_asv
  - cachey
  - codecov
  - pooch
  - hypothesis
  - toolz
  # - numpy
  # - pandas
  # - scipy
  - pytest-pretty
  - pytest-xdist
  - syrupy
  - pip
  # for cftime
  - cython>=0.29.20
  - py-cpuinfo
  # - numba
  - pytest
  - pytest-cov
  # for upstream pandas
  - python-dateutil
  - pytz
  # - pip:
  #   - git+https://github.com/pydata/xarray
  #   - git+https://github.com/dask/dask
  #   - git+https://github.com/ml31415/numpy-groupies
  #   # - git+https://github.com/numbagg/numbagg

flox-0.10.3/codecov.yml
codecov:
  require_ci_to_pass: no
  max_report_age: off

comment: false

ignore:
  - "asv_bench/benchmarks/*.py"
  - "tests/*.py"

coverage:
  precision: 2
  round: down
  status:
    project:
      default:
        target: 95
        informational: true
    patch: off
    changes: off

flox-0.10.3/docs/
flox-0.10.3/docs/Makefile
# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = _build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

clean:
	rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."
htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/complexity.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/complexity.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/complexity"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/complexity"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."

latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	make -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."

xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
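For orientation, the ChunkReduce and Quantile benchmark classes earlier in this archive all funnel into flox.groupby_reduce. Below is a minimal usage sketch of that call, assuming only that flox and numpy are installed; the array shape, label values, and the choice of func="nansum" are illustrative and are not taken from the benchmark configuration.

import numpy as np

import flox

# 12 elements assigned to 4 groups: [0 0 0 1 1 1 2 2 2 3 3 3]
labels = np.repeat(np.arange(4), 3)
# Reduce along the last axis of a 2D array, one result per group.
array = np.random.rand(5, 12)

# groupby_reduce returns the reduced array plus the unique group labels.
result, groups = flox.groupby_reduce(array, labels, func="nansum", axis=-1)
print(result.shape)  # (5, 4): one reduced value per group along the last axis
print(groups)        # [0 1 2 3]

This pair (reduced values, group labels) is exactly what the benchmark methods above time repeatedly across engines and aggregation functions.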
flox-0.10.3/docs/diagrams/ (binary PNG/SVG image assets; raw image data not reproducible as text)
  bitmask-patterns-perfect.png
  blockwise.png
  cohorts-month-chunk4.png
  cohorts-month-chunk5.png
  containment.png
  counties-bitmask-containment.png
  new-blockwise-annotated.svg
  new-blockwise.svg
  new-cohorts-annotated.svg
  new-cohorts.svg
  new-map-reduce-reindex-False-annotated.svg
  new-map-reduce-reindex-False.svg
  new-map-reduce-reindex-True-annotated.svg
  new-map-reduce-reindex-True.svg
  new-split-apply-combine-annotated.svg
  nwm-cohorts.png
`_*hO_Gۼ+M=*KfV{9=0RKt8́]Z`nK}ؽ!ڱMOk=<~47;A*nv8Еy5Kh#"fe?;׹laowWt)Ӹ|,ǽ:e |6mnGAMN$q¬5J_iW>k?T}jN)UXSZтe©VdO۠Ԓa&iԁ?$q: 0Ix>Qp>}0b/)m]vkƵk ''0DLpھTYWw<۾O}#A.#'fcta3tR f=zS6!+@KLγ;?a߬ 1:m03+ܯ@EGo(]Q2EgQ Wn,SÓ|8Ӎ2wg%MO h"ӻl}(4Z/qbMbCkYkY:] QtaJ"ː{4o3MW&Tmi{[K'9ߙQ3][3m[):#(lM;aѭ6Bcm`kî[ORքUkes{c!m&/<#wY]ϫ O\5@o5kYwo։QS_fWN%"`$۟$Zvw,dt4>2luȖfVƔj]"""©6ˋw<Æ cݼ=ʒDanN Mɾ9YY`n'y?mOTA&N@3oQ  ;Z 0P..l۔yQJZbxOZmOۂ@Hknp;i=FIIt}QetG{]ÓX+&!TsGM2BZѤ3re܆>c%;/1sj5v67vGisO/S2!`ϰjMUWfd}QFb+YP?6c~b 7`>/baa o&|?1G̢B^mm$5*3c<$č¤6Y3*u[vvl!/\>AKaրNho'%j,k\g.G[I4-Yeg^y N2S߁L省s+xhlmSGy_^тYevAA6r**`:T)UdIM^q zi+vc-#CqC VK[8?/mM'){hœ볗#w Hv`;0SCl>˱ cj*5cW+Tт{KRy@Fղ1|% zXj+)咤{gAxߞ`Ʈ\ ogGy?O9 .8ne2m8h5YSgk|& 8{)m]6_YϏ\$y.o2]_ڜ;8|ۯ 죎LpLTȽÜ\CFIek:!!Չ IgcQ+%(]s6uޫ Hv-Gvy _϶zŐ7rCRJ7ul^'Fud;Gg/ե:YGT 5&mx÷J)/zFZU38^?xAzƘJ>OTT]S N 0?wgU3"+PmsAAg_@^>31h"q|Uڄ ǜU#1P4!i}*Uc_@_ϕnThBם مcעÄ=Dȴh૭ V(^γ:#}1:<&x癩I:I(`>PHNyug'-nWuj{3ENd`y5|A@E'ȡ%+Ȣ{"7}z2` QU;ӨOĖZF2@G[=o֧K$GǺw N,kOV+ي53C%Vf!Cq]_oz͸oecTZo{Nkp^$98-{J Q'#7f,F6U\3S{ߒ/'fb"?]!(zg~b >/xoOg5&Զwoy,ɔ3rjaFx{Bv$K&W<{ړ|w#Jg:{6}E욗CO34_76mژg򥑔|ՀIa1&=>[RW ___f͚uVR} "ƽIpDtGkfx'Nu|p>$虜̥9h, wjҊ^-MV%x8g \LfN?1mMtA$'$i}@MqU#P8Y_hz\:gܦZ`ΰa?&M:*2+@Q4[}8@٨N7,_:bj_jc]t5<~l,xatZZ*OP &60|h7]2#}`t!N/ |f'fΔmJC)5h˶مnl*x-҄2cԀZL y͒B)Ҽ THnFRR =zGOߙ4OP_#HJIm i3FDNBs{0ސ=X ^\v4u=7UdO?vHh}5֮ÇslA"'Z}G@zR*HHrTJOTns k'֘uPM$&k uNjo|M\hŔ.۞k /5bWҋ읺 ̒_Abw:mkЦ}v3t4N`@-K1AB0dٷ=^ ImKe2&tMJ9+xʹz8*VeoDbYL<<쏮 ]MLNJTjAQˁ"Sd&\;QQ_R v&ŵ֫OJaW1a6ZVigQJy+ R. pu*Li^)P4y{ۥҽIQI=m/ONMr o=MF~ %~o5A 9v$*/S[{3RoG;R"E)Uim>ےcqS A?R')ړ}/1a,bǻQxc޸lMn|}SZ7u?OQ?W;bOB[$hAd3j =Ex7)Ƅ2:z1,}`dBzUҲwaJSĄ_?@zZw*5::G{m%41udaMQUj'Onݺ}XUhӦM?q3lݼc6TƩS (T(u3iƏOVbJ7پm&|׏Ys*ֳ:)>Ĥ,fL)&g˳  ["ա \b0 s>$c%A)4U1Rh[["! TDHuD RNDÙM#/Qy@6JԻÁ(V xWKh#_1NZJfWDm.kl:>9v.˝H`n [m9OV((.s<̏G@zO$"&ɓxyKp3 o){>{ ('[ك׶l{}3}5n߻SUL+}B~}\F^k4{S=߹N>>>$$$8Eֽ⾎7#,;V6+m}c7ټr$2{n|n[2W T#-S_R^ۺƩ76%Qub~78l6Ꮀd[]>nWM.9h[r~U c~(U꺭α|d{ˉUGTT*h̼y Φ߭@CZe+y0RT puxao~9i^Wd-H/@Ӎ9>{~3jSj4KV5g͏J6- qY1AG=7(W 4ٞ+=e!:lŦx{]>r=cb6Y8ZӲ?CZ/*|<=Z#/ᙜ́_4EhW{3S6bsmr'IʃN~-YogQOOj{ @qP.Ō{;s8߉[Dcf\!P M-/v u}pcɋibޛE7A2Ɍ 0mEM:5Y[s 0k9Zxm?&Q3)JfzG6"ӟƹV h?z.og(b F~GY]r^DSv{|ϲ|rN cNŎ8no\{/-ebnJj֦MvAApͭ(:Z dXUJQJ]/6TǺv03k;WeQimBl^vzW8?y_|%VWH6f=CFWm}/h52Uh,Fo'|yWekM>z3g2W]zйkt훪P- e"2zg& cC޷$:V|'D_'_! Ùn]jh0^ {h71s}u%,꿁 SJE&KҝźCPWYCW /iv>j>|GCoAq^[V㡱 zœV:bԅ>xEo<k`f6 b/a:ԖưzioV/ yG_nޖڹӈ`ݼ=,3Ʀ}[;MǓ,j*ZfM9pD?zi[^͚LAVHu5@?>X꘼d*+8۩gSf[7'35mo/Zn6}f8Qݣewr83tg Ԥz|xa<}lίhgcSs~t6Y9_9U"dZ4%{Q^$ӧE?Cȴhvc@*/R>8ijP1|fY',NU '>d,}Y6IfN$FN2b+50 >QI.;~-ΜiDI9 zfY&OJ_DlTVF\S?0话яBl5cx$%cwx-KgaJN]E'gHx1qs]ӦdnѰgNY9UGU<4 'H#YH곉nѴ\|s4x d7c?ˇ_# M<_y3637Ͻ w,➱ǖ%vW>kQ#kI?Fqk׎Qƹzf,Re6 Q;w>ѯ]'5ϥ}31y1:У]Yl;zOmO8=GzSG eokHE_yW=Z?OJ 3 $f< &!VTS-"}SQyA~uX _eֹyr^x"g6sd=E~+͆+86}G6pU”6ŅlX!;3fP6,_C13٩~/mllB2|K,(ߟc̬_n0Q9H wj5f=Iy.u1! R#1ԬɔG)aW8V+372Mz #1+χ] %M?LĺQfEC&QAAo)-HhEDsybB̌T?b/ךu3}kJ5>mkHm 9Yܟn3Jo<-Y yWe+mWEY)ݳj6x%V߁DU.@ ݙ { c>ҶN+l(.c`j~)ݝ4<[eg h<$ǾŅ4?hG>L[= qi)[f@s ]Yr#~pY絰+3ZI6 =zkgpS=E=V/"r7v -AAqqZZZ? 
$ئh}dp\TGN0f>'2wi^ҭ I [r;/#fFM^ (hJK7UΆR#)Z^fW|3u5ƐE(4r~l4 ݑ;;G'n5Q/~蟥}y)NmN# }1 >W,%gR\-.]ʚOfLݑner̪ORHiϖr|5i{fgb=:ZN<kuMBKڋ~L0eljz..KebX1?؞_|RC^g%M^tzΞ U4[RJ78d/M~!;g133}_ @r8U9֓3vOJ;$813B^@܌xW1SRz^ sw (f/}{w^TOu'#~*>ٟYC^?lF@#Na㥮qm< IA`Iu깨ދI3B)a,i)\Z'dUgkVvwV.۪㣫:PNSQ'/:ER^ T@Yt.ς֢$T*ۣ/7Ijऩhc| _OpYFxۖ{?u3Ra:r|{ޙg}%F"D.,Gpxt'm28LW~=Ɔc 0_}c;Q6_''"D1?Sg&;mx*"!ngV:5yԝ2%g>GLsS+7J.'<^3gB9JsyY7+FI)/D&IJK#dZ4žTxq~|wSr o~@}x?A|X77"!WѺuu?AGg> )Bd*'bkdRceݝ+S6?c IwƿW2o|䲽  Kvcfc עAT7VwWoPc8( At Iqt |C-^So)!3+=~v[ 7|X:~W\MttH/pIL4{M:[Fh<ȽGIOw}-؍p˽njw9rM\ۺN \+sobWo}wT\zZ@v7l91(a 4SJxk5RZJ GnTLȤzRH{atZ)cc5/0?D#Sr7╹Ѵ{9QQAXF} obyUXd,z[-n?H;xwi;j>]v1~Nd3fR )eX:y$-v|o"x kZ:bl/e2EL Y/. >M>dr<|[ߟ<~Q*#O Ճ?2`1"bbkG'cvOűt=g|>zձYQ'mPK{w<̏O-̩AmO?e \W U;6$fk;~yjׇ [mLxUy (|]<5aq%c0.|KkEVp_¶7ntx+'l:~ϰ Lǀڕs:hAAOHu83:l ,1 gUh_* T%yIuЦ>Hu%;ѩ Ï#NS:bտ2v|/ja֬s̋eׂa߾6iysk}#͒hdl#xz:ỦZI`r+$SqB/ʕ˱day{ h]Z@ŹAҩ)45imso永ԏLSOcvF[8<'Gg,&{g\cz'Y]ʹGeKޗZ^40?vΣU}3*Ʀ{'e~Kr+Zafdi7=5? 5kw(l77ً//YMXMOO盉۳\_fui1?Mb,|o%K?2U>a/pv TwH]7^*JoRK#W)ek/JczJObɋZ~%li=ЩQtZ~/J%3kP(&mv>PL)vR6Ť692 OeٷH4Xږd~ CwutzͿ69:oV盗'Pg{a4Mg]t!Pڧsg}Ocx,Q1qsYEMn~Lf:V)j1I}6!hGgi\(;~ p]T=7ڞH' H c2zMJFqLz̬m9p>Ysd {LgւTɞ~@N (щ%VQ6\Ijjb/q=!ͳ㼀щ,U#|L^VĞ<$(l"-T:Bcd'9 e;ρԒo^`мTMd߱{$G?*gϚ0MUӮ];x;~&B~ݓiOZ]H?S{QD6YD' f*Lq8vFrjO;z=|9a]#ͤٶ$mKhlܼ->^au6#N%_!h29<џڳHku6Ju/9; ﲏk%bɲ}a$6 })4ˢTy"*K{sS}Sf1| \ÚO`6|%;JNlTuJxvv ɄZnBZ; 1r}[zĎ`Ԉ*,Xif6 {! ?GN9Z8XkZkKk]Rk텑(I i kX`xlZZR^yix@h~J| Y-ZFAeBZih%̈́CV53^cIbI㓍s\&M> {i}0gexI +`F@W}ض#ZM꥽i-v-l'zρz2K7^poWG؟hw6mk؆`x?-c{X=#?;]zq$p Q\8jI9׊dqNgݼ=OsA!{rDVJ[ZCn4f&ґJ7Kֶ_6ǍZ J)ƣ˰՛)q0fMDV]J3xd@♢b,ԥP|kq3S{^Enrۏt~d>l=i ?@G=,OHiZy{ \6BT4!6i)IUq%DfHvx;\~GL"b},o-BC _ՆąZ)ں5'PzVߟwwjz'.CДJ@%>y2?y {,a{m{t|cb!pH'kF54Eτ6cbݨlf"^Y(v vJwjx&̐ nY̊bw ɕH_cf/Z`?Vf|rybzE|}})ReŎg}Ƽ9}Yy;W` mΔ ~҄7Q6㟒M[fF11 H$3~. چ5bEo2[>M|Z' |ΚH,Szdw1ͼ;ne3#'Nߢhz4 9ﻂ^)QQ:A$':cScfZgjjk Dc5VdI=nmտZW~7 G:A7RJf0Kfmֺu#[ٯZu]4XvTǁrEo_ksߘ4=ձۺ 䱯+nx_wkf?mhUd[ֻc7~mcn]I7p.3zϱrm.g& 6ū|\Jy4[:Ͽg+mqA{fOw՗?n A|!vMۻi ^nn~O74U.Nm=Sd2P>?'*?;'WffXԟ˨n:vS_ka*8ʘq'HujU\.Ѳk.J=}mkEg}uyxjg֐K[czbuXLIƫp6( n2}i Z CrC.\EFe/9pARl/ە/16Skz}] Ͷ8z{"pQ-T "Yad$I| SBGncPT CGЈ:UTB fEƸe=y H4-&RxV†vxrQZndzo>jQ:ՀӧO3,^뾥\Lf@iAY\l~^Zms4/$ƧE)8KPp)e_["VMIdu*3/]̰ahP Oe֬YYÆ c̩.|5 vMnM-m˜ ڌDI;FҔj Ed+Dč݌~|Aщ5OWHH ^k?N,^*]fnblǧN<$,ҞܕĈTu?O0˅+te @QoRY5k/_7dxFltz=fG E:]  7=aRjM^Af#8a{RjRCJoRgR1Yl}X)u*FfryN5;KԈkʳ#\Gٲ~#:\}l7ҭ2Kgy*OgÂv1cm92yI*Ձ ~'3@11&k\?G|M@mb}5 "CM2=\m=8)Eq|1wm̘_h)sR8]ېˋwzio?X59Ɩ `z`?} H?np8ֶOvZj֋Pےe&ԁxldžv˘n?Iy3޿;3)\R2Q*$4٩0c30={0e6faٗƒe2ƒJD)BIV)6u~:3f]/(sz>v!q A,i} 6VOj5H#4U +v@Bi? 8z" G "׀""B+Rm#}Ug'ߣu6eD=o؜JPYKA{1RjZyN)iMT9 ~9fy=|did! 
wߵt"mF*Gl઻eDK9MY⇰c9XW?F OSj9qu}O2;y=I[_GDXfBo={ TCafC[:L磃R*^v!O; Mcӣ%zN%22zKC7~_; YvtL`o $h鎙G)I-A>bj˸r-h=ǦG3"`@~i)18-s܃|yG+n&H^kO,7=|rcWi)y;z~|M|:5'Qf7abPt}K[lBi񔖽SmQ{j5M|p#G5x# ϧ_LLc)hXyّ-1d?p%kut>SեEH5)UFiF*ƀ5 l (V@?~ .*Zu.H;ۀ= :L3H5+pLWNpjkAXh@٠͘ϓFwldr ]/y7{_C}IwW[6j6')VW$A/;Ԙ&$wTUHt4b!14rŷ-1TFoҔ1?ZId$'#=T;spjB6)D]ZՋF)TnnX6xWO#:ƶL khg^}D֦rs?C0(c9z%%]iJKM&.3 7*͎119i N6{k̷Cjv-<^QQϧ*bs> sS%*BP`ⴸueeg@ P&DDu $3Ȉ-2\T2mW4^+MDUb4E( W %"yXj"v͆&Ed&B.=xVmӭmR!-VJ)_wҿ1vRgϋ3~18y*j{OT*kLcTT>ՓOXKWJgү'^cBχn`(l },cLdlU(*6p+\[n/MGThtaԇ1)ҟEh&6$88π$% Z؆S2b(Ut^3sߣ=ϪOjTJ_ WCU?I[5sQasF?.]S矘MY0=\3\m(6snAuIK,.,^˄wY>g,0wsӹ(ϖwjk{݅hذFpcA#o(52eq%{ѭL!?qɭh!JA5{a}Δ?/݇Bi))ĽU+[0m,VnqkSﺁ/;6iNua=%} *,S*ԱFiFIQTuE@_QAEm>` YiTFiFB RTCuԲ1p*bE5Ѡ:4ITNjAuTJx4^PF&j_ zZp:LFv.}5ꮨjtBmmU?jdr,BTmjO *Ӯs]E*wfE wro)=f$[O4U5}zŪ(D[;etBZp#$067#Ԝ&@ y{rxtQ%v_}w9,>7qqbԇ4<̭iQunxA]cD:1U(D k%sdegӡ3fxh=n&\qY2!j"-eN6VM_fqP6>Su#* Տɨov$.t+ AG=cPVÀD뎮<);,1b9bNJ^^`?FzC{5v.\p j :Juz%DǧN։t~ '3\[sz+UȄS 1=Kuv29pSגĉ[iǪϭ%Ʒzo.~̄R P"ˬ{EK?5 -0kF{r]s^c \!1}%XGz6G97X¬_pO4e;.HxQQ>m kDoEQ B> gZͽ7%1D6o&maRSerL(T|]K%F!h˷L>fզ U6+aPQ48 L#t;fܬ9c=g|x?_I_KJ+>~Ⱥ^j}rD Gp~mB60e&7iH/t$v6>t.Ȅoܑ@#wqnPEz\]cUυuxbHdž|$sic H2NGHQ[ή]scWy9:m;;@by6g;{tϙBf1v:|ܶ#x8_e>l[Os:g{6AE2VJZN&̔%t쇢cSl# ϳm0-P]"ʟz0Sz[y kšs3(TlQZzs ; ۏ V{| P(x=-U*Gd*h^9nE5[IT)yF'syǯ:=?X]XRg, \ )gOWQ5iFRQE<0!~t~z1DezatrYI-n{<̣Tv/׾EhOJfp"~<_Pplw&aQ!8v?Hp5 O7*YEޮ6u^;pcSsٍiQG8%#1yo|:pZ#\p-nъ\ ZO2zUjW<0<Q^^wO59tӯfS=@[8ٛ߭Egx[OY/a/1rߤGY n~ePZUPU} ~97/sbkŌëloCѱ'[{=;יxFEObQh*bQmU[#4Hz1K QM   @HF:l#x*1EAx*S zŋkKbNp&I)./ZuIMOc~%%߸RܪeREpC|F0ilϜ8!N6"2Պ6~}T+BSoIP:En%_oՃTT딃δ޸Og)Ͳz|nZCSl;7V:,f.Y7zGXf(25aa؟گ^<.4=lD9(j3r KgMi[6!4:`^+ۺW7:q$A)(LKd.>s8=CuJ &j̱\ySJ}9q|!嗱#Zsg2Kbb5KqU֖u^'a 5& Iqq-B''pn$ziTܺ @έ~99Vir{qd ա9(̞`|%UyCXeG[V8%ӧG@:tNcCct%v+iX;;{/oteo9 C 8Ip=p#ݕ QkOv-{kw0΃zdfMM̲j72X{a&xh}y4ċ+җnMHs52ѸMf-'{6#_4F/uŽHՌ/ 揗r$&f'O63fqߩcdLgIs398ֆ\-Sֹ OEp/=LȤVǎ}ȣ_jW/p_8@v1ClPq:\mͩL젣ɂh/ & B,6|uψ kڷ{!󺼜؟^Y~`җ+ϴ81JιՏꪁlL{)hU?ecqZ$5]F( 4=ydF x#w :`4[hC_QUDK^lka?W#`pɂ41ẓ`1q%3dfmBU%_jJ1ѻ ]MLE42w:lgIK' pF"kD%sK1".w 4,%d(~ߨ}Gڹfx_F/S IPPsJsb=:L-5a/0'5 IdKolÒrGWIclCI)ѢYU|`8˱M|]N=iNRlF+ʹc\mZms&wp32A sdC#ֿ[T!!ltXO6XK;VNңP&*l_A"N֔ocd>a.;.z[En|mJ$'pp3'} q$\n7e%~BFӦc4ʳd+2`sw ]؟֘\ݤ nt} fl:+xE x `u6,8Kvh/kXGʙ5,ƭTRA0DoL= 9o-KaYv!upI!pp̔wʐRr-&Svז+3NL \BZׂ0_yB .S2.c\cʚidE,&nd ViYfsOQ1 BW6 %u Jz򳨋.1'=mQf@v~m:_ʺy'1GvB!ʜ@5R%+Y XsL{!Ȫ0ݘ"'_ i+}iE@Yy?GwtTv̥gі 3-7Rp* 6A_#Q#4H#SOsEB`B#XmL^۫ړDQVk8iFo<*>Ko;{XՍO/bTG}Oɩ1eTUu [o{]آ[u .gk]hu˨jJ7xz&u}_gM\RcJg^Vs_zRW =T:R H~/ ujdɘ}f^%IާK8SK ;Nsf@3imog!+W^lt8 ]]FIJ)spHF5NQd|qvvL.N&&Fel|I#t`¢.6)ż.nbhh(UcqtDiv\;?$.t+n6jyRJ;j!yyݩuHL -p͎*L ݃i1MBS՟?aبz^ٙ+XQ1;1v#*eC{/T>kq/>ʟ;k+g(>0Sa:)D0GM-{p7"i_I먷xgS]Q#4H#[KBAԜGDQ t5(9 \뢸d9MƒV@K)qj4MRɛC X! !|d ˕J4&On1HGk kw-88 oaZ[$7w3moˤx #H_2Ǻp#JJee^'4V .,Ro%]qUJ Ww͸u[<Ē)>A[肋#>sjmF"y?SER~@D] ZޣR wSq2KCfa!ջ; dK&Rz f䧂|z*FeS'} s00#lByΟ;P2|,"IIz? `U/.y%{FR]wlIB2襷J|MFFVEMGѿ+W[knt>^Iެ RMIl:Y&8Vx#Q&Dlba{o5sѧsꓚW4˥/EXXc {.N2\œ$MeUůZ癙ecщ{a8=N^#vc6^e<6}GK ~9NcCtQB4 cə׽0׷I~Ww[Lt}zAv#ҵLJͺ2pq[g4!b:J=djmFʎͲߕ_R\Ip#!SHjь1w:'C\ݪx^$zٟsʿڅo<9|%u,!#8flOaVn&$4jB9CF esvM~̿pܥiX;ST\FiFi׫#A <R/Ah&B+A>vo#fL-18d3_L4]Y 3ރJuC@sz_}(Ѫwl]h8LB7"yCtKU\T<evc_ OjWԧGQ1 >~rێRu2=q/?xL18O/uj/74:<2*ћIyt;j!Vse8?e~I)4ZQv^Ϻg OǑڻcӣ96=ZF|? b!>ex$yH~v bFPBݜ vT|_TK=-jogFt Gε5!(r.ܷ ƞa&u#)1 !Ke8buÇ9:S $q {I-rkNU2i학C[|F1e+yIku#QJYToyQyHP(EWI>79~FE&*5uj_t{;MPuZ? 
tU#g?zа0zzRQ!->!z^\.aBzbύu{\pD5x+Hh[Qu>۝ 1H]f-axO?9 rx7̤QB,}WI'JF=P*R=N!]Af/L~=#4-{$ehȇ= )>h: ,=~99]Q(<9alrM{%-"}Ssh*rO ;TQu齰^#4H#ի@u~Ag&* "a=CEQQ"!7}!HEq:44H#!IsҠ:$iP4JiP/:^E $[yRW]3@Ja5@wQ<wDQ Nj2VQrlA~E5{Zr8 %X9]^>TGAۧ:U8>I< DU#MULRGfyhNhnZCiFZ2`PcMmS A Hl𡃎~.!..X[MR-ǤFT1f97"E^՞ *?ܴ-έLw:f P.%zIϱļsĜ A,GL\\;;UU}0c{\F[I¨4JE,HQx}%4,AH!+et7Ҿt+GK'WtzpSs!5®b|iFQ*=jc_ 3/1o%?/ |?tz8fE Y0CYdnʙ)*\"ߛllT(Z+ƴm_hYOvKFq]򉋋CTrS{ƣ\Y}BL 3|?@tp +Wg,*bfط8)ygg wѣGyX|Ko[O'ζheGZ^էz;u//R/^ K6x{Tou z۽ʵmK==mϺUo{]:o^'ZQƵ;u9Yg sF/qc(DVm />ֆ勉GDӛv/ ~^^*VJQxڧ((&-$I, [TR˸QKEQ| v())ڟZBI(iYXD-%_onFQKDR_onC#(Вg۟^76,  myjz* D(eiL%( ]Wʬ}ސU'ΩcR1ڿ5nB}zLJ$Ü1Ǚ~--JK*-l]hZ{h?c_ݑKe.Q`pkZ>W]'L<)ݷTP=BJL26 --0 AN&'yy.Y*h~~IQ $Y`iuJi)WY\r1qm(`j>NpL-Џ_q<썟:y0^0Z-A*V42؆B 88`80:I:I^wۇ2v'n;h$r#cW탶`ϟxB2^GҷbrFAzi*x8:KN-U*՞OxF'#kҷtnpM_;{x}Cg'(?O"e$>P]sJ;hKq1BPf7i_>4H#4V"x܂T脔)=4tUA!fӁc9 cBneҾH߳IU닏j3uFŦHCê]dv۸=# f'z'<9^FIZ6j1%6= ^+)ې[6vH! |D^D^jz[2Dr~J:ecL[~M |ٍQf7&Xޞdڄ89Hv;1ReMfr'ב/ m*锍m3SR=)8͝C.HwD&q r zgfI>!F&(rR٧䐍_>iIIҢG&f_EM4 'xCuh.YJII >򥏣>bg"-%A]SYf5.YݠĦ'.qޚZlvu[Y a`e&}*O=%oC 6[i #;e~} I*G.pG!s_<$v_V8av#P Wwަ7$zN=Uw5zM@BIL`]?#....{k4rрS%Si5O>|x(HgT"xcӣYVKoyKH+8fT]iĞȢ),ݺ+ p]ߖKDލd_ J-rZIah $+8wcjjÄcRbC݈kbX؜(Cr@@oKW,7 fPCch[Oq&=l#XYcfFiFi߫W18n!K{?QA `SHh_ 1PƪiPVv7!}iGQBE<Ӓm=py_IM!?)H)~y|2CG+kWR*gu=(X^CӫOMwjGZw''u6溱kc.?řF+<}0׍ jי o ֮|*owUCknEI cLa|gTLs6t4wL@2xƗe?JI(DjvG"DC AlFo1-K=р߭Mtxɫ+rwIj9N $\RE;__A}@{\MD7'ؽIV㬷c"qs_b,xڋMoJ̩@ʚU@LIB"7>i@Ҳ)z[b!!Z8`3R* m ކ;6,2]95H#4H^cK}RXW PBuY? E1\HAc)tyk^y0'ݚ~Rf [.p%3s }i~o fyڔTT]=e-WQ=OoEPBPQ-'+'s36;,mIjepju:VRVai՝ ȫ () *Jȃ֔*e[>n4/!jX`H uřuD3*ImPo'z%.'BGq~DmVExoGu}Y֩XhD\9}VR3twy/Ə)l }x׊]趇+if6 m3>'8 W@ck5xWX~/oޣB,<KGwNq8j@-D۳(bh>l%‰i[YYIWkvw=&~΍mlOqD4KG3.![ޙHxy(Ebɴo> IOxd57)| yԂlq4JnZU\xgBY Ǖ:zce`ΰ;kÖ؟;̻-zpX Hx͂yywHSYA4:m!zJm'wx9Xg] Q#4H#!p#oe(cK@gQ$c ΣFiFuHy$.n\qSaјZ솗倗bTӗZozzۗ,.9;RrfDmzQޅ/zq Mz7U?R񯳭Cz[>t|Ҟ=멩J{Y]ٶa: 3`\fkF˴<>wIքxL[Uv1׷9UwӫFiFi$UsZ(>!eP;$5a[&EASCR@S/JQjgfCQg#Һw.qܬ9ZfȽgtzSe)l \X^Ʀ+5ZXKh(՛HgN8^ IcJ)t"$ 0khYy~ёNn<-OF \ɨk\XMWUe>j&+zHDHk< [(=ۄ!S6\{%7C$~mɸRVE$cB1. (0ft֔Bn~ƶxZCh-4F۸.J%99hi1_Cel5p #d`tmrsf秌ߌ zñ՚Y=z<-+=f&@v&l I_K(Hv wa) ..p:H|8wl(q#-e/(U{|qD8R[<nfR0aޓ<٥)فyHbD U2[(*Mb^LO.u3cwH(_KyLҋ|` []v> o#\]L,]6ŏ<͋=:Dɣ\V]p|ˮ3X-.kE,~Kn,+uviXH# Q(-/naϞs'{<$ 2vv{e z-¬gMz#ϰ~Ѣ}Vӕ~odώUptXHItm9@wlq&?eB_SFn#ƴeun@sB)bK 0 BXqm\[ƪe#UBB}D P CbEV;vU,Q6m`-XkkwgI^Ŕ uL20^0ENv#:ˍCvYQ ɻ՗ymCޭ ~߁w"av=΁όg3Z? w5Nįcz⧂tNyr >-zdЃq<4 E+2V{Z}[+17^|&ِ/ꊮ/x)Gb#u|uj_`z%H:9a$J*.; ̜;G8cz%OY8rPmie9KD,ЀG-b[xQW&]L%--"8i9[Z2g-"߿#s :/:\F01PJ㽔^+q?+NޣCY))Q#HR3 /s?v? 
1an{+.c߼!GȹĦ=d%/^'rorrrPNl}26cvÞG?E@):Gp8 evc,r+Eœ)m>w:@9p}#&!πo6:JA@#C|/p>Vo{Fx/M3+Fq82b93$Nu6{;(̞0aWcp1) P4+`'or?uQ/f=/Wo0-~:M&&N6ªqJfh*ns?rYJN~ 1C[W0Y];65Y0ّuJs)RI;w2:)gazWLoJl~H^ 9$2OM[*;^@V4~!pfS w1TzFRIM'iz AcBsҝأexCr^z bZ xI_jBglλ!jV"W}d#vIbTS̵F8 0r6)9DXpy 84S$]L~.Oe=9)Uv@uV,'2hFźyf&Kq$S^O$癙,>@ ߒ]ܖ2S(wJbFO'77V%=j|\}F35\OI#w\olCО4Y0¢1ڿ w+y]ߦ۲cFiFiCx(b(DQtEQ84uJY(@!R  ֚"Qh#uVunAũ5W3|l{wl4(8Q$̰oߵ N٠dݎB>DK*n^#v!sg2[AcfOIi=l6mFN!ڥoe0] ;em,ּByR& (FTc6ߍ4{z\h+9u]W.WŶab!$wDtbF~) kA4ړjf5 QQQu~E;z>.ޥBW#jތ\Cn?AD)gФI&~(='jܷQ)[إ:}iT$$9g #XoHފDG4vgYK{;=L@K{JXBBK:s:IA<;}5exi=&5i԰LTP&jSեBW6N2r3vv}Nyna⇔Y}6|~AKq6`7 1a-h6f;)8 R)4=KUljin/ u!$]yD6Ls&31hoHh/K9[kxJ\^1**Hk!u$Di1~\;$D!=/~,[$'( \ %k4ڬE<cu_6ٙ]Vt.8JiD%y?Gܴ͍~ \ȎxDsP)ݕH6%Tgرз=g 淽8F\CVYUpO ZL#Օ>KZTjyƉטӏOp͒x-gm}/2jFiFGuT۸ DJ/E1S7EKU@^n`+bA|(>` Yi*L([jo8%hPiFաT HuH&p 2yS&`(7>Gu,wu+++PWߕ^y[J5:67*ζUa>j/;)GRx㺯q3q;n^7>%ymB`S߿>ucK̵F"5K>ֻ͉}}LK.}?ꨪIJGB-ԶR'r/ UճVжLhé(b񎛃DnTP5k3þEW!E%al(loلٮzOlf5ήև= )\/엋JжA[> ]&0U-q99 qά_A'G ޯE^,8DT/ x۔IJ=MA1@l$c7RbM9MfǍQVgt"$ M5jKLml*2Zӌ!(y-河Y%6 T=r}(Ns Z#R^\y@߆'\QS V`@L)f_ ZzfX!0j)˫=3jM83.F0{äڳyJϐ'uSgqZk,< {ݫ9ktw˩5!]CY W™x]#/3dsd,C2y*X۵rmXX򺏾QKp e'}F^%#24q?87|ޒq'#Zcf_it KF=*gʐ9LYK!s}o)sFTŷ?;qvvA*hN^c1v]i|+V[yע;S)LQy$v0dbt~3"?;:<|bN VB2?6)"(CH#4UAl~@&sRfU#(D= :VO ٸv&7~)bR/Ξf8mCKKDKKD%mI :t%-`{NCXI[R/jsFL(U1 ':C)dϚ%Nxd}-?ut%.*t*#ڴmA!O@W#*48۶ݸNc p+ES'xU>>>><09G|lT`{R W1Sor$f8FGSgF6BN2VJlq< H\Fyw{u/M,.uJmՕ^iijNF8&m3"*^C91c vI[镶E(Ya:?k~YZZQox PY_?(7>?tiF[6oCȤV==elGBO4T#E]g{ҫo_o\í9_8e%e1E'=Q;#~jy:4Q6Lq i!YԟC>gG+Ө)Pe=)0i_,]@292C?-uzجH^5s;5ߖxRG7lHn\8k94~_ tvosFru~%5͟~D7BseUzf䘟wkymJVrf:Om݌T(q<ߟhfO-å^%ϣYVoO.Y\Y3yY;d{u]@bnߌ`_1tZRqnI6I_b]9>}5I~&^@i`:5AB|siP%KBр-U:rc7ҝ%R'~W(N3pKiPbT3 였!.T *S{NU~hT^STj_#4H#^cWj/*ӜHmԎ'DQ|(QdZ&3_{?x ebkKzN%ir 2 K溑flSR~DX1r6I^,M÷pY@G몕Wه'rM^r-&ɿ[ۺ<2vlc Ӱ q}Zlexgt=|?SVwn(URcMzj/]['u9q1͌&̘ov'ib7~b=+hj"sSӡk<6Qy4.rЀɱYy|;ԠF)n&Nk2-K#e19g;}5ʐc3vlc2ӷåc*Oܑ/,6_.GGJaՎu\],s P0z(}\p |y}-]e_3RYg\ ];(cp@}}.V)l.aQmyָK;q}?ZJ[rۚ𓤦A>{Cx[|.YxCk0ԗ DXqlz4=y@'߱aM@~~.csOi5_b))3|9gS#}\-Sޘ[cJf4L lI(}LKz9 ch`mFNazaN(Ο@i Swjem8cf a6{GeG[ӣj"FFQƕ@j*wϴɦffZjSFiFiQz%QB+` 0C/=R>ThUe3w$`#pT3U Օ OmB0g 7/r:AAA(se;ypsʧ3,r;KXE2J@=U!;Jŭ[V8'r3z,>XGl_'D ]KMb ;wH{טGVVz $yHtNac2߯s~!kq#UerxIEXaF/a/Gޓ{d-f!f`863q6ٍjdByRb#DB-Վ{8ZU*޵1%o\C\ qt@͜'k~ibdPa}i> /pKS_$yH>TC?|Ti*as&߸R@5ְ\mRHnx>iLYqv͎q֫s5 ꏉILH]H#(RDi̿J5K{drkf9Z˛1O[fu;JΝHoLg7-U~~[ysCv;|:*uSK'_&KO֯#zI?]F~h,wP#) Hɬpgu͓ΒȒZNǧH2I#M{hWq[51ѻ W2guI<,@bZFuj-xt-m?LdOFiF戢aFuT:XjUل("& jQW|ZGAʨ slyVoAX ~mm:q2rPc#s3mjzf 2r 6pCyUqPtL_տ#?<}²t'^a U2-E$YRf2e1I5cH>o\!EV];[xBfwő>3H̱#kW (ިw?~9g7$\SP"o6Q4Z&*䟓E6,M~+*jRvlͰ}ǠZSg 9|U;% B;er;ONNWJJow^F3Rz>6L6 FhLph `ԇЏեP?mC?N =/~LH=Ip(b\ۤgb9}]<؊/P2vjʔ%o]`6ӀF N :Dq@l%%4Ɗ j3Q])Æ&[ sOl]qD[J[Gޝ{txJB.Y<5eΩnM)G>gi)tvt=tc:WhYv"h?-kΞD8C- `(/g#4tzf`>]ti.M캦C쳄7ދe?J&]YΏ3+k1=6˄qc#WfdWENr@8|x^u())uAX zXu@^դJkjFiF/Qe `/R}`(1Uڧ YH7@(ުv C#4H\%)sgGZ3U(~x AuTJꐤAu9iP4I?:^uxx&[7#NA2.⃚[A:΢(^QEQt|nD`(:F3y|=~nGX.)d\-S+WieaXda7Jjb,Op{;mZ%#*UNUi³7_O0e)TGUk**<= vI4dH7D_tQ2GDWV\Гu8칾mJw}dG(!ŝnxYvs4NЧV}5w~D[M5{A]+fEmkZ{lW|K$S?׸Of:m.Jrһ5zRܙ8/hYOoA݃:3ܒGszUl3*m3!ŝ166访pA< -MHL;nFϑAƄ n!44m=v.Ϛp)Ua&7.f/]͈"=݉.dPlDk+h%\yD J|2F(JMH8QI8WFfG᪻RvŭLƷOޅG_ X[MA066YYdNJ7[u#)E||rg<zVZȋ8h+}5|z褼6TǣG6x\T14EQ,V-q| Eo]%WuGPZ$öw;nK3umF ^jjŨپ/^XO_j/2TfK/u<)viuUbgZ2N-W%IDATTz7>Qo)/SUG#ݮ5rSvz#Ubp:?ߦT%hU7 i6]/ꠖ(Uo/:0 - Q9/cȻ՟N *[3'} Fb,‘­g:REJU1w2/8|G֞R@*(r>UϪzby)IUێpUUyq$ڜʈFPp1yL#gq#}):a4]E>Tow>];Cԥ(qE]1ݐ5IKnmX?7ʮcY5&eLvZ!_yƯ3RX4{oJ&8:4'߸?:HFC͗yB?.f4EQc (Q'G7V4H#4z%zHejΣ UٍUF[ T} <)XDAx*S zZ߰9ʼM{482{ii!wޘ%=KA};91=jA9W֘fs_b( D~uN7e7֫e#al|t*{gS+DHZdKvZ6 %˔Xf30c0UD 
I*QH>?ݧY~~T:ﳟ}u=Kgzx:gsϔ-ãq%MزN+ڻ-}-0yIzSsǴk|*0ۚbe">ZQIp=^nن'ٚK·`[CTzeZL[ĤUu}. sE+å}IyLvؔtuIt|/:x(k˒ȝ#g8ݭ1P f& ^Kb[aJnJ_c:™pu14p%#sL*rvRQN1IS15ٿb+$W %h5Iy2-<ogLp.њc`mЇ-}u4n7+I \ϖʹt.En1gƭHaMQlvJu"rt\-~,ʼOo/Ve@ZiVZiQ8F#P A@^ETU@AB8)_iU#oY ~g"7;u&s3/i\4fGoKDr:O6-M(ːė엙qRf-Eaks0۩̐:mrv&FY}~73[`&.(B%%,8QfG|6#ΥmbT|ٛf${\e=  y طGM&K )qKY\q~_yEsR+i1uuYIvxe6[bPC8!ZV>KJ? #$7-̜ǹ$%5ѦJ+J=Σ,˻KBl"(ye9_%a6,[I)Y5YtdY&˲- hȡp,ɲlz cY98|ȕ:pKiEI\ÉD9̱ѕn,CMC2N#دk`˪ANtg_W5!a>6O-^Gv[%A~WY+-af|3, Nde&(7g;={p"~'lпw] ŔG/S؈ %p@XcXOwV$ :quu\Ԓj~/Ԥa|3ub{%nڟ/f WoI~}p.t~FDH2ck7EeRDyl f H\AFߺL"7ëq^y96stiR ngH|e @~lv W7>|Pp8ɯhnycիr6X#e`: IjHU[Ӣig+w>;u@O]yQZ) p _prL#1ޘ'T 5Lq!sOƶjN<de=%K!9o+4ˠ ލIOV m#5>e]bj彛у DnBˇ|{b[z ➰2lO{\7:pXn]]E@T}v80\ !LsNM TQJ+J+&s5G5Ǘu2!Oh`>n"PyPfqcO_FΣa xe~]#ft::uLczE%bU#+ƫ9tS>GW(gȿW=<Q_|U8K*[YBY_ | #N]/%ִ;i3!qZ4~`h>p> (^.1WѡN7 j1ޱ ]BIR %9鹡mv'5xOΕZ٬<*&u1z`v{9ݙE2̄R2xE>\FSV;1g4 YESB3 Čѣ;4>D|q>1e<ŸK>Ȳ> ).[< N_38ZGkAum&0Mi%[J&ГPŇg{S&VOl ^M9;qg¡>Ό/_ ۖo9gl={]@i쌫&eBy_G+'+;]g3ɴ,Mrw*MN@]4(flBb{)cY]S lk&3Xj$PCzv*ˈ]Cnڴ ~f}.0 :/Yt }>mIj5(IQ]V\m7`((cp˶5U0 ߰\+m޹ qLdZG.#'rQ+Mp OF7=u>) ?rmp{Q`FUPNIkz1Ut-N*jZFxRQ[,i 2ۍ_>KR}Yr>z"{3ۀΧۂ !jZ z]Sz)~+ d\6FaIiQdB=l+oO < X鸒O@Yt?i`7?Yf(Q-.vcy2wON"jK !G_ū)^MUQR ~;~ѽ-xF5%4{#먕.R_?ܔN A{n;9pwX#kI^H|'hKt~3jTۊp;]sbq 94ggF "v,&eql58nLޚ8GcRl7k. ]"p; l~x7[),vfȰYp acԏNE᧿YgloVZiVZiUCW/k%_&ITJ4 8/IiIZ/, (YGy$i$I:j JFmB.kxeB&=),WÃ4U%so[ $DI5I&?(S݆(BӥU)0U<>vBOQZe{=tLv3yJgeMck6?|ÂKY+ 0W¾ fC+8/˄턯"m;!R!t-)o1~^t~~Ͽ>Xc31߷nQ~1ި>cGN߯>Ys،@{'%I2ewDFa#x 8(a6 VES٘痫ZW愨еf7D_:M/r48 NkIixs <6N#S8EwQme+(:=8᫙lٿvVmSoMq~5cL_sg"5ewoױeeg_V~ё1'ӫ߷5mdfV0wL:_~Bɠ 1WnC5c9tM<3xpo m3 bv(͔D\0/VIXǾʟ+s"ƛ+/tq)˭le?D5Oӫ{cYǎ7޺~;3! ؉J6>iXia nC#/A;j|_ei&: XzƠ jOpU/Xя"JF<ؚG÷ " 5t,wu6Dqk>hjm'vq0 :w , РLxN ק0{3cc[ȟD'pmWQw g̉Eֹ48+wKnFz0$;}sqKTN%V'39 =l Mm9i('ӉOAt9B3ۺ)3>Vce6ۗkfmB1H '*>I91?]5#n\R)ZnMlB~!dFrx6bDE-نmVZiVZwM8@SQ廒$9="S,#,a`6J ρ\!$Ie0e,#dY \߉(ƩW!5ʢ:^[{~y?D!Bo8ޗ e[Ev^.+Ȇ\D,E]>Z#k' 0T{ ٬+Epes/sN7PUg z<1MVL^du]Ku {9F<Ԉ}y-O+fWklq:}LuqeNQ>Kt~ۗBzlQ\xg 8pL1mz}~bfWkxṍ+"T7$6 G[&H? 4֓)L sunb>.:o*䢽cDI(Vssz<wrv!Ebh!Sgv$R(-MVƮyXe c ;T3).HHy 3ۯ{>KznAA$J\OA6?bhhd3.܆աVZiVvTG׈b@Tϐ$8/[U,=J#Q["*ɲSb˸-C+JEu^6gUmp\% SUy3aOWkжZko?+%U]v58=4aUګwkΞ׻;DWiiѮR&*uqcWjV;絷?^{XiK2}*>jč[:#!$ʲc^Za6dY.ޔ$)$IR[Y5 v:LMM*rr'3Ƣm0]<%P#t]WJO\0?yQ;bŕ5*^G+Dq@)zeJ 'y DMMުrygRS| /s%k avJu]JD&%q[f1еEĶHNmcp،|h}rP(Es~=6p’P#Qy2rX·41P2W%4!ը,Y,Z ^aSn=|~[4Ӗ{MqI?DA6S;5|tt'v(Z֚+r>;MSp Y ޥ)sA5wBD|] t"dg)ӣsl = CTp4Ս䏺L\hќL,n ԈxKȟg-&f%16zE Lfcxx4QWigȴi,_Lnp5 &zkHkX:zj'|}/ΰvs ~hQ;ؔv ,Vj< AJ>ǨAMm1IQ_RIc+(}/t;Kܹ{WPƑ,PKo S#Η}Wfs-Pmʲ~Q] ijlZNR'u{6}z}E,94i Kg+*}S}|i64" %(w~j8 AܮS'Сd֙F!3EGGS?S,qZ.(7!.wti` xn)uva]IW<<Ghn8B.^ '4 0sd{&D`?u3JCSr 3~aIGh@L/0/\?y: 8=ҞyD}'{Ybp!DLj5:o[B8j2kL61$')ϫ& #Y26u= 8k3\eZLJH]7b} ",ZƜi=7+Ƿhb1OkZ^Zٮ~J$S7B`G_"R\'8h҈Iy/ ='&"9E|84Ok뚱;~1iw#CnJZ_ W|lۋɖybﳫJ+J+{&Db؋ aO$IZp,NIgɎ%El'Q;adY\ߨ myZWjP?@q^_󃜚4 Rä1ɕ7N~Ie}K`w>HevaT=PYy(wԾܓ=xrL#"w1U{b)(뎽g˼p}5UG;^Φanv/V>4It@J["5QT`:q;i }Hq8A'^@w8.84}4]a~p65_軥{#[G ޖdXSQ>gsBQ3UAIh} -lVl̪D/[yZW| Ʒ`3#oT~ ݨQBk @X[Tί15g1(u} aAWi6}fE?%*c8]tYx+lⷄ;VlY;IzfX{qAξ`>4LMW3Qw-[G3V~d]a4vŇ㜘"r&lb`C7.ZdjsHt6˩g)tOqmdw?<ܚ3.9#HEINp:gd69\փFIt~UTmv榾P׍bϾwnÍ͎u:K&J;@i1G?J+Jn  '@ CI X#5,gh$5̀ӈ<ƥu40=ߘ1x.JfK\sl=]w~ UΞ|I4==vbJDώ^Rtލ8d,Rp[xؒo^8!ܺV=vA',9c0wLk= $pPH?7b.q Jl}xkZiatz$RSZyaADHi41BAHhaOԞ3$: &c6e_b֕y)3;v:߬dvL+u2ĘpJðLYuUf`0M {FIp8y[oaT=$S"w~kIϷ_FyuI$TG$˴jÕ qK0F^v(lvC@`3j؍WHA+J+M8a@ GeC{p:!˲>"5ݲd[YeYn,X&W ,=&cLw)GK4NH[{tApI8_ѦzcQ&j޺~o%V[֯= ~ʫOY3qj4˟Qa^aLMGw4H'4!Ifib*!+\ׄ5> _y<ݙ)AK&DGKzڗy500=%sLFh;UHMlz0I#=z:gn㉭rIg3,.pjbۊDnfF4|f{LnU )\)\4Agܒ?! 
qӃ.^NFXnx4![rk8kL{سcD9f$v2S:D헒pN$됒Ha.PaARJ+J+{& ,Fޑ$,cWufEJa eK4TquI$}F 籲jeVfq4F" VcL d;WsĦ'9nQx7gU*5bRc; ?;=dRxa; Mw ϧOwr4<h49_7~@{}>Jcݥ$&I-jLXQ?Yl{Pp#gmNy pR6q@P:ISxxlthb yh ۡxatĺOYQhzON,̐ :c:4}4g|Wa݇1"'-,MTJߢ^Pe^eqbXga93t &?kO'H%+q䉊>9kH>CT,RCxF5QaMۀ@Wix '''Ps\^(뜖3F@Yq>Pؓ]N,w4_߾wrz3KInR?Qʝ82Xu\7эXKs?8m.qj8U=3Q~!ɉ'qVrfԉW_*(P82&}xgܼM(W) ).@+Č"%\F05I-)u5eDY dY~i@6ΣrVmczbu,P g@-17t\$gG3Mu4՝GYb㔉dDϧ#gtoWJW$KxyL{lp_^Vw!_quQeΜ^-mJq?t|u՟b,4؇Uwo_yhmj7Tpp>up]Rf.A؂UwT4<oч74pn[/Hw8.=\?qmU;+j|m\?mj7{nuEu' ;J_I*qxy Z9l4՝]~[ 2* ^ G_E%< WI @Ad$lK4̢Y_2"2Iy %yG ]IᜬcKeB֑YYGMl<}w,n9+W&cC~#9g?]+J+Xoy<,$)M|IҀπ,gQ/5+D%%IK83 dߏI\1ؿաCni/.,Co]&\C&sH*37ާJߥP~̍aX\&{Jg E.bG!TµY9ߕi3dOc}:lH9Cq9 L*R=ⱸ[[)X}t>ܲ/ibRX )ukZ❉_5p܊Tm-r$ -vrj0Ѭyz2$W'#,l;d+|4-ãYLoV*ոr|$WgUZiH׉ݱlϾXb/QʧK1&Vc!ߧTi_={b36D1wLv ldk>TfDEv$g!X{ .T6HB>:߬ peqt;4} T+μp}`{J+KoyL^ڶ.CI2Wf{ɧIMK?^b';\NzJF ?kq0[O,:1,1}7&G?ZV :R4ф_Xdcc8jGS>7qP̓F=<fom^N+ܟ>\Tb/Q=84&m/KpK)=ɬ;C΃0wL,FD#x~&:ZHA'Ԉʈ5q[ H`A[b|~ݓ|[wV+d fbE+Gk[`p}o8Q>ag_vtϲZta+&d h;F~ F[gaxÒ*-,3IܷaL01p)at5C&Yw\L?F8҈|(t:8|Ͼ4flBsS;Sc;!%%)oaƄn{mg~lvt%"\tMaqk/St={f+1M琑\d's.њ;cKC~%]{nfiDN߄q yNue{|8 upSVĎϘPGgWs] ppt\y/统(8eţ1Q@5C8uQ-+öt7q^yu1(`lPSLGײ~I\܇M0:,I+J+o՛p/IH G*ZY@o`S$eHtKo%I2.cp/hȲ|*8$KTS(044IudvEh8Mr^(.FҕE+PUa*jIB.)#Kvv!gl$ En?\ջ)^.0(ӚIڑ \*Xv۲u6b\Q$!q#w83z[So*J[vI|4\K[JcN#y@B@Զ,{5_ Jd^]HyMoګQҟ8ڮ) ŻE ř9P} " Q ֍ĉȲ> :)fbqZ,$*c5ʑC0w\~9FOZ,P}ȴQ<ݼ&Qkב04 =^˓\|o'J T4=S_qG sBs}[>1t|v'2dp tX簎VzX"[>[*sY l o_84+ jDX|v&|}9giI4G$eKXE%IڪI(Y+Jz X󸰒?fV,Y>GTUm'˲O_&/m.J+CcOEDW\園u"*B' ITGWjk``c[b|_oJm #S 5;R[eFU/4)i/_k;)[OT+E/ }Dz_V9$d}32*oC NFfWj;]q@ƕHiŻ_諾 anUDߎ(y$)U+X4XqeyC?o/IV+*؞TrxC{ҹ`ΉMU\bP^!r"S[&KOXC+Uev{-UHArOԴҊ%)*UiO$qirъj Ҁ̿Ш+\lloCזֆ2s[y s̬WW`>JaFӾr\>7eXyp ٍ3ʞ]SLj$f>/OrSȼJ!'i+BV5#f 9?} kK̪4XеEY\z n?!ZMff&_R>E*bVbCj}}?DNuRB)cd&40\y4>dYXD7Tt͎,]i_,ML.=]g3݇:`; yyH 5"4B +Nn<| /W)t|E&lZTֱy{6#W56yXԻ zr4׉ّ#1Ʒwp3MO4몕8MTB&3҂X|bcQQ@⹠CL&_>*NCmچ5yfw*|^ o@uȲܳߒ$i!e~hVZiտEo"lU<ʲ%v`XN$ubWM_<)Tw4SbsF%1Vi0AG|/ ۂV$TJP*V=aFDtoY,F鿄xKQOQ"CfdY8^U߯ԓ̅,S)S:2\;wp8:Ӯi(s5kTctnwDE+1Ƥt"PW@:ډ[xڀ+Q] P 6sf D bBXRW"o/!@Pr78-FF|Z|P O7n {TŃE,R?~<fTN,4^99SP@zQ!o .zaoEtPu.B%v)IݞIlCiqs8fKdž} @ׇGIa%rM u{6g3.f}?C+q0J+oMq$I^DIqH"II#*BRKW<*KVpH{I;B7)Sz-ã/+e^ԏjb.ofHY#%%3(yz˻z-} ǿml꣬|NnbB W۵?;fMi1m^__.L{̓xb+k 1,.EۅjK#z`piŻ95i~`3ίٗѝ}YDZIHLo9&.yGr>La>tS[ ' w2='4ëqSp0ׯ2-V8 sڠ[ W84G!?7RРǷz*a?F|! @)+fIIyd֝p@Tq 3>¸1p"^31l#3zC@ @ ps#)Ӳ`f&[FI40(&/VNV~ԃ*S#轶3@l=Mkã UŹrUAg2ZV-OEy+rH\˜<$/wR73+fBaM5cǎё;kGuzG1 ZUT@,7˘OC*I~%۩0_+J+ބ+"?00,%I!IR#,0x?tGaGˣԧ"1r3_}adV1, ^3Q/tisѝ}8ב?m_PEȍKe9v4L쇫v vrU1s"ן0lǑnfDhRq;O. =yTQBz3)!^3{@:ŷ`/Q#* M Ϊ+RENd0҆Ԥa'rB\ݮiю鴳*mf)S+ö1! ]{nfe6)?3"sR+;⎇-:-ο:۰A8֬&n\8K[%3҄&sH 0UWkrȌU[u&N-Z[ ]KbnC0 Aphiτ5eHu*M/vP&-CD\CHlbD4/[{S㗖nfʂ8<&Ct{Sٙ#yp Hkڤ4TB-;n*0=ߝ9oc6]wp0'% ii*`&{ I]ynA8E=|kaK$VJH@WYtdY~(IR"/œ@J+?7<#fɲY(I,%I:,Ka+2pѫlGIJyy, )ң:*+Sge![>ODܷxHJFnN:zJ ,%>sa, ʬSJϪۆ\ڸc>Ǵrx?˪ߡ;;+r[mQ*9Gυi6پ?K/Tles&ɛjwFZNWsƻG*9GXb}nztRlxYeryPV)˥wϗשH.5?[hkM~&6GJXZ&Z\:99tvfܺu ǃJG IfwJϏq*<g9+Э_%JG]6?O;+%?5a5V}xbBa ۏEٶ y:o2ps:~y8-f'Acx0O?52#鱹G_dKME:aF q@kΜvQH(ϣmy<1:,{5S">/.86=B]r~'1H-LR6`j}}-uEv]DĎ3RWD>DEqDsAz<ԓԍm6x 9{=Yj? ioV0e}VG5!Db[p>Ԋl7KS|7s%o5ƪnľC07eP+}XK?W>SgmEjJBUW "xɲ\/1l(M('Y 2=VZi)I"6ۀXYˬYyFGUWPU ~FEuBycko(QtqXG_>(שDNUjC1}_l:-Uj;v#*>YXZRg_kL?oGuD>@;v_pW:46C>d6oތC<ٳYhFrPEK"ŠH 5blr-S~uQ, +Cu$lPרs)hK_Eu\pӣs/*[琛7ɡKxwC88?RSu-L.Ұ4N*nfO|֤&6`JмQ>&2V*3VZHUcx)tI~܎IHz6X:3߽Et#5NdٽOt}xÉ)8׎4 @l}3從^@+QQ$PPzg@QP,N,gT QXD7R##~U& KbV.ށLѓLLq<ݙEZʼJuyXB-x:Ǽi0wg3?GԣH. Uӓk;gGAt &g܃ 6w]ٯCL7N>Ʃm HUOFVcrA*'%?(>(y5ۇP'7LrheĕG 6cw\+s:  Linց}Tp@>.fН~Oiyb⃶P4ޙv~Al=QQQlt} l2xET,]tV>SH:ʼZjTg)݊W6Ne"uQ/-"_uB S?8jVZi8,<$I2jbՊT$vٰUJꠂ? 
Q|{Y[W;( j^6s1ݚ?x銿=}Ni5(Uei^=ho/e\(fa_5-֨H{ګzL,_a_]T*56Z!deu}ʘ˩iӕ#x^U._GQ;!M[hI+@l=IMB #pMBjy˴i.OQgC jnSԚ{ (~ ǖ 泙.P  _S:F!CT$ Lsi`Yȉ];.)-A/wnY\ń ,]KW?yWlg7W%r1,=MVZiVDUE,8>%`:hHThIAut,N'C4I}woJ*cP{=&'jNLZE?nv)čx%8_wU~UaɃN2.fia:$0տc9hhq-^I2=3aAl4a^>.cO @>dTQ%rk{FXR݂n #07rl3|nUnI3hO+'kCDⷄI{ӳg9F9u= Q~`t!ΞYnY׸!yn֞W)Z kZWqh*9PM%9!M7Ä!^uag̙xdrX13.6ܼ/eNc\LILZ<3PRi[ {c7|<3\srL#w&l'' PP *Jf'nv):$o_qԤa'$9%!&fӧQ3וAݛUkiMDD' @Kb 1~nɟ~Wi~9.w&ꊒU@=9hMWqM cbʵ-s|[w>썥{~ݬɞE qJ+J+FGIIt@I R-%:$t*I7$YȲ;$I$IFTm}]1IKQ: g5ܻ[~y.KS&ahZo [IQ6&*> -.|31,߶Ϊf"`@$C]pw޹ M\8<%+獝ލ;[O]l+©ɻpN` ~VYyhe+UȊV4NHglc^TFSn ӹ? 7!.>aCy:0!lYO}BvlC43Fm9d L ML?cbX&:8g= 1"%s"zr `Z[.7q]XW?: >>dх}l4zV+f4b=̠.[ ,'œݻb1#ǖۨ'yci"^z|bn#cTzJ+J]Ǐ) DB-D8Up>3ͽ~O|Ǔ,e=?K=|9]5YJePz혨"2Sw_k|09z["5(솏ʿ;n`vȍL ld+eyNpZ}]:4)rD{l0U^3p-CntOӺ=A#G"~>~@SnX{5J_|Ydd7k4M`Gp et>8 1\N=̔hߗn֞FOC@ɿLtA 'g SɃNbB7cmPsɊ=*77Fe^)< VObq"i:x?u&yV=t9@ wr_AN }y~M}c9}V0̥WX׸O>B~Ҫ;=f^cTSCOMW)~ l DA憻a6Jr'-bcOm3$SVBK]bRC:%'+23EuaoooMc(Ōce6& gnֶsDFIL#t\-EueJ+J+cĈ/% !FtyB&-vr޹ Kȹ٥P盕1m[}ǎ?.4#}ʪ\a8}]?e]p2 $W˪&#,E1bDM{N<+a^3UVek_6m(f`C7KQ"v+eRHfYuA613~, YCcN8k[즲n*׏a!"W1g2²@K_ AX)^OKg2sYuUf~ٓʓo%^BЉP.Q>D-هEfw8NE77 G\Fb&dǸzr,(#o$*Cܯn}q1K&v;=E.f/y~E8Gﵝ(;{6( wY`Nb61L "0gk]>@T>L{B]r_'v'1i1IO*w{mgù{`L%&xlL3I.K3m# |\LyxgL,#NFkxf6YX9N)[f㩱؃q1H ;?WzLZiVZiz3S$yp%IRہodYUŶK!I-I$ɸMyTh,GtV1I%Ii<V؀Ŧ~vOOO7r}S+97nX__7{$5&Ldf=Qȴ Ohu{W2ޅv֢d,I_j%mĩ_xxƀbQT*_Qr۠}e"W Uʩ R(I}WOWVkkG @[Q ͭƉ' e>+k/OGN% EqD;~ɔ|r^؎JX Ziz[sCBEZk&eb[ik6&bښCx3w cOm*J`duJ[cRS? ʣ)Vu]m%R-ixR.T =s_}L\+%3G>90mz~X*74nE햹=#ZtKI?PN~xD`.1( KȲ>H޵†h*Sƕ޹ERV;[fI<Ջˆ$$nDZD؟'2= &_s`2:vCD׫b&PhaG0A X_k'rђVLO8l֏(7[Ux'k`;s%1'Z%v-Q+JU [9ֱ>R˪޾q| jVcJ|I񋫴Z ALvvQޮ).V,Z{cՁoe}I^LNVU蟬*qnTmg|`![nԦ::r=+뿋(IR;}\*rҕ7%I.IV+*<[Ye}[zMkfIcg:+gS+9U/5vK&BqO;hM%6f!\!yfBg!@i_^HK3ńYy3S'OT.ZLĞx{{rr'3OR:,Ghߗ5kʼO4F7w233?ߛ\+#~UxݣY*s +BpQY&еrOMȊVkJhOξZiVZiUzScIn%Img{3&t~k_F Ep -ynANfgPo ŬI3VU7++9vɬ;ٿrW^a;Eݸn$vxFn1S軭C 9܈0V_E:sl4E͎b7ǩV8mri ({xycm-`w'[~ '2sQV=a[5MҖ &AמmƜDh!u#ʵ Db$9U.R^Mve`@cnp9 md7ߛ>Ӣ8_/ߘ}w- 6)bs^#y H\ Q!"g$XƃF8Zgw?Q^Sop-QH^\8+b\q%*=>2$1iRX6VU Q[%-ϒܱr7]壵 Dnnـmd&r)ǡM9տR0?µ+(wrرr7@Ѳ4ϟ_J+J+қBu#3qY3񈊬C?;.P(Q MDF5XR,1{^ J(ƊX"(;;"d?Yv;wʮ{sRޅoA NQo4jtr''{cJ߽[L! F[଻TUH{":./Z"志Žns(~9\XmJ\VW+W=lG{Q%9>&wexn_cq,D%yB}(﵎)_߮YGmHR֦I]{;n̬ڵmZrY,fi&dž G{:<s!^W ssirk8;n6$C4If_n9H*^7>kl*1jsԂycw6yMe{x"L=\ w?U|$X> 4IN8yCNἁ82TWszs'AEaQoZ[! hx-dolK1:w my?O>,hn; n.v@%"9H+W`(GEQ <-vH85HyH(ա}LGtF[guwjQ܍8mG_)nsuV ^0vz fX%8 8MǝOqzE;vbZ[ȳZ۱/z+'']Dz0sGIk YezH9w/GO+C|$BId\45qXzDy' ƟXb))B\rсsr㒺t}4 7g̵x^60\fծ%nN#tp}\:K{]zE59z޼RGRAAAAAAQ0G)'bf 1:(]A#9j%Rs I0R]\\6fA0@r@5R)W]@K!%kYJj_9#=2pʆz, rmJ%[l3}4קlU@T1kGUgE-Ţ{W5渾H\7odcaU 8G6NĨ.ysd[ėjRgwW([;CQtcB|Rŷ7=ZGNA,˳ :7[qu `5Og}W544Y):*kUQ7#m< YT_"ϒ+3}9uWjx+ﲪ m/x*иr=¶uڽ^txQtZ mϦ6.>v6(?IH"䦡+iC/ 6BZc<>|a/Wċd<*Th$VzIu〶@ X.]Qw' k5.V՞2U,x|[l?aW,IsssY^L1%AQo<~Hp$(Bdl`cF\ٳ"AtU :e7161M/Hi3u]!o S ̛N)dʼYiT5Sxmoz+ 7xDgnNA̒bر[ czG2qs߁PNAI fI|MeemLwا5ުIJ'փ2e [s H<&'?C30Ӑ'˷8{j6NlPXsw56CbBn1|R%]2$[z'2ȼ*v-BLn:}̤ Sꢃ&}*u8?P  [aљ}? 
Kܶ&6J#Լ[>5p14 y}(+R6AO_ahr\5#z.Z~L9ҏ~)އˆSu[̬eYROα߲YR'noea-jfG>N":kMHqn\ >?P:^Rȯ("EQ;.ҁJ >ӵ2:^d;AGk)T;ݺ|%,@u:::| HLwr=,Z{ո"H g֗Mt,sZ>?ؕ:Ԑr+!/[\,v7ˍ gƪux>BRӝNw_MTʍOZ xǥrַ ?.%+);1TWS ցob>5D_YyTzx %wcRvSo@n *׳*D]?Dmok4v7xԐ^d%g0gm;yG gd?53qwM0G,(w6%^隋"aYTʍfo @^T*T*%؝ REe=AQio(d =Gz<`Cu QC\BKTsg#L !">Í%{Ij]*R ?< A_A#(3vAY A]W$}HN>0W>Lf(59~ 32 VuzJ׏ЛzV+KՇQtʓE':ȩe>[ce Sc`-Bz HDE|\GEhF#?EҬC7Td$5*Ml'puwAT)劉`( 'nE[4=YBPdj~v(.Lv+bH)dyC M䚗E}^\vcڦquVER$խW.I^&EZw_RFhZ&{V횸'uIy'QSTԛp&j<˃ P?՞b8ӤY^ 'b}~Or'glG3{MZXu',[)ݴOՒӰ݉TAHHMs*sX{Ib3@&8MGiz,$;ie6ޡ|ǑiԡJI^ mC:(2~[nڧ~-iV&&sֿHqɩ܌CuֶREPd rW0 Cď H,ʑp-}@yGHC M'ڨtzDo 6d'; g۱i{BiVI5:=ʶnxܘvO)((((((;Σ(DQLr % lEQ8tlmJO.UEM"/`)p.S +C%B<3WXZiNeZR5;m||=9,dn:?45?vƉf1xuZ_>>kKx%N-@Ҫ YAΝ*eڭ T^%8x^ A-9s#" 6* +ˍ[(^k,k2 Z@׉QTV~Z_#yp`IN<ϝ"OC3(|p]Gz`lYhuK2DibhcSn'~ϱAǪN 6*4nL%t輞;v)a;U:XtI^cWJJ@gokZ[^~s4Rv,[Xu^8.pPYQ0y-,="˻i|Mv6<{՘ٕK lR{q,d<2>Fc`I>I3H:ݨX9i:MiQ,Dw)/5IaD DQ&\_UV%'`^c/ҨPE9u K]_**i)cf~rQo;o!N; ۴ZN^MηFpR{l ;oHWw!Z(C-bջz5'ݜzCAv;J iEYsu).iWk 1xUu{FQԫ\Re`?%c{>qt;'b!>kB?B$^MDي"_¹+,u 19g𜿋E>x_g ʲ{ >.fD?fՠS ݶ5g9[B_A] B] iJ޾OqJt6xXFz=0~{_(h{3^9~~TsYֹ,t^,*1ď!~~@QX`oeCQ:(Y8k3J+_^%kj++ϔ\ǹY}>9s]SUSC&'{k-1#@-nb]9 98!|/֬yș3K$ESoHƜ Rr-MD/K`x'v܊O04XMd_#P/WPcHկb41+۹>& uѝKFяnRu&z:$KrQI sM(A{ix󨠠O<E1X DQ[f5HUW;?Ӿ#+`*bK E1[Հ()(;,G Pt"N}{RF~__*7|&H+6*`*=GNfXqë锂TTUIV^ JêN_ߊr;wVFu-{9TU@&Gmt%pIk [6e7PK{`_ot~NĖ t5ȺpC1R4ݵZSo4iNZmܲw?[yaӪ-cpvv&..NԠ3_}-KoEe`@O177JSZ6D"~>`VWI9&3p6 ZO9 gC'~3{}XUw7)˪AӵtC*L=py]D'tO┋z14@$B-,eg92<u([loWK<3dn~QZ}G>>Fgp 1V!f\Gs}DTq-E?7Ω)u*'.M 1y5,s gA$8Ӓo2ojj4h*U>WZ:ڀ ]dm3G3qєzT-O"8=[3.q1)i$ |C.bEj7U淐. !B!UO>;fѵ>iM1s'1^#g֔A; EU U|ΣB9HN@J^>UMA]f_R-?B_ZLt@,.٫EL+IoLɴzٔ@T+z)Ը&aeou>' s0#'vtgewtK>m<./0rV.RܗϝOMrYqw!o=«9VRU*(t9iS=r/o7hTdߞ67&g %n148uws@Ʋ\C; ņSQDG~4&=9oqhK{RnqIND_Una|O<߄aI+9]VDt+Iwix-9z cA*"|ҥ< b]V~()Rᡸbl)[K9PXkj:][e:Z勾ft8Ilˁ'ykvwIn.)T! / 5"}ְ|xVZkS%jsxaS8- M%u-} a@|go+t x?8~ªODžtꧢ7TR%6qKn@ A /.T`{nD.͓' UYLMw?qePcKUk ag;ZohEzȩ%+Gp1TzngVe$ROz@8QNSeA̳kk*(((((((ʼ^6u3˳X=g]'jAH #6UЗPRܑ۷Y vӾfƧ49&IY ʣQ+np۸Mf< C_2xrB^5tjI>J׏X L4`T?4I> ^Gv4լ0a^lp!.=;GYjx`x8ɑLr:lڦI?j9R an_\䪇E U>%d֦?HAėƐADkw]bqeeF|tGy(J.i+hmy0֣ͣ11 rٴ}1j=u䚗@yAL r0;[f"]}:s3PT]H!d0ݞ{ni,FWq2u.^>P-}Oٴ& #zB3ua΄pԀ7K 8ᖟ>MUdf>t b1􎂛6v`^+OOtnN9ݚdCJnjŌWۍ7aWs28dڒ洞>V?C򎝌faM!!|=6W l9;v>ѩHzزl`=r˻r5׾ϫ(((((((y( (jet?ET~2Sf RꀛZ."fIMQRCǿw߹_cϮŒ>^E5~Dw}EA*|G`5#jFk_u"_hhB{iz[ ތO3fo ^neЫ;܁W  {I`~O5.F7K`gW S~ +]? &ga*KYxq1FӯI5&ZvKqKs04 Kn'tp}HC8HM(}:CJ'qEǿJeH3HKWf;¶;+5ͩ\e, V+;;)N=skk.*Q#23&誴Ӌle_}̓mԔ7s+ss{3Dhrϕj#GḰi|k\yHYI #f|R'UP;cBAzҒ?A"Ʊqr qYT6#hEjKrY)MFCs.4d^Fnr㧭TΛ>bhM|j1 os4n,Ku\M _Srf,Zyp:^ou 6OUoU-R.7f #yϝk0w{:}ؠ*fI̛YGA'ӭjȳxrӁ|4fܒ442,w"*#gІ(:P%R!; wm|%2enf:-UoM<2 'W+,UDbnnu(oY`zc_gtДycٳ3zuSnu,N%..tWj7"L;~2 gLKrhoDqfe\w̺ i7F'<`Ԙ,;Ճ=Zu2?meh: K跫%iEd[KXҬt/O[uEgVMa$V+H!_ $U˱*((((x]tEH9zv5u!e$j{mSoߎ{)n Yw]k]$<^dx0bלILc&>LH8 5Bץma!7SLJE8w$խwjGxsOGz A7#|{VQ jW .th:`/pp|]i3׊΅yGt-FD3第 zΠ]z_]%:ԺԀʟafj j*4l5xIG rfb` } @{g (pF +Z>3( ,kc+ϰ6F޷W_'i|"f(9%W/w4 0=O5qLu9djՠ ꄵAAot"IR/Ʋn,0sp3wc_%'mK$խ)W0W4KinX; Daؘoղ-[Mk`5IFࡒFG, @:quIW>Zf"a ?\}o6;)_F`lmYu ɖ5 lˣy1s C@oT? Hy#=6¡lmv>&cVB_C Y.yt4ǏgfnW;:M@iF7Kv`ey^Ap1i+RD 9P!21 )LUkG_U #9 -y<xHOL>4c꾞Tq5^C(& Cl >@ʋn.Ym{79{wSr>:ŚAG+ly[e\EgzS(xmcpr+JdsL븢FMAQL\V,;Ɋ̥GOL$}Nn+Q3MP gQ3?^>4 HQ5ݱ:,;ȲLRF[t؋p|ߞ[*aaa/~quQX^C8i'}CHHB8(H\A}NnujT%I1=%l[+3248t,39=aÐ/Byhoѣ_pn4iz.0+WKگOn֟-QImFA(cazsVXe#UPPPP*c$Ʒ9jfH݇ )|WS;`)]o2n؁ZVi,ޯ"n.^\)߾}2yq#?@P-,{xZ]~i&Lp͘z+B\%}wdhP%`4? 
37('ݧ;6`b 4)nz" :}&um)|ض_ϳcKOecsèy}b: G-5ZυUNT/Ih:$fz3mѵYaU3PՔeH̑ t|p.&/á$I݂mt[ȻwO%gu 6* WeVt"Ʒ'tp}zsKgݾLyt4Ã@UyxP:̊2 }tOm5cSV!ôƯz!SQDQ<#4 0=DBQjwQPPPPP(RE(U[ۢ(eA؇:C4ABb$34{x̮T@4Ǽ2vTO:~}v|VX0_a{}ݮ'ׯ']:H%js]."T8EEWDA@L^,F%*1a*n*N*zZaͫ_o;#"jfI}O9\G:tyuWfd в`+qbxojNJu,A=9.0 C7b@$+Zz"D$7Cwj3pt ÜddȰ,WoQSPPPPPxM:!=8 ]YPT4` Eo᧘og:vuך m 2^&w2!LUhk/x!#z+}jQ8M<,="_i7 t+rxuzV˯^B웥EiH/Z^2nr!T%`z? ^8Q 9nA]~(RQ !|xXy>O[isF~uAxA(Yi4\P*1#rqnPa۬FeT%dGe ttd:R4mˡ[#P{*ky"ZP\0, j-UϝOd$,[Iznj3:Tj0ΐlͩZGڼۻ4Ih̒%B4?q')&S$5fpǵxG, hC̒bk٬0d w"*3Q\e#]mhΓ&GzU%8ʳB2ywa{,x|O4 d2A;vRMD{l7r(uoAuD9c&򸁍M<5Y>1>at\:Tж7v! hn fE [E{d>Շ[ [=3)Z[ytlj?"i0y&'5/aFl Xm;.=eَtB2kN5I؉ր$2͸M>4bkTRY9:Hu’tԋC#!BZb%R.FbC_Ngn" R?MXGFҌ.lo`Fx42'k@Sv! 9i-EɪϨM2 %:۝D%01M`Ylٙ<D/g"V2v-/yt4neu YAAAAAAAyR[F9H9Y6><`A\ͶH9 =QS(RWF^an-dVZ,&ܥpBd4~ UϝEB3jRwcb3"8zަF܋4`Hs%gk R.x%f],jyWHWτ ?\(:7n,kiH ၭ.]I^mlPEąy7=b F )<- )"")Y*= kQk[iNt{bs8Jؐ)a[YL&p"Nt"4-wii .GW8DGtj>y1'NƱn_h< x9nw^so&уM)K>s4ԙα]Q< ՠ<ɪfx\Q,-Kéz Lhe[>}O֡aЕ[MSwZSt%8/v8@iz̵@'Ӵ((((((((TC$Ger1DQ$瑀$|ɑ,!RnZ hԯ׌1H a( _c}lx0MxgT6*)~Et[HfƳ$ ̛'%oDv0*$j9ܝa7nuIQc3bCV,&pT>h*M`|1:hh̾"H>ueCW̾\MNU:W#.NdG V_8D.4mO1 HCuT]FDlT6C)z@aGfծŲiLt;̣`m(Ay'lHg%Gë4C8#zN탢UEJ2yI%9kk~0Y`,3 @5=b?&$|Z]E ӚlK} `lFJ[ip|1>i#Ȏqr\Mc@"lF~LcolXm'Q`GW.IukVgumX]V7 3UeV[<ҜIK)#b O)/ٗ髐cH=YX àMq ljՏFW^yLay˺SՐ","8hKtDR_zR 4 UKY̽Vb` kZ 05;Y32wBhK cǪ$fWQRYROrgu񭍯bF pF|dlCГ gV2e=ðuHG^x nGV) .: j-iOHudˈ_q~^(nq݆GĵUDa_ c}ÙbK5; p>\_ ^<\w S[9<83M?ffkU zᦷN=Hx/%s 2\htzc0H]W}Nbi;QN:NiSbRҘDAQbSzfR8yfD,8|p˄)LԪӯ!Lgr}tk)7[^UiU!)VōS9k:o"fĢR}~m12GT[]:ԡk_ )T5xW"O(:>}"NyjG)(āG:^YIN+w~+o} MBDïԯϺ/kJƆxv;v|5__DWj)B[o7d+_?RA Т2t$gIl@yώiٹLP5':/'n*:cqvv&.. "H3{Z܉g-~9N ը=47? =jIžڲGҷoUNar_ne4ڈ´[k"l!21OΑh/kֻ)Y+!^\iro;# rb?{tct}AEԫ'޲ʼEe ht85zn=bW,e8M;{KhϿZ,/9o4P%88$sn&EI9Ȫ gd9dfwi.GiDDD쌇-aG'HOwt!YLs} [sl/?megiA&P%x٢'-Vss1U洣dtۮ1D}C`+_naͥ)YQ,XӔu"\TCdggs㧭L7a7i`x'M,NNDe5ʕ_>gDQAAAAAqO<#j+1B1@uAZM:1tAjRwLlCRrUGZ}RRi9jbg*$x%NK3T^TC'HeOHǐfiы-|*sAf&7:nk"CW iCI ~' q^HbZ'0g5X@jH~\b@|g>2YP> ,J4>jH[^zOf@_P/ФG2U*tttx32d_AڧN+jBX' DǟGIͻP3cGscӹp6AV翍w5fۇue[ 8 H[> 9 ՚b/O{̙ W[YCΝy@>daW*zV|WaԘ>1zJ׏`"0VLSK`%RڜMa@ؐly3i:ڵ#-.]CS]dj`s|p^SǶmqD,x+l Fs+m)Q3 U '/Q%621qu<؛:> l7y?gAQAAAAAuUt; )&'T 'XWmT۫d8<'j=`AtDQSjDQLP{H. ւ\ 5Rs>d9?BYQc)m٠<E@2RAlԲk / v3M[nePoT]uOZ42S6YnQĆHM$[u\8ķkJgc:ҝ~3c'ыgЦ;ڰ'tޖZ}=̪47b*j`ƅNVs 04(y:Fnb_)vk V9a<9_KI VzjsVT&iRqx @J2Ζ6%&i5ksM fV+?ב0^J]Ǔls'rg|(] S M'2pnek I,:щ)?f&qYLc[M4=JR̪]}E^ZY,ۭqߢ{xR)hT U#1_m~ugU>V/{i'y-{ ː/ewj %jsN%Kо{oTFnēDl9$/eAjV?#}c`.֤I L,kjhr-mcSM 4r5^UPpKHb-d' 귚*RnL}M- s'~-]ҍ8M%]kn1Ǹi*WXS0f)p6?p[ Y`lڳw2"Cf&PXҵ47?4K yZmЄC7.R^6>ANv>nr9(Z5cKscp#gY^f/k~v N[l#\gƒk^RH>Cteɴ<&ߋvIV *67Q땼((((((((Iu,xT0^moRۏ~,7J:(}MG5$Σ‹hD:;~okZxɉecS>.`$<_^bg782\/ycxn_Zej .ռ>O0ɳ+g#1RrGt_8>w F\g<[_l'9E{WMK#7aXɳuI^o…rG@LkmJ<7 F}_/HtC jXN)OmӋmU`>|[*`eЛVjWcl\)7T/KN}J׏ܽ&92c!&C 6n`%զt}Ĥ10,nX;1':7YG!7No0LHpcbbb~q(((((((w':AT0[Gl3E@#(O62TgwHu(c*@T,fC0ett$g:^M`u;ִnG7a8KNb,#I|qiki;+M6{+V<ᨛYlXs7ݹ_pvvryc|ooo`U{< :R>Abwx4/8#p=}ZCEQؿ*L[j}d)@Pïe<֨ul躊 w7f$O朢4(|B{]!vS@Rd%qm&jC-ӤpS&&ҺaUN=&TS)kI`o\oֻ1:5N(,%j^ R_h_ ? 0 =[.yCL%(CLlM#ƪ3-ዖ*Y[:'j dIx 0˛\>&08Gl7<^D 1ў \pS B`_KV}\dӊo j9E)8貚a$L >3izhܐ>vekcSJOLSbʫQoMZKRtn,Ln_fе>҅ uƓs9ܙPBдtn "HA0#s `p!S xdIJrV V-aXzbؘ{q^Y,9hIJ:mhZ$tj^s3x.g_\e&JY;dOyJ;?õq o4U4 h_mzs4Ԓ(g)nz†$^.g*R GA:#ԫ QXTIQ[fS` zfI4/yTPx:hj5"a]0ԈoҴU<C'sbė| l:V=BҼMŹ*d徾]5 }{SY ,{gsU08G\*y<65qE8Qkx!:vMRGlDp#FWuC sY\r*-2$,[IQélKIiQ\pyyWPPPPPP*RH~_geu :Y&HN^u0Cr"Uw[` hEQS0 i%0IH7(o0Ԃ&NpډE,<^wmك_G~@3\:NL_i2*TѳV6*~ 9q!+ Y es6y~BΆL sWMweU{gokTs+$pQ^CemnjVVlU~?BN^n!mUϝ vcyXM=p줭 t# )~\! 
cUlo`HF[~./2Z}r|Fo0bg$B`7cYtSmϘ;LP?:,V03dD|G$H3Gc#%u)˨pwWqs$14<#q<@Sg`0TDZ6TZ h~my#VSJGECUA' ׶>GHeszIF0:ps\;5۾-Ms*s.Ql:Z ܾL/"5qr_G$A_2pvs(~9mp\n vɰ滴[DhZ&]Fam K$LϪfx*Sr:N:iņ Nv>,K4ͩLR7pN%lHK#7g+KX=7arqP:pA_*_k$Ϯnu5N٩v^[̾{.&Rۿ i%ʣyt3%%\]]v+kjIӜ?_Q8Y;If& ;L6D` "UGVoc Е!ס b2 uEznq,COSe05.:Hf1"~ $-Q;Ux)&S8Q*9憡ߍz3OU wrwU.6 SGg0Ǎ''3dAR aۻz48G(6poOETD/yԿkQ=)Tl?v}ήE T ޡ3ozG:ʉ bN]6XEa{e8l8>!;~vuC n+Σ(bQZqMLL~I9Y̛7ʱ1ΣYkUhۛ]q9AG+iXq×(/R?ȊM/ㄥG6)mʒѱbIq%WoD Z-WY#"m)G_` Eqک 3u(S<ډm9c󨠠PΣ¿QmlŹ{]z>{y| +.4rӫkBۋǗ"M˗"ɻ^DCy+[äBorOy-uA+B: RA=Il@yT/X6gLUi/|B7}Dl{m<ҌcޓYqmH ״l=Fy3*tT&*fthC),I04XMvv6Ny48Uífw Cr̬0h%[ȧ^蓑oˡw+'5'ApfI޿Çv41 fBp>]8j{Y2oW{S8&pK)?NGT%c8:*fNrcZg˭OS/G`|OV{҅خ!,T͸#3:z՝ЏI|{ ]9pL(ǖGM_c;*: < 'R֟զ EQMM 'l3]ڢ(j;|yTew:tKNj5t 9G>o#tx<6Cz%Hvں]9(a}d?0d+>bIq+|с}Ry=1dgCaAʛpAۍDn b V ;̴䃤l韡=|͂\ bg;7rzr+ L(6cx6@g]6%@.5?2s}do&op|gEV0N~g-f},q;ʉf1`ޑp0٭R~ޙA~~>ݞ{; ۷5P+ LaCH:ݺB=\['쫝ueJ\xRj ~W31VyTPPPPP?\0GN D ~G#! ߫K5 SFL0ARA! 8,E1@x$S ؓ@f\TInݿ9H󪮖x7l ב{GpB|fğ ܋v,rᛲ{KHHvJ9 $h=S~hIQ4 7fύh}Fhc,lM{jК7s@I1z E#zRȼ-oe?sZˇS!Æ$06yUIyKŜ$iZ3Y,}/==g^oCJnǦ&UMcDH)/$$mbOƞeA,*S;ͳr' M^EB RƀЀ4=T`TtGѐJ<"y-{ ԹI;ywto#FFU†$`s[Ju5XKz}0i+mVZ}kz:cCӉ Kr!<)W=uZG㓤#} g_}L;Ȋ}c92v8J!β62[ InE~/dMw\Vf_ܽ:pХ>:v`ej\~spw*GY;3c}/^rGd%ŞY EƮަzDQtETJ(f0ˆѪ \GUQEv/󨆤?9XyTXؗ[ql(_0w߹ Wf?f~ZN[.vÝO?Vl/&TnzªNY1neȋ&qKJEl -qIU8IDAT+O,="%'8j,ѶXGq!.vBXuf#=(_a79O,UR<:deڍ"lH<4#=]-gcWoWKzSi*[nRЩ%8^@G5N xkv "C-Ƚq5@4WqO)ZQ]׼Mj&\g nZ<>8|t[b_w6y5 Zv(((((((_~/˟Dud_텅#8sP̤^ +==FäT%rWׇ1 7MxvqӋ %8 95-[~5n&,,-믨׳. 5aUU c\(NBmZC(~735?ֿ !H`$PW20GCj{0fDQlMCm7X69`(Y?A>ir.O!jj(W9"աPT %@zc$j0L(wHu(<8Ճ/Th{QNv?J]Ò m[[x+sh/^z~86z$"^!+[0t6Ik?#!>)4vTTC*s]&2o/ a(ٚ͞mAP'>7[u1ҒH#zJ{1e}P Kfay˺Z{Xy>NDe2`KDA%7쒴h/4# 61:KcQłǷkt hV_PGu[a]'nٹ<7'AéL1W_k&\V$zRXr̬]۾P%d8]磦t nl~HGK#3=[)aљ}o +OjaEC_Y5]=iPWRPPPPP[G < PA*Cŗ%T6yGECӟi[+4zG}5]RuT8!T l߾-뢃M0%bqRDۭk f]$h{Au> vyŽ\ US^߸:ݸu ]`[XH?p1޶%T %5r0eI+`k7o<1wU¦d5R%D,:v}=dC?jٿJ {|06ZNf+y-q1ݓN]XFj3bfguK^c7Ju *#ITi訮z DEQ(߃s6"A_A)cJݑj `LՐrA8*QmLA, f5h"A%?G? =vߗ\ _AN%/nrU'1Yhܜ$DNҳS>e_n;3_՞R3UpTtq  .959אe+ǵ=pZ9Z~ ; 0ӘXKA!a(<b>O5y}{W"{a0kmA;O8LAgA#!xV@AiHa5v/2h4)piqPAAZT=qApEΪ,Lݮ%0Ic.pYyTܑ|݆u~[XyH+CWO\zd{twf< @ x~ށwu#H7N\ุK5{b'#ӨfY,PBn i˨2zup!;ܘuv(0Yeٹ3#)dgvy&BVM㓰5eYF]y\: _&h xxEŝe$  TAMU$I F$Ov~H-(u?V/€HѭH)nDpE1F Q=hw@*/sA<U$^Ta@aGtK"S.bDTsȈ6v9Y9eKRm);̎w7fI# BDtB3* ""E@AY JQ݄  }ٖ=;{swoʖ3)w̜wNy7<|T,Fo3) #F%L[~ ;ɝOgbԔzӖE cC u|Wiry}9:~Ucܴ}L-F`wӟ㬛jtɂ9ٞ9919mC!Xx !ihzo$3:6H uΆԾ~=VSنgGY0N/x X'{x?w;XY}B[e=WMO8 gp#ϯQz QK[$ΔGbW.u}܄6^<?i\L'syu9;r}ާ[qޢLY"m!h+૖$3C!hZUW\ ȺSa]!+CUB@\u\k<2<[トv !9CWN{KrFË{aMƁhzsձ4]`~G|6LH)p{([7r!ڥLϽ7b3={SMw23I&lsG~ 6+^Vy;n=F‚M]nF1Ǐ)E~4ƧAՄ"؝}RTTS揵'ڛuKҒzjAO~MBjN=L[P!!BZUQ{\ŭ,Mϛx8+\uvoy]Nw[{7?YL'>#Xw=ۿ3yc{^Ka7h/l$ǢmaVx|:ݖ9|?@!pQ/hsE7-řȕUmA< wڲ%U~{c8/ |Z-Ư1Q(Z:1Kj"v%tv{H ocwtb.vkf| JKK5-m~=QS8<>y;9*eťdElYy3 aco]BѪ4jUݩ@9petaRɯ&={CR!9qBK̬:G~%SiuСXq<ǷәHϫ#a}Ypv`_Ǧg=3XBMQW}L/x$^mJOin7 Qz;.I^BB]}[)뺤-`?BKοvSiqBa;=׸{6h$3rϭ?i{<Wz@z 7{s^ !\̾$(yB8$+^5pByA~B4@!yl Q!?p >?eG0-1BOۙ&R3 Lk$ۉq=pՁr}f?Iٖ7rՊ~8Z5:\˺i ϣBQ42570 i]>96 !$W(Z;ũNm[Fx5BqpB_`g#`}=2r JLo>e:%x z}7]#oOi0-1s+貮mB(B*l?$x_{v7W|GE]a=sqV«ID3[eBTvO8rydS7!.vyQSԕg6 go !3Cr<1s̫n$?_j`1 (=5νo l;em(\:BiWfv8p/>kYq>a'vJ!p{cYqb5UBƐ! ̅Gxa?~>o2EDtו\u4v9̆<lBH>j-I#W]tä)k֘:u~} G?x-F]]-o߾TVV/3;W]I2_?3:ٚ[YRTTСCYP\\/ҦP\u! ,c(nl*p n%q_=D|#{$7͑u?2>ϣ\uHNT4>G|]^eETFqy4SN n)L]I2_ڙ_NEEGM~C>/ǯ-ksNR!ay3nfY20ۀcfY-1gUWKcfϛY90?',7|WRqv2wӈG! 6 .zǞbdu˸= `fۚ_p?8+'BWm2D 2Wj^ y*B$̸pfrWƝw._]8dާ0cDŽhn:hﻻz⟟Fu绰M^[K4!B3lu #!@p!>q&1y&1}#0+BG20pV*|>D 6|8zʀo;2߃_Jmq`ջꥭڣ  xOv w/y{Yc2!B#x ! 
V[=*]u GU*`xvxd?GB޻8y\ O\uN7\uޘsd2_Z۽Gg['6]rv<[y?'_~1f6?jU]ff{ 3{ m l4)[91$p?xxާ;fVafˀFB`9릃PSv7:w?m!hlQѕYaWf0COdB!h p|uTp7ojq#|pׅcx{%{Xbl,.D׏+~A?c]{X~ݓw+X=DU!u6CW,jG'*CG᫣vͬ>BB)# B( !l |8̒/JC=XO !xps\7ο,(4n^gheB4G3+~B_hvy:9BmfO>^MřLMVJ&$$SG+kC{sCޏqp0 >pAֻm)p:3S6mڴiKozꗀaq\!,I6͑?2>c$$dj|ۂL2x,㍬, qW5Пl@6Bua8 gCfʣ#37g 㓁UԹ{$$d*|ےL9`hֱہ6rFoY-'BHH; 膿!| ?tw1\:B4)#oe8 _bC!ĆSX:R{qTO6rodLۖd*WYqDW!c >lݳ:)WڴiӦ-Vp:sSl"x$Fadbo,B:<_$ %dj6d4NsB8 1 BL0s6OaflY2 UB!B-_2ܘ8+fqo|NUx§1\u!B!D :VvWA=8V?b|>CIf 2!w.AI>coƼsyB!B~ς9/1. v.n<BXff+ln]z<@>B4KB!B!yfX~ c83^ [\utѵkWV\I׮]טadLI2u6.WB!D4q6 cCJE|_4,N!n::_5pB!!"mB[[̶ߩB!D-C5>s׉3Af2T3s<̜S`xh.>tu5f֛FV[}E!X|e.z/-& +|%dL9mK2%z/f JHGkh퐞:ꁿٍG3닻ᘓ/*!oCY`fxyKlB1]V]uDtZeoX!+$dL͙o[)HaG,ӕ)?!=t7Mjcpsȱڪm o( .e͡VJ&$$SsۖdʗQ˽.HGkh퐞:ڠVB!Bt Z[!B!G!B!y(B!"/2B!B'a6ܕ;T 42|(\V5ۍYg}fcZ`Y ˕6? { wr \ӀNSr[9Xꨃ\zΟK&4ęXCx@ӿ'H[ L>kZ'c\xuK]Tx\1ҮmXx59t6&KΓ赡LuEC$YSl6V:YXDG벥kl[jTs{5N-1psn>`f,d~ `Z,Z[Vp`v Gdzv/-ȴGHOk)_먁k}5zg*mf_c__J3 xV7V~ ,}\_9 IppMR|]S5Qsaߔf(ˉxc.-1'[q@*u%O'G4' \Kbܗ4OL oů喝XӬ2sj<[:cO*c8-_aeQ:$?9Kpcjxy-M-q?O ~ڼ#yl7vd׹y)}< 2}Ő&oZާ m5^wLP),"!%a9R s,MM gq*Ve/#wEr3ar]gJ=~ <CxDUQ>TIJ-X 5QFNy}'C `jo:5UD_I]XJ~;Z { 8I}J01 &7}2qa<?]" |.ua9xCY0fv {E 3۪5je׽k3'1|Oz4=2o ]=mfRqN( HO, M~Zۚn o@J]C;"^U1xb⍠U11+d ̹Sy7|7.TYuYigKc1R>{ Uu9XU*qBVkp*9NJ}øAIܭҦyC9R:JbIӣ~;ٞ`NL|XIGR?&)WuHJUp uE-IYiӽz҃쇑IC~Vk~IMWXʱߥf_Dz&u Ire=5{}(ul>p[vdzϒ6iy:)!$1AcưwqC,y6"u [&z\LwGvZ\'$$]\^/45t}#ԧpc3yʚ\F :sz |Kֱ-[!lIM7AXI~QWåZ;Q]Z]1[ ~:Zj̬̎ſ0NH`fCai܊^xoٸ=UV5 t$ fUqyP ;̮iKg#I27p{/&J> 7Z_G>Gbt|sl%yWvQ4Unp |qpjhs~7yGdzޡD5/3U+j20uQdzϤ~tcdwn|RWk@%u.H%bR2k v XBx5:IܸA%fօLok/E$a_qΊ ~/&C@u%I[D^&ӣ։oscteffɗ㾖Ao[>=םt !N:{tBһT =^dFQϻSߖM \BѮ#,.=e)³Yǥ'g6ifvm~:hfCl82OF= %f(7p#!%xcov K{Lf(oD]ʳ7cG9M[,G0O]4 7:;}~VFP< sf +_ޟIM Mz_{ƲL L uMDzvNJL|=2?,O&ROܨ+#"H.%y%C{%Xch@7ؓ{ Z|5QG+cx]1,ߣL#cy],˲xO9n&SV7äWib EzĘI7 \Fd ?7ouԙkdL{Y*EܘNds"b@MV=d>&7)Re*~&Fz,Uq}V_  ʷo&cf\_//$]x/ޭp*{¯˜vZ58&c,az<1$^if}C̵}n>nBv>kb7F}tV,,h<6&|O;I`|Wl& Mp=:>X^BroJOFڝQ$ջv| (Srbf}it87nht:r.ۣKb1X*גidn¯@2Fd滁~dd؃i.~.x<P~-& u<#^SRaed(CAһfpCg|*m9PB_adB~|y9ԟs/oPB87ۈUCf-RiDH_)i`Vdÿ$3OPYx#+&37!(L]c Ry U]%@)~vrOa{un(gRzdU ﱭ"9]{$qƧiq$Nd_Q;,[R|75fa?)Iwq#Tx^s~'=uo|}_)i}A0x}KP\߿ǯ{T޸`n%zJ^K5Qyϑɐ$ߤ9',E72zy&qg9~wǷd~)Js5O#%3s(cge!(t|P霒111p]$bLEvOwde#7"cԾϥ;:7;+l33ۗL=?t$2%+~)ʸkw:#c%L2*cr%zs5s\Rg"4]᭐!G yQeXdd>Ǎ5<9L0kWL$MM=ԋLYFyF%י5S2rv<N[CbD<B$G B5>ztVh&d^V,Aҙ9GeE&iwJGsxw:>GHO)̬_cQZ{P_‡V6Q{?LJxa!>*P$tOL1UL⍴xn)>olLLJK%s𞑥QޗŊ=xc oT.{K߁rxQ^W}@W*ʖF8/ MA<oU}ss}B@5x:~wlx~/mQP91xSe_|'2NNf\MX|t2{]a2qʷ&^1^-+oC9'Q GTUCoDY>e~//!^x-7s#b>3Ȭ2L QGu37YKewRgae4u>.$36Y|(KYyN(TdȬk,#Sa#; u\{/4QKwIx.^DIo TdJT'RYJH$슨2"361r$uC+yI/I4~LW*]cD!')QOISJ"ZyA3++߉֯}yދd1G5ekEt'ӈM[ b==q7ڲu0"HO;F6~;pd>gn^<[Z?Vx# ohLT#$Se?Edܞdo[ݷ3ȼ [>(׵ _kNそZ[QCщ8S#NQء=gπϷ~,T!B!hyB!BB!B!"Q!B!D^d< !B!ȋG!B!y(B!"/2B!BEƣB!BxB!BB!B!"Q!B!D^d< !B!ȋG!B!y(B!"/2B!BEƣB!BxB!BB!B!"Q!B!D^d< !B!ȋG!B!y(B!"/2B!BEƣB!BxB!BB!B!"Q!B!D^d< !B!ȋG!B!y(B!"/Z[̺]Z[!CuhnBEZJ̺S\xqmk"p lӞ H3ګ_e 淶(B! 
ᅫn<;rs"آzU6zadzQ߇Kp2bUq?vٌ9!39po.ڴw8_~owc\#{ ^.c={k޷+~B[5l$^#5㏔l|#^<׉3fq^c`a ]oy;a^beYTy+m_wʱEqn;?Y#`YZe&r,'Tw!W:Nޙ4L)ZC|;ۧL~45dJ'έLtp]c֐)2!S>lאI:h:d9tw9tܷrޟ'Ly6Y{'}]NnᇭtE%EťtVDinyѕQGWz*NJuk,1J({qq?ֳ=VZJn%tMNJ>H ^.c%%-}vb]FvL.^\Lqi1=K=agu7bugi)]=NQ؈n())G-=QTw[7#twtfɴ|֐)9_QI})9t2:(*ZLtK:!S>T!S>I:h:hlL=;u_gtuuУG1t˖z{C !B!ȋG!B!y(B!"/2B!BEƣB!BxB!BB!B!"Q!B!D^d< !B!ȋG!B!y(B!"/2B!BEƣB!BxB!BB!B!ҩ(*WRWUDVR^^GEu9uU++| VwaTUR+_AeM ˫UT[^QAŊ2+*X^^EXgX˅pUJjQV*kj嬨X^XQF|e[ԆZWVRWPGUUU8e,/bQQuzK>_ELWWLeJΗK|A蠮bM LyuC|:%S>t%t.u<٘WXgtjlLeetPd  !4p1g@֖E!D0&Pڂ4z !AzŹ%vv0WZnP;b;ja^ݞ mVu[ڮmUnhUޔr}xtB[[̒ܠG:T;jc}isi׶ mW*7]۪veoi`B!BxB!Bny3+mffB7Pvh̆ 82>z7m&_ljZkpQBӶ~LeNHɕѐ\~GuWY &޻~OJDj;ovBo9BOkfS涨(g}À5-zluYv|ݧ/o3~+Ws?ǚ3\ Bl$jކiE!7,?W9x")D{IߧטRZ\mugM B( ̍+\n$NT l#~]B6]v3>|¬TP} u!T> !B_:nF_-ljZkvxV*`pveGj:6*% |~_S YW !B#VC!j `tVhyCvoix% ?p2d _襩9ef{ 糴] Ѳ娯WB^({4nn`_-lj)k=1:pWI^6N{ul#% 'SK+d7겎ՒZO Y -od*?q/U>eۀ2]+60| A1|#cŹKė`HEaNsﭚ<ޡ]u4Xv5aV[/;xMGPTvy󕽽^w2KLCq7|[H w=AZcMW';mϣ.LJR)t9 Rn|^,2:Wv­-Xi%}? 7|9@%"NYyt,V.[mlS܉?ؗjفR)Zu|PQ6W~J<xبm׻[P?Z rسdo{8);mć^Ȳr@ܭ kn1c!B!A:G!B!놌G!B!y(B!"/2B!BEƣB!BxB!BB!B!"Q!B!D^d< !B!ȋG!B!y(B!"/2B!B <;IENDB`flox-0.10.3/docs/diagrams/split-apply-combine.svg000066400000000000000000001676461477552625700217430ustar00rootroot00000000000000 2022-11-26T15:23:21.390398 image/svg+xml Matplotlib v3.6.2, https://matplotlib.org/ flox-0.10.3/docs/diagrams/split-reduce.png000066400000000000000000001542221477552625700204220ustar00rootroot00000000000000PNG  IHDRQ! gAMA a cHRMz&u0`:pQ<bKGD pHYs %uIDATxw\TW3Ї@D *5l4hLVbʚƬu1 1o14 E $4,CE}?gߙ;w) =}Z,ذ])ܽ{7777;;9!!!11@ LII cǎ e:]sĉgϚ/[͍^>|СC'O9r$u&\~ DEE]b0LrʕYf]:{Cg͚v]ưwޜ%K]^0rHSS)S];t&O|ȑL8VffÇz)>)ڠ;v޽{ٮ 1Pjjitt40ɓ'߸q3,mCdfffz~Ńn|p8FfȑFj 1ʡCBx<ŀfQCCRW8cP ڱbBBٳgsrrٮ x8N"F#W`nMi8_(5r\s8o)J1M'tBb.pnPHGNW`ffoBQUUUZZ:}~QyQQRVUUrγo6r JwBr%!DӤbtѭÇ>)L755E~j8ryII ATUU9;;] 㱱155E!())aF\UUv-5<`2 1.橯cmy4}d% A~аK^p_|㏯ZA :t֬Y}7UUUK,8qbpppHH[_]ʣS>~;8'77wʔ)111qsss'{ƍ :t+_'愰o}W^ywL&cZ1ʕ+Mv6?=tPuuu{Tmm,oݺinnnYYY;NÇ+Wڼ_-[ds={V}]W^q޽}ȑ#555<8qĜ9s^SѼk7n,..dk.}zٳo߮t믿pBNt'|r۶m=ݻm~JK={/P(>{w1a„o͛r+W>ҥKiպEEEɄ^zi߾}J7=3<<;\`bbB_899i%7Ύg3+WlٲO>=@/:zVܾ}BD֬Y^{cǎM9qDuuu}}իW?iӦݿ9\ᅧ*͋dnnn{Y~ٳguCJBȁDx?pҥ>M(BZZZJKKz }_Gs!Νk5A{֎'8Ž{t?e۷ͯ0 "\x~_~!HR}ZXVVF,~ &xC urGU^^^ڰaþk//oՙޝF},,,!=== !C=|a!6lx9tٳgϞmmm`ffO'Ћݻw޼yÇAFy_y啥K0I%0ٴi>EB*++ !|IPPo|VC4L|ŖǏB^}հ9sШ?n5`hB[o͝;w͚5Ǐϴ/䆆kk3g4ڍy'O>o/Oڲ!cǎoBs=GZYYnL7xxx<|p SNէ!ڜ8Nss@ &OLٱc9&yh@PZZoS:8bĈݻw{{{_p3*Wd2>G̘1tЏ_QQAYr%<Vڟ'H>'==GMMMtYYYj{V !mw-! |||i!n:uj#<3._ӂBHtt4sd<///\L:E椭pё1.$^x'Ӎ̔?|D3t!4;^Šu^{55SQI;19tPKKKzN0}fuu5-+&&Z9880-"  t{/:N vclxGa)4ZZB;P7ts8 &`B>&X,vvv&U=]eaanv155U4xi-mZDLLLhfC$0,--RZf~!k/]ݫV'Ngb x{kHSGMtb澽M!WReccComt fVdd$!$-- X,,,BCC5 GABM:rV8Wh6VmnA杽s>S? 
tۉ'oR{sVu0vGܹsvE{Z++yZF6.Z\M7I-ݻpnt,qm3f YLj/sq|afu])AwXBѣGI:EDVL0޵yӔk扵p\BԔ9t&M"\xQ?;% J%A޽{G7˥gIm4ĉuOډywyzf(;QYYHxt󸺺4ނB닋mhhCWu3%нWn}W<."jE7mvQa!mPvC3эM{ƐʠpҔJeppɠ !'NsN^ʺy󦍍nҙfmns6u@>4GYbbimoMWg͙3GϢ鏒< !Wn%+c(@~fzЎiCסcv#<6׮]ozfF ܖA>ֺW0hCh bǏÇޅ ƍ7k,:硿=OGcˣ0 "--Ӝu(dGVUL:3|pVwX.fInNdmt(-n3awIFLc܍liiIlViG~ t+kgQcIIIBw-ޞlٲEU+ !:!nsH^vp]lѢE׮][hnhgYh =OItgZCř'&&CiIJeB~K.B˜Q݉[ttl .\ ܰ9vXbninE6h9Oޔk׊ٺT7UGY;sq8:z74T믿޺uk񭭭ĉ~_r/luhsNljw;:ei9Nzf}Q8])cBh3bfR :lbmmMYZ1~ =3M8OppM>?qooٲU Cm'K{LA6[!!!z6 z<_~%;;Lխ?M=dokV4hs]t*W&55믿޳gvY1ϨQ\n8L]_[[kggjn-]w `OϰD$94Czh/~zt_Ѵ~W\Ѿ&i3uqq!:N+HϘA{-&&cz@?nCoFr&Q/ T*Ν;qCcwSΟ?o{7?VMBBB~Vtuu%m<It@=9BݱPiq]3j(FD"DMMM#ft͘ˤtPiNN\rRR' 2ĉzfGB1#gn,X`_}+2bQ][XX 6!mM>sA"Mr.^|Ԫܑ#GXM)ݼyS\.dL1hV61m霮Ƒzxq]3b@jrZ{\JDjkk#tx͍d%ڷLZpGt$;M+߻wڵk%kڍy Z06Cq1ٳƍbŊ{N07ڷ'&*u1h9Q/^ꫯH|رcC`Y[݃i|ڣS].hD(6 #K´wQm<4i4[<4_;>iǁÆ #!33S/j]RUUUq\ M)޼ B(--%:=O4;LGB'sۘ[/Z?g 5t1}@@3M[J5hРV(=MMMCBB ^B̚VBի)}||$Bоקjϰxjjntϧ (&whzk=!dС~/__|yȑiu7Aiª͞$8q_&]"H҂q$aSD"ooꬫ)VDV1!'C!L2ǬѽۦӴs8B?Y1f̘Ao>ݜ]gwܡ5y;եU9yd'ai}L0:F;v,!ڵk~D[}}]vȝ;wGi'0`~%%%:tn"W^yroܶm!駟-I&[֭o޽V B .d->X|ÇiʵB[~RIk>L{CB#;`GGGBZ.(( cӦM+..~!֭kŋ_xɓӦM3gݻw{N4@j˖-{]v3|ƌl2=;3ݿ^cرN{4)// iURRBB/-@h… .\`y޼y K,ٲe_M̞=ieر .LJJ߽{7ss֬Y5"KLjZZZ.]#q{2YtGDDX-ײe!H$=zf4hМ9s,wذannnÇR;_{KKKSSӨ>?7!vvvO>D"aB;J*ݻ7&&F$ :t>>>v1_bx֭ϟW׬Y{Q<<<ڜ#Gy'NhggWUU5hР7x믿 i%::Ҳ 688888B$… Wp1ܜnJK-YIwHD\\\fϞ??^c2q!Dw_7x^+wH$ ^fÇapT:ydB !^^^^^^z-el9GyxxzxV;[yBΗ(j\:2Λg.h R '6S__ߥsBñ_k"XD \[5ԟ*պDje>ҭ^w'ܹj* :(q؃-n$܍MLXl~W]&sRb@ JJFEحX444<@h4=<Їt؂~xTp{~ > 1gy?Cb@\}ӧ<===<<ٮ 1O^^ޭ[خ0@p8kkk+e֭[?sKK-۵xdt!mllخ3@D8+eqqqqqqjۿ'T*%򲴴d}WbFv`ӦM*Z@\'QlW\T*JQQQRYRRBC#Gh4:CC 333k Ї`=YXXp8+ECCRd{yyyyyѷwܹ}vqqٳgOgB wwwSSSk &<SHy̭ ! KEF.knb5އ+r7^\Pe[yMdWʍY4Olj=5&P{r9}vjjjMM !Q;lk%ult5U]3[mW6p7vMdxGOfffm}}}QQ ~dž„@|>nß9~2(((((Oqq˗x<+ͅ₵v ($D" %h4 &T*ӓ9::by"IQTwޥknܸRB!988]k@㹹EGGBJ%MwׯˋmnnϜ9CqsscB gggɈ䄇PcܩkzQMM U~FSQQAs!ܾ};33SPBwwwf"=U?st˝1c<ȩ `8555^^^l9""VitӧO+Jl vƍ+WTVV޻w޽{lW翬+Do-w×.]^jp8r>06HvrRT*"Tw!'NT*l ڵkG-,,tww󳱱rlw>-..xƏmbbғk";AAAl%e'|„ ly<-::T*ʕ+GQ6666=555mݺss9/t:y_~H$ș-wV^ݓd=ybvax޽vE111RIs?ԔZC_Ï?aʦ||^z߿{AJ^^ϟc]ccܱyLAݓSRR2yn_חRXXvCQQ'󽽽?%%%t(##c߾}GGGykyŊVVVlW LMM,Y~Wޣ%<Я̚5o?~`` 1fٳgGEE0 9lٲ\D$\t 1 .]b6f0P8xӷMMMoߦ@UUUL˗/Ɯ9s>?k7xCp-,,z꩐kdX̖;---&LNIIȑ#bqϯm6RR!q;v>| }`@yyyo6脥ePPPPP}[[[LxbccP(tss!P?h.\OQΜ9S\\ѥ/"~Nlۻw/o;[hhŋ{kz޽Nh4xZ w-YDSؔߤ4fB oonҹ%Kgγp|[NC6x`fp̙&SSS-lW#??_.uVVV....]B_!!!?Ӎ7v5o<#̖;ofWG&}n"0T*U/޽=֭KII5kV'ba; `gQ3+EpJ7oܼ'I{{{{{{:Iܿ577.Y`%%%VVV~e ...%%%]bLMME;w8lrngURRg566T*ghO/<^ dVTH^R^^vڞ/S8CXX!Dє~;qR辨}w?M@rM6 >B j-w,X޻1H$p84ԡA.vss[`;jkkA݃\.?|k!u{4ᵋKTT!DR1!׏=RlllfTPTBZ33n$Bʕ+?͛7/YoMlrO>j(CY"/orZ&p8Wޟ%ir|˖-\]]]]] !Jfľx4D"ўgjjv.ǚV_-ޫV׿駟._φ=̖;MMMƍKHH0'/\DxLV^/^/l2ydU ˗/}}Čo7 _>ڴiӊ+- rΝ;᯼і(C\oFffAD ݻO>qvv E}V^^ޭ[jjj"##/_< \.Oqqɓ'\ )"^_)plmm_M6mܸqʕa Zvmom'O֮\9Bק۪UJJJrrr\rYzޠO),,/|||ƎhEEE4 :tP]]],Čp\k <<0In6lXr-+ըﳳ zA555?ԩSmllX77777ɓ'WUU=x𠪪ifffnnn}gL177qʕ+Օrttr|a7 1 PVO>_aO}}?&H,YZcdwqqqM"9X[[ѷt۷ϝ;'ɄB<==r, 1'p$q3`ʎ`.&&\!2uYzpb9BG'8ۛG5jEf=mH$Zr_|G\87N|8!DTVV!SN) sssf!5U~7߬6r|{`kGѢRk4z+\G`#(U*Kp*ReɈ4\R*5F-6\s9([Z65|>[lٸq+ ^ќ?>0aC"oii9sf/URRqƄv[ MxNQt"ov Rimml z"`Ab/T>h4rKEyB.^URkE^ ߻m䆋?"͍lͥY4ƉB\/]4))iӦM+Wݰڵk///9rdbbX,6f/TTx"}}„@|>mOm߾>X,R gbb2mڴ^gx055e&i"=z.bEJl3݆ &L033۵k!Ӱrd̙FO-[& {r?/^DY@@@@@}[__OC|777?8;;s\k wf}ѣMMMo.'L9̖;---ƍKHH+ 5MrrСC{Nrsi4<0XZZѷoߦI/^(\]]Q GGG'B.'))I.?1[֎=zĉ}w\zOܹ{nxx)S$ DMḾ&NhccӓH$999 e"wY0>{{{{{ÇB4PaaaZZZssH$rwwC@lWyBЉ˗r|ԨQ?í[/^,Jٮ^v-zC֬Y}ƍf>Fh44?yRemmv <駓.\zA])}\zW_ .]R*K.lo ]\\&* _~QJemmd277{577$ !厝]]]eccJrϞ=~~~xp_[TTk=%%%999Ʋ2 |||8H$ exѣG[ZZfΜً,,,,**?~O."N:YUU%_I&cFDǏH$(<|>MFP(tܹsh4LxTTTBjkkˡCΞ=OL: ̖;<oԩtLu՛6mڴiӊ+}IeeO?4| ^lzzWhɓB0&&& ] weddB33h{ǏOLLd}bl7a„V믿iӦ7Z/=R't566fggϝ;{_/))/ǎ;rH{ 
v̙fffxv 00 BV.Ypt-1pZZZ!?3g}ޝޟ`Xi4sν;}]HH|0}6C[[իWlܸ튷-333t;wĄ&쪜?a͚5xdff6v5k֨Tu֕]#` .]Dy)ʇLJJdl׷/B@!׮][nOO{駟oiizjBȆ ٮ~k}lll[Ȉ3$33/ Y`A{2233{{>y@[QQZ&h@rt7|3??j9ݭ[[n VO>裕+W:88ݔڿɴiz+VsrrvSO7Yfu{3tn r,--%7igg9mBWeew}wu+Ww "hʕ_|ņ ^yWWWD!ŧO~z=\zzz@@@W.))IJJBUSLinnׯ_ߗWqDFFJ$777333@T[[s{ݻ>oэ ˗/J7n,--eeD$''z˗/Ӕ]_zyyM2x$%&&/.\8y`<]B;\reׯ/_qBv[wԩnUٳgbqPPPSO-.33gyٺ1 8onnn 3fLo]/]tȐ!6mjZMḾ&L;ި̨(. 4d2١CF=acc3lذ+BCzz۷{<oѢE~)[c'x^kjkk`:554::O&O؈n݃"䫯xY3o޼#Gn޼aO~~Kϟ~naÆu5{ANNc=A333 6AQhh\.OJJϛ7oܸq7o>ZT*### ://oԨQ]VUUUiiiv/]W^e$䪆Oׯ_OKKF.=fff۷oojjѣ3g4gggFS...m~p 1Rsy֧:!999= PF:rȈ#ܻן0aٮ]r  ږʟ~i֬Y~qZ}̙I&uUUU*5775)573!țZJcp,D&IlVixRbԢiL\NBդPhc!+S.˵aƞW033*))AUy`Dᥥ7o~7A1zhSSӤ$\O-{J]{얖~1??Hx 68Z<C̹[aK̞'߿x_Z}7|Z`+ oEUgˌY'ހ\~{B0&ȅ dy`JHHxw_yov׮] ."ÅB͛ry ъ_70Pss~QtElvZ,.T*Jcҙ+UjoURkްfb#@Μ9dɒ .( lppҥKϞ=k.=ܼgϞXCT@#HU666peX -00ԩSǏ޼yB0DA+W~͛[-ӧOϙ3G( Gb222ZZZ/^ظk.jժO?Wh4ɏ=28eeeDFFƁHReeexs]x@eZlӦM= {̙cIOOtvF:@~)BI Tի6m$ɺwN0@,,,,--6\BHbbǖ-[z7ـ6_qƍݸ¾}͟x IzzOo `|؟iiig֭۷oٳ Tի7lذaÆ+WW_}7Կgy@ogi6n_3Q*ɑ~~~덌 P}eAvcbBVO>cwwwDbB---WZ'|G+Wtpp+'O9sA{#---::WBVwo}o1ܲbGh! W6٘QCr"S>!Xrpx 4 |B^3PC"mXߟH3͜9p˖-W ,V\_lذW^qu'XSS?N:@!֎3pE@dШ]yFHYlRc!*Ul6E; vV9y*WDFF߿?++kфdɒu8pP(\|-[6nܸb ooܽ{S\\A!--mȐ!]ZbЗa ALL MZMI$ ?~~3h|>ҥ6mjk׮͟?&ׯ_Q@`qbFcΜ.:U[]K4__l̢"멞C !g*n].3~çz 6ݬ_7jM{~mqqq'N GBBBbccm۶vZ+++6[tiRRҧ~|rOaؕgϞL<-e%uFn VNCժR#.4jBH졑K+EYc-+ -mjYm̢~MXG'L6pBjuff&y61jԨ ]S\\|aCp͛yӃO1cP(4hٵ111n&1!~~~NNN'O>(HϟC/졞|ɉ'n߾=--;bC6l0s@`bΟ?/ɴ1"""b۶m>4B&OOڵ?ϭ[ϟooܸ1j(#[7omیS &̘1ܹsvt4xtggg???hFV>}ZҥK SSSSwZYY-HP?>6680&<m{annnso߾m:gee=sK.=~]4ʺtRKKKdd`|y$(***,,l˖-rph4Ç ^tٳgo߮R Q\zzzxx@ 0\؂mV3͛' WS***fΜI/_<''gͽa[΄U1@ۼ {_n=555:u sʕ7oV*X\ZZ!.knn UG .׀ ӧDV9;;Ϙ14hg{n''VǽWZqO?_-Jϟ??w\#<"~XRTZgBlf?o"0ЮPH2dȖ-[ E/sڵ9jժM6A{Ν;gbb2|pt!}ZVy… !vJ={bcc=<<;uuuu7na^b5Çm~jffdɒ .dffJqGmii6mZǧ988---6lIؓ_^^>j(Cu@#"hذa*1cݻ{XVeeO?4c }ڮ^dÆ +1=== ِ]2</***,,͛7paOrrA}{K.ݼyM^z%N6:y!ClmmN[\P(4F'b>~T*kSǯC^37 QF9r$;;{Ĉp/^nݺ}͞=[j4䠠6v [tiRRҧ~wfuuu^^ފ+J08@ 0`N}Nqf1Rєᖉ ?"1^D"Qxxxjjj1!ʊYeO:UQQlٲ^$[h͛.\i?sϣ,(RTEMV(S'7ͯ!V8?1}ŽЅ=;vpwwxaOMḾNjcc[p8333۾};!D7QgΜ4i] Wx<gjjڻh4Jf­x. 7˥х@_NNN& HLL_:]سo>''^O>oj얖zhQ\ YQ.(x\#rNp 3B1:JNhgc֬P}!Bbє)=_755׷wNKKF-˥х_2.=z͛kkkbqq8%Kt'???;;70 ɓ'ڵK.kKKK 577g Q2L# !<1gS#+{ù,M45Ylx{3FHp+Je7Fr.\U ^y=%%%twBAAAVVVN>}zgJ$ |Wm.Q* hOBoܸv/4kZga}fff{YfTWWL544tpN˥z}TJ P޽{7o_G޾b.I;s;v 4JG666N6=ziRR\.;wc|7233wjvh!뀅 \KASe C$r)FC㱺#G:t(,,,::(?۾1@Ф՗.]'f9sfaam^yPeeO?4| #T8<<\(~uuuW^h4f<<¢>*6D@D =׽$~tT'WP(I?UXɥK.\  -,,ry=Bt MZOžÇO<ܳgϠArK| x!p8/ܯ\{[0wBȅts30ws397fb+(O}DD=#R0K 1-M#?44tȑ...l+ tI)g>>>999뜕E?*Bjyi%k *뚌_@RBd-F.YхiD}6"~@˥֯__\\̼Sx>^"]1@7EFFB9933sDDĖ-[r. ȈrQQQW~N'a7<$"""O:f޼y ))h-//3f sg]~ĉBCX>cA}G}I$8B@ Xti^^^jjq*`kkD">|˗ݟzX yJcj`WddZ.w􌊊j3fطosW,--mذaӧOtF<̆Zc  yzJ ĴJZ;yf6Syyy^^ƍ5 #0cvb^w=&iuss={;…  Ů]z>*j<ޫV hD;ۛ0,bxذaL#GB&Mw=LӓJpÇlwaawUTTTVVΙ3G(]OO)S޽I **++wݱc+B D0yW|}}9B;]]]O<E2ǏFD;.É07Ivvv~i|>h4F s׌5*++sw9ŋSRRFt￟$J}^T濖H$\.6>085C ٽ{c=fccӍ[YY-^_W```HHH'k4Ç/Zݝ~;f//C>|Y` @k.;v6`0N9s&JZх=NrFz~_XYYYfŊXTTt3gb& L詚̙3‚uɧ~ckk[^^fooC ֭[uӸ ݻw{zzFEE1Gϟ?/ɺ}ͫW477бN˗/mRRRZZZz}rcccO>=n8VkG;/!o޼߿㯧EEE1%%%?}Ap8{4K7v.!|ȥsMp¬Bcjn3{P(!ʯnf1af&MdWʍY4Olj=mѣ bڴi5ȑ##F:*ڡBBBbccm۶vZ++[[[;rHHRRŬY"$~ )7rK4PU%uF.g%$j !KjJ+\@RBTFv~k;TeeO? 
mD<*((j0fΜYXXm۶W^yAԴ!C2ozVI 05jTQQQaa~//i3!x%K>|_d/x}L"JYquw޵g۷oJ] MZ*/zD2woͅ=?X, $T[JdzE"ؓ-&Dv-=~ĉ666e&iuQQQ DEEEDDl۶Çj3gATVV>s!]MMMlWonn陡VVV酅|w\/v)a̛7|۶m[ZZbbb!EEEǏ7o۝D$IyQZo555M ӧOϟ?|j(:tC<|_|뭷ڽҥK SSSiii b֭d3দ<+O]跸2hӰ 4MrrrDD>4@nn.;v3z>yܹ;w4hgyy7ONٵkWKK߉D"7ob ^Ar,΍7^yԩSUUU+V;w߿?77n޼[D lٲvtWWWoo/;wnŊ"NݻwرffflVSSl2+dה0vb-180g ιsΑ#Grrrhp\]]}V ܺm޼yoݺ5uԺ]v7DEE:tĉSLa.C` M󠌅m6e.1mF;#''ݤ&,YFFF~666SNe0y;vĴޖP[kSWGK#nn K[ {~`Eaaa^^ޫvEUyܹ7xCFBBg}V[[+{>VVVuuu뭷x<۝Ԇofɒ%3bY[!liib;ԘEw.22 1@Jerrrll/\0""E;VVVNzɓ3f̐JlwR^|u}w< sPR׳]H塈HYl-[DYf]Gb=zTPL69R^^~l;; u:{رD@*{zz* wCD/}f,CH٢"%%v}*= #1@'*++^(v,XOC5ȑ#YYYGao޼ /|-4 ܖ-[_~7O=Ҹ}HY'˷lR[[k!!I Љ={߻w믿~,XGFF꿊F$JΝrׯ_3f޽ꈟkVQQe˖Ч~'\.wڵnnnlW#999cǎMJJϟhPZZZXXZk4PBH|||@@͛ 77kZXXHIIAoݲeU$ 50 ]ͻvrrr?)͛w_M$ϯ^pwyg׮] .d:"VZ; `=f'\W_}Yzbݻw?x=v=OZ~Fd\fff/׿P~~~233srrRRR!4tvvƺPjkkryyyYppe˰(@CZuuϟ?Rƍ7mڴ^v@n'yfaa#Z}?iҤݻw=ahxSRRBh4l xD"Q@@!;࿪=zY X3s^/%..nHZhk>hѢ6OHLLy-[V^7(mSpp0}=ydrBHuuurro}ՙ3g&&&* mI=ZWW7jԨx}.@=3shBN2% SNuiCq8cvp'$$ȝ !恁gϞ533:ujll,lk.D2j(wر@}W(۶mdgǎBQ]]ݱcӅBԩSnj# GΝ{78* Nl̦!8::2i8󺥥~z=W(@3aGGG#w ݤ*j֭Rt^^^+.ټy;6>>^$ݚ1 tv1b3_N|T*JZ|}}KO:‚ݚ >KOOH$Ν[bNMMqvv lٲvڎ#Jeyy0Nmm-!D"HR??xTzXɓn1 \H$qqqlWI[ZZƍ@ill|Ezy_>))iٲelh6m1+܁>...55&ͭ3fJ=zt^^h43FcmmMӉjlmk=gw,6<<ԅG{gg`{n:Xb:ȖXSSs?O}dJڶm߬Y i괴!C0\pu배VՔJP(R6O@dccv-~w_~7^~A533[dɆ Fv7DSS,F&ga&O,Jmmm1Q3HpDU%7rKx3p7BZEKC#XtrTh'@Bȅ{jʍp  5MyY4Jhݝ!5+++>:G^^/^lmmM ݷo_JJX, 4t鞞3fطoo/f`JЎp !R;&&F*:99„(Ȍ|{<<g233YVd2YRRRDDĈ#.;f̘-~|||AA͛|M@botCUUvSYYRD"6d&Դˀ6\]]ǍwV299d޼y---jѪAڵk…lu>ryiiiYY߻w655193ڄS^|y׮]/"+xbvv5kZ;w̙3Ʃ ]ח݁/mJ\{I{,!!A*:::b)t 1 P<o…7nx"3hnGZ]__bT*._2sοBРe)m۶hzzzxx8WrcccO>=i$.k4iҎ;% :tPVV!swwR"e2YaaS!SL1GNMMvSQQT*TWIDATt5v4`bSO]~3_~Y¢3GFuȑl#X {lٲzjCD;^^^<󌗗}N.]t7ߌ5k1WMMMtu]L󝜜RiXXplllخ)!y!B駟"""lR222^~ekkkOӽjP$9a۷oٽ{/R(.YĘzYhhhhhh^^|^{M_.@R***qldD"J}||bccihcMbB2dHdddRRڵk 1M&%%%~ؘ=wV}]c&žM6ž;v 6l֬Yl!| ,VПh4jRR19R<5k￿^ ܹĤ{sΙ >q''#'zqa޽{O:SO-3k,Tc\vuI&hOTknnRtȑR 9хw"hܹ_}UhhhpKKKy&###::O>3c&f̜9p˖-_z6433W#G455MII133h<J%uuuA*>ǏJ b  MJJz{k7e//6O///_bE:88:ujF dɒu>|')KJJv؁ꔔ777큾Fܿ_{ڽ{Ѯ ~zJhn*JǏ90jԨÇ'&&7Ld8d7od2ODDD JZu\brUJs.#h[TT˗]Mx;ߒ6dȐhc1i5!۶m_\y~{#|/.];_Dw?YUU)RY^^29{채yO;ǎ0aߪ۹sg\\\@@@UWW嵗ZI[~}RRҲe:=9''G,V%4]VPzEwi~~>V4̓p*++jT*uqq>|8%g̘wP{{{=dcc3s̎O矝O\\YIZM K_ذROv9;;e0d Y(iFJnnnx`y:2z .$%%ZJiiioV sj3g&Mԥʰrvv;wΝ; %%%lEy BA'ݽ{9uuuLCN8Q*b!… ׭[wԩNG67c T---]@ a1i5Elٲeڵ,h4HQ ˝;wخF9333+/^^f{IZOCHIIaRc> RIsF3jkk\?>~xTjggjb.puu}'X\WWk׮qyyy{ՠIيy.\(H>rj5s͍Zt}&)//{.!D,oݝjtĉsrrvڵlٲ$GGǩSt^٩Փ'Oo߾V!ÑdwSPB+/@bx>|m۶7oz7ؘ3J5baMZ-H򗿤߿_TꖖK.M<*@?T*؆xNNNR488xҤIR֖@\]]###333gΜigg222LLLBCC{?XOZu۷oT*Dbh'ݻwOaHT& 1@)۷o ۷oh.[ I%ɚ5k耏B:ݻJR$ьj tM)5Gb.KIIill\xW_}5b}3VWW3wk7߰Ueoߞt577F>O' >U* tM^^^FF/{n__NC^qk׮ &UUUUUUGN/RSjuee%+NYYs!HRAF-J{qMy@&%%%BMv vׯɴWpܘӧO77 27hjj33e˖&hTTT(Jsss:?-(( lVRR"k퇏(44tȐ!}ٳb@ h#F"(88ܹsFSgaab G&|'""F8l)((HIIa|ZViCWF 1daV飝}F;q\jYY[GZZZNN믿.ٹs_6^ѣGV4iujjbBHTTԹsZm^hW +Ѭ8N*щj>@ܿss3Tw_[GR544(@T*q z}L꣧z}]Ӯ]V[[cѤ%%%FT*wx-BoTUUiG8*ҒބHRg@`"hذaϟמvhxN.ӉjtЦ&@,JR5$H4 3<k֬6!@>\VV7gcȐ!IIIk׮՞V^^ꫯJZ|ҥ̛7o∈#Gx7|g#,X-Jݻ̂Z JƎ+J1Q O)++trrŵ䏀sxڃEEEGyg:8m֬YgϞLOOwvv3t%CCC׋Ioܸq˗/ꐐ_~ߟI 7dȐׯ E=jjj rytt; `h4j56b:Q]@_T*JJJkyxxdff~j1bѢElWO_7([v:!ΝW_n SN5B=\nlllϓVWUU?>++޽{3f 33k"oذa7o|zkݻw /\p=BȚ5kiL&+))ΨLTJx ݿ?===33tQQQ{inn s} HJJJKK99888444))x.]jii4NUGuȑ%V(W\9{l~~>QQQ=a{˗_^WWJ%IHH1NJa1cE\v---_2dȢEt 9.>>>+;;;##_~oցOnF"(<_Tj4JE$$$Eν{4 3Q-,, ] gϞMOOW(111O?m{'kO"@vЅ]D"Ѽym^TTQoIHHxw ;>633asuu=88]1@ےlllfΜ/1ŋw4`NNN>>>VVVo߾)SH]!//O /Ƭ?,,a1jjjbpRL񜜜]qRi ?ix_`Rbq_~ہJ~~~yyҥKycǎ>())d7o2dȒ%K Gfh4188xڴi&&&YYY"hܸqlwܺuoFuu5!V*^j^@y7N8Q\\۔)SBaO@!ɲꢢy$#z?gwޢV{:/^^fMKKKW`oo?u|\.pBff۷Z>3ݤƤR7lOqq?|%gg?҉M*wn1dz,0&{k~Bp-rQ F"D ^Њ^ V\t>e>o~֭î_'"'t,#$$G6D)r$_'sT;s^ٳ+V 
3gRݽhѢ.E"oaE}wRoqtt\pMƿ0ZMk׮q8lV&IRR@𴗳اz``J#[:ŢLuX?af յRuavMR}g6m2eee>>>;`ʕgΜٻw8466VTTr L+999GR;vL8p… ֞N1T0p.7Z-H HMMϟ.Ӟjzxx"ijc}Qtt-`(//,kT* ngggUUL&koo޲e˂ ٴl2DT*i,E&FEEر(I,ꢢi<n0͛t-tw1ay~uuuU[[;22:a!!!VpBLLqVo***|||E|}}-Fch5IVyFFF +++333%BT $Lkbx``@(?<^^^5556rx\zB8{lffT*1 ;w5kV;vϸkL dQ'BD\.'VX!oˏ??Lb:^_ZZ*&!yi4Hjƴ666N0/::ʕ+ bܹ6lHLLZV3###_{5OOOF`*?g,2xâ0\wg~j XkW H $LGCCCUUU$I~|>_`(HHH +..޵kӵN" l6;??7Mbb MsjjBrӧ:dg-A7Bk$}62nuttbTcǎl]]݅ v.pe{~`A===DѤ=ztOl۶mO?`G1cFddի-Zdhy^ܼy>Ԙb8<<ɓ'޽;gΜ ^,vqqi3dɒ̈́Jɓ`3HT(VJNNvuue(EDDSo޼yΝ"`X3gΜ3gWH;jO<`Ph CmmmUUEQj:%%eǎ3gΜ8"СCxI]V2 ŗ.]Zt CF#b|}l&xڲbbbbbb.ĺGFFrssf޽2Ζ7>|ŋ6myHOO/**Zn3{{{O8Rvޝ`wOOO[ BᅲH$9vT*uVHHȊ+b,%//Ϸ7chX,O8l0ij$ܹK/p4Lzqz&==ݴE}wRoquuMNN mhhhkk{W-Rd\\\JJH$:zUcW}ҥ⧟~`kkk#IR&999K2`ㆇM6^YY)ɺz_|1>>~!111suY`huooH$jjjڵk I [ni8<`8NrrďJMMM!!!˖-KJJxZ-wm:7o|ر띐C<<<< 0NVK$ᔔ-[m4Lz} _cccEE\.gIII?|hh#r?V*##w}(zt?[JJJnn 0EQeeeuuuAAAk֬A4LzwUUU2O=ƹ`0HҴ4s'77g?W_}e%=ZhNch[?[o[oRjjj"IR.s\OinnJ555,+))igϞILHHpwwRy󋊊bsQQQRR͛PT "I޽{{A4=؋nvbbbxopxpSTǎt3gjXhS^^.H|< ֆl ݷoŇmiiyfNNkIOO7p{7J&FEEر~Z7$IB흞i z !I2,,!,+??ȑ#$I6HaaaeeevvvffCwV[/#Fdbᅬ< 4CôZmUU͛ڵ_ϟoB\{{c7wܱ888,]ի+Vppp`n߾]VVVSSꚚxbO#0LpBFf_reuu3g;EFF߿kK,/kkkY V[]]M۷#""< Bbqrr3#X۷;RT(N%ZO?-++{~p8+W3$) CJJʶm< Sz|;v0XCHHȪU.\;܌me˖9r`^P(Hlhh~< S z Ν;sLfXfB8{={9̸m֬Y r zZ-H5 aB0^xBít:]QQիW333'Hlٲ>ۄ7Eח._0D"qrrLBv؇2 j bccM<66ӳtݺuL/ X+ x><!IrS'y pΝT*'O 8t9K.-))Yf Si 0555$)9azAXJ駟fc۷o/~ CqqK.]aGGECe2?344TUUEJx< ӎUz7м7ͳsX, 8n4t1~1, 7?ΩYށ֞$ɸ!VHHȊ+Ν;xJ{G6Vb<vkpp6T*$+++ HIIy< ӔUz .I XG02uSOY xy)ļfZ]WW7ǀ#;;ڵk/ Vs8.Z[[M`t$I޸q#88x=^czQ`GzFrS`_MQԪU,{ʂաJ3!K0QGGX,JCCC|>`<{ @ Jm-'Nsή]ϝ;7o< Na/@֨( PWWGR[r% jkkSSS.?4668quuu.\˳,V0{T__t!S^^.H < 6 =#$t!DQԥKSRRrsspqqi/^hfh5Ù?RDvy`Zhhh IRP#xlz666LOɓׯ_߶m@ x"##SSSE"ѡC,M \xjwԩe˖y{{3u,J?߿X|f!t_4`qc4U[[\6i4L&ڢ_|Eiyy&CBB̿"nݺuqGGǷz 7n<|ŋ7mdISSSKKKyB+W뜜O9,IՌ; Xa 7_|1w\lleee555l6[ ? d,fyVf Ң͛7?2K^^>on%%%J亜AAA>щK [<>ӄ.!>ܬT*qV[]]M۷gϞ'&&u`:|σހNe=G555###m!H$ۺu"=j \.7&&f<nnxf.݋ܫV;QȢO.FL&gf(fXaX-GQ`b899nݺGlc=z̰-[w999މ'^}FZNe=ӵz^P$zjOXaew4/Ùvv '[zk{v ?AvR]:'SXsbFmpHGŸ,Au;TtNTe+ܼy377΅<ĸm%p87o>vXRRRHHE* 4?ٳ>|VVփW{(U/\0<<|!k{V%Iyyy__dm|m]vZ2oIAz?A PUÃ05'V34Ҽai>!Ox$>e/I s!پ 6O<p &w߾}^kk] `:Op.EQeeeuuu i``jɒ%̮R~_ܺu&4tS/))9x~;wWZv- *++.\H20**jǎ|Tpl| ThhyO8lٲ)0Y͗/_nnnNOO`L<ݾ}rI !x`yI",^􂑑L%&&VWWD[d[zz_m~hٴiӲe>'NDDDĄ=6`*w͛7kjjZ[[ϟ;2]ؗlقi@@ZPP@ǎܷoܹs-5-[=ZZZaGIr܄CBQQQaqKaX7n LJ4 .D4 111tn"(22rnܸs||Eveddmmm8iӦΖ :;;߿oYgbŊP\=<%0Yy\{nzj~iYY% =oah!!!/_˳ x` x:x `2<D"r4eglc>|$I,YhݺupOyyy1] IR(:8X=xe֬YUrׯ_2|.khŋb833g >WTTGGGܹly)EYuNWTTtl+ Ү]v̙{91X? ؕ&$< `=y]|9..ΪjL?~[* B3lh5 UUU$R< `UyzFT+ʓ'O:tcggg_p!&&nO*$J.\m6OXzkeee3gδқxP\\\RRrʬ,GGfgٳ̡Z `'t:]mm-I7n F4=2 <5=qJzW0b9R]]hPaaa UbX*"x~y~֎X|dR)|||6ڵkϞ=k| B&`0Ց$T*/^iE$wwwl.]Zt 6ʕ+ wi8|>)//H$]]]{쉋'ƚ*ohظn: ivΝݻw'$$0bo;( g8K.z+7X $I* 777PdwyN␐pK x 0G Yjչs͛b8K,)..5 [hd2X,nkkضmϟ:W)=jUUUK.:991ZzBp9!'!!ʕ+y}vYYYMM A .ܵkWpp0E@H&h4'OljjڶmE*6__)))&c nii ezMj$I޾};00gE4X,NNN6Y׷n:~۔6Vhh+D"ѡCLf &I`HdZ_Sz;wݱc㔖%%%Mlcegg/nڴAZ F+ $/_i=Xo7VPz//#9::[.ȴ4HtQ6mGQT}}}YYY]];/<`_*++|i/7g?~~~9qS>[WWW\\lC,Y_"lOEEX,~DD< 0_/DxbK DovÆ L\\\$&&pZ $I\fSRRҦ%}Rlmm-(( U*ձct:̙݁:,&22r"?i;ܖ-[vȑf >~CCCUUU$ITdWWW@v$ɘIJ&mݺ26nx᯾*++˄\r=LS*$J^sss#"". , = Z]WW{dddڵk6m,?㙶---\.LN%Iƍ+WLKK1cuU{QQQr'x|{{cml?XqqqIII",k/H$])YGGX,Jy. T6d2ٹs"##o{9zhII  ~g,PWWGR1c z ߺSSSH~L{LGg'N&@h5Le===+""G? 
] I2!!}Ì7w\[`H$ڿd;=V@BpttD4=CO׿&E"ѬY^{5OOOf@nn~Ҍɾ0uh4L&ڂ7mڄi;lߍ7fΜt:]QQիW333e˖ӧOɆ!۷o > / x<`(y>jjuAAAll,2,11̙3{k-[ jI}_VVP(D4B6nhh`0Á˗/Z] Zh__K0u x,=O&5`-`{RTٟi b톋%i!@Do'+XK@A%ݦsLx赝 ?~w...c#77GLl6γ7AoBx#C H$3gNNN+˗/^^~~ x,77777 <<<<00`lFѯ[[[lt_uvvg[CqrrgΝ2v|焌 JNX|9=L۶m>?p۶mMi jkk#IR& Ϧb<2GFFj4jG_8xYiAuuuUUU+W~ܫFC ũS6nxk:uj \tijju@'''//// Ot;Fuuuݽ{wã/d٣C;6Nco fX۶mZ[[ (JT*ʀիW ^%.^(E"QFF|JDý=x0x:::z׮]<oji =9yI$xAr力[GGGOOY"/_^[[K=`t``uIzDB 2:ndK.JVbvIMMJy16x:66NbQQQ{}fKJJ222wpp`zA`0Xj(f-G ;;;'l.iDEGGGGG7o޼qd_rJJJryJJJWWWEE]x1͞1c5Hk2z r "##5N;) =LNvv\.ꫯNsRRիW e˖aXwh?{S K-JeW+ͳ{:Dj;^sjW-8 ߚz;\583gliiyu`Cf=f֬YAG۞]{ڙZH}]G z gSZc!tW7 x <0i"DQԽ{><22BQAx>΍7HD4Ly>o]RRiΕ+WF#h6mڤRt:hcd[,G*2]244TUUEJB4L#y...~BB裏nܸxyy8qbdd{t=88tSE[[[iiieeNx6mz\N=L) t:ў ]v?~V` ^rŋavZZZjh?S__1AAAw>v.~؃ӱ{쉋c`BϿ 5z7EQmmmc z饗;n`{0x](.Yߟ̂&믿6^y(a.M6>})XSփ۷ox?`BCC_u^7=,돼966vÆ EEEAtuu1i6tQLp|n@j^T* QTTV0nlE\]] 鲲?jZ7osfT*$< =%::|`FDKK8ǣ`N%Iƍp8`FיZzT*G4!<`}n}QCCӵA x_DGG??<Ӆ;O<?8N~~L&c;iL{O$''#x=46<|~nnnDDEL-y֬Y# g̘t]Sz餣C,Kҁt]Szi`0Ց$T*===,Yt]z)$IDk.f `@0EO;99 @~L-c3uLWy OXz!x04|4Ӄe؄A,/*bxA-S{A89fR>>33\utNNb,tdCQ`$3@ų韚"` <|4:ϩ]g[``0 miB(& `rO=O?@BpvvF4,(|0jhhB,͞=;77711tBUOSpGgf({  z xztt4Ez<<흞xb///B`[n}111wNHH`"xz$&&3] <zyzznذ*`s KƠ3fX, {߯阮b7t>>JBLT*CBB|}}.,=XP(僃L0i`3Uddd 3]X @Vp***pEt-`yZ222|}}O>t!pi___Pt!`1Llsrrn7%N yvO'W _Wwg]x# wr`u<>|kkCh>?`U,lYCC￟hѢol<hק???x `cISN%$$~UX,???______G<`.-C =2<`?Sh%tEXtdate:create2021-11-16T15:52:10-07:00%tEXtdate:modify2021-11-16T15:51:39-07:00BwIENDB`flox-0.10.3/docs/make.bat000066400000000000000000000145031477552625700151270ustar00rootroot00000000000000@ECHO OFF REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set BUILDDIR=_build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% . set I18NSPHINXOPTS=%SPHINXOPTS% . if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^` where ^ is one of echo. html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. singlehtml to make a single large HTML file echo. pickle to make pickle files echo. json to make JSON files echo. htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. devhelp to make HTML files and a Devhelp project echo. epub to make an epub echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. text to make text files echo. man to make manual pages echo. texinfo to make Texinfo files echo. gettext to make PO message catalogs echo. changes to make an overview over all changed/added/deprecated items echo. xml to make Docutils-native XML files echo. pseudoxml to make pseudoxml-XML files for display purposes echo. linkcheck to check all external links for integrity echo. doctest to run all doctests embedded in the documentation if enabled goto end ) if "%1" == "clean" ( for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i del /q /s %BUILDDIR%\* goto end ) %SPHINXBUILD% 2> nul if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. 
flox-0.10.3/docs/requirements-docs.txt
numpydoc
pydata-sphinx-theme
nbsphinx
ipython
ipykernel
-r ../requirements.txt

flox-0.10.3/docs/source/
flox-0.10.3/docs/source/_static/

flox-0.10.3/docs/source/_static/style.css
.xr-wrap {
    font-size: 0.85em;
    margin-left: 1.25em;
    padding-left: 1.25em;
    border-left: thin var(--color-foreground-muted) solid;
}

.xr-array-wrap,
.xr-var-data,
.xr-var-preview {
    font-size: 0.9em;
}

.gp {
    color: darkorange;
}

flox-0.10.3/docs/source/aggregations.md
# Aggregations

`flox` implements all common reductions provided by `numpy_groupies` in `aggregations.py`.
Control this by passing the `func` kwarg:

- `"sum"`, `"nansum"`
- `"prod"`, `"nanprod"`
- `"count"` - number of non-NaN elements by group
- `"mean"`, `"nanmean"`
- `"var"`, `"nanvar"`
- `"std"`, `"nanstd"`
- `"argmin"`
- `"argmax"`
- `"first"`, `"nanfirst"`
- `"last"`, `"nanlast"`
- `"median"`, `"nanmedian"`
- `"mode"`, `"nanmode"`
- `"quantile"`, `"nanquantile"`

```{tip}
We would like to add support for `cumsum`, `cumprod` ([issue](https://github.com/xarray-contrib/flox/issues/91)). Contributions are welcome!
```

## Custom Aggregations

`flox` also allows you to specify a custom Aggregation (again inspired by dask.dataframe),
though this might not be fully functional at the moment. See `aggregations.py` for examples.

See the ["Custom Aggregations"](user-stories/custom-aggregations.ipynb) user story for a
more user-friendly example.

```python
mean = Aggregation(
    # name used for dask tasks
    name="mean",
    # operation to use for pure-numpy inputs
    numpy="mean",
    # blockwise reduction
    chunk=("sum", "count"),
    # combine intermediate results: sum the sums, sum the counts
    combine=("sum", "sum"),
    # generate final result as sum / count
    finalize=lambda sum_, count: sum_ / count,
    # Used when "reindexing" at combine-time
    fill_value=0,
    # Used when any member of `expected_groups` is not found
    final_fill_value=np.nan,
)
```
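
Given the caveat above that custom aggregations may not be fully functional, here is a minimal,
self-contained sketch of how such an object might be exercised. It assumes `groupby_reduce`
accepts an `Aggregation` instance for `func` in addition to a string name; the input array and
group labels are invented purely for illustration.

```python
import numpy as np

import flox
from flox import Aggregation

# Re-create the custom "mean" from the example above: sum and count blockwise,
# combine by summing both, and finalize as sum / count.
mean = Aggregation(
    name="mean",
    numpy="mean",
    chunk=("sum", "count"),
    combine=("sum", "sum"),
    finalize=lambda sum_, count: sum_ / count,
    fill_value=0,
    final_fill_value=np.nan,
)

arr = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
labels = np.array([0, 0, 1, 1, 1])

# Built-in reduction, specified by name
expected, groups = flox.groupby_reduce(arr, labels, func="mean")

# Custom Aggregation object (assumes func= accepts an Aggregation instance)
actual, _ = flox.groupby_reduce(arr, labels, func=mean)

# Both should give the per-group means [1.5, 4.0]
np.testing.assert_allclose(expected, actual)
```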
goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest if errorlevel 1 exit /b 1 echo. echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) if "%1" == "xml" ( %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml if errorlevel 1 exit /b 1 echo. echo.Build finished. The XML files are in %BUILDDIR%/xml. goto end ) if "%1" == "pseudoxml" ( %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml if errorlevel 1 exit /b 1 echo. echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. goto end ) :end flox-0.10.3/docs/requirements-docs.txt000066400000000000000000000001201477552625700177220ustar00rootroot00000000000000numpydoc pydata-sphinx-theme nbsphinx ipython ipykernel - r ../requirements.txt flox-0.10.3/docs/source/000077500000000000000000000000001477552625700150175ustar00rootroot00000000000000flox-0.10.3/docs/source/_static/000077500000000000000000000000001477552625700164455ustar00rootroot00000000000000flox-0.10.3/docs/source/_static/style.css000066400000000000000000000003551477552625700203220ustar00rootroot00000000000000.xr-wrap { font-size: 0.85em; margin-left: 1.25em; padding-left: 1.25em; border-left: thin var(--color-foreground-muted) solid; } .xr-array-wrap, .xr-var-data, .xr-var-preview { font-size: 0.9em; } .gp { color: darkorange; } flox-0.10.3/docs/source/aggregations.md000066400000000000000000000027261477552625700200220ustar00rootroot00000000000000# Aggregations `flox` implements all common reductions provided by `numpy_groupies` in `aggregations.py`. Control this by passing the `func` kwarg: - `"sum"`, `"nansum"` - `"prod"`, `"nanprod"` - `"count"` - number of non-NaN elements by group - `"mean"`, `"nanmean"` - `"var"`, `"nanvar"` - `"std"`, `"nanstd"` - `"argmin"` - `"argmax"` - `"first"`, `"nanfirst"` - `"last"`, `"nanlast"` - `"median"`, `"nanmedian"` - `"mode"`, `"nanmode"` - `"quantile"`, `"nanquantile"` ```{tip} We would like to add support for `cumsum`, `cumprod` ([issue](https://github.com/xarray-contrib/flox/issues/91)). Contributions are welcome! ``` ## Custom Aggregations `flox` also allows you to specify a custom Aggregation (again inspired by dask.dataframe), though this might not be fully functional at the moment. See `aggregations.py` for examples. See the ["Custom Aggregations"](user-stories/custom-aggregations.ipynb) user story for a more user-friendly example. ```python mean = Aggregation( # name used for dask tasks name="mean", # operation to use for pure-numpy inputs numpy="mean", # blockwise reduction chunk=("sum", "count"), # combine intermediate results: sum the sums, sum the counts combine=("sum", "sum"), # generate final result as sum / count finalize=lambda sum_, count: sum_ / count, # Used when "reindexing" at combine-time fill_value=0, # Used when any member of `expected_groups` is not found final_fill_value=np.nan, ) ``` flox-0.10.3/docs/source/api.rst000066400000000000000000000012531477552625700163230ustar00rootroot00000000000000.. currentmodule:: flox API Reference ------------- Functions ~~~~~~~~~ .. autosummary:: :toctree: generated/ groupby_reduce groupby_scan xarray.xarray_reduce Rechunking ~~~~~~~~~~ .. 
autosummary:: :toctree: generated/ rechunk_for_blockwise rechunk_for_cohorts xarray.rechunk_for_blockwise xarray.rechunk_for_cohorts Visualization ~~~~~~~~~~~~~ .. autosummary:: :toctree: generated/ visualize.draw_mesh visualize.visualize_groups_1d visualize.visualize_cohorts_2d Aggregation Objects ~~~~~~~~~~~~~~~~~~~ .. autosummary:: :toctree: generated/ Aggregation Scan aggregations.sum_ aggregations.nansum flox-0.10.3/docs/source/arrays.md000066400000000000000000000020141477552625700166370ustar00rootroot00000000000000# Duck Array Support Aggregating over other array types will work if the array types supports the following methods, [ufunc.reduceat](https://numpy.org/doc/stable/reference/generated/numpy.ufunc.reduceat.html) or [ufunc.at](https://numpy.org/doc/stable/reference/generated/numpy.ufunc.at.html) | Reduction | `method="numpy"` | `method="flox"` | | ------------------------------ | ---------------- | ----------------- | | sum, nansum | bincount | add.reduceat | | mean, nanmean | bincount | add.reduceat | | var, nanvar | bincount | add.reduceat | | std, nanstd | bincount | add.reduceat | | count | bincount | add.reduceat | | prod | multiply.at | multiply.reduceat | | max, nanmax, argmax, nanargmax | maximum.at | maximum.reduceat | | min, nanmin, argmin, nanargmin | minimum.at | minimum.reduceat | flox-0.10.3/docs/source/conf.py000066400000000000000000000215451477552625700163250ustar00rootroot00000000000000# # complexity documentation build configuration file, created by # sphinx-quickstart on Tue Jul 9 22:26:36 2013. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import datetime import os import sys import flox import flox.aggregations import flox.visualize import flox.xarray # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # sys.path.insert(0, os.path.abspath('.')) cwd = os.getcwd() parent = os.path.dirname(cwd) sys.path.insert(0, parent) # -- General configuration ----------------------------------------------------- extensions = [ "sphinx.ext.autodoc", "sphinx.ext.viewcode", "sphinx.ext.autosummary", "sphinx.ext.intersphinx", "sphinx.ext.extlinks", "numpydoc", "sphinx.ext.napoleon", "myst_nb", "sphinx_codeautolink", "sphinx_remove_toctrees", ] codeautolink_concat_default = True extlinks = { "issue": ("https://github.com/xarray-contrib/flox/issues/%s", "GH#%s"), "pr": ("https://github.com/xarray-contrib/flox/pull/%s", "PR#%s"), } templates_path = ["_templates"] source_suffix = [".rst"] master_doc = "index" language = "en" remove_from_toctrees = ["generated/*"] # General information about the project. project = "flox" current_year = datetime.datetime.now().year copyright = f"2021-{current_year}, Deepak Cherian" author = "Deepak Cherian" # Myst_nb options nb_execution_excludepatterns = ["climatology-hourly.ipynb"] nb_execution_raise_on_error = True nb_execution_mode = "cache" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # The short X.Y version. version = flox.__version__.split("+")[0] # The full version, including alpha/beta/rc tags. 
release = flox.__version__ # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = "igor" # -- Options for HTML output --------------------------------------------------- html_theme = "furo" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. css_vars = { "admonition-font-size": "0.9rem", "font-size--small": "92%", "font-size--small--2": "87.5%", } html_theme_options = dict( sidebar_hide_name=True, light_css_variables=css_vars, dark_css_variables=css_vars, ) html_context = { "github_user": "xarray-contrib", "github_repo": "flox", "github_version": "main", "doc_path": "doc", } # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] html_title = "flox" # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] html_css_files = ["style.css"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 
# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # html_use_opensearch = '' # Output file base name for HTML help builder. htmlhelp_basename = "floxdoc" intersphinx_mapping = { "python": ("https://docs.python.org/3/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/stable", None), "numpy": ("https://numpy.org/doc/stable", None), # "numba": ("https://numba.pydata.org/numba-doc/latest", None), "dask": ("https://docs.dask.org/en/latest", None), "xarray": ("https://docs.xarray.dev/en/stable/", None), } autosummary_generate = True autodoc_typehints = "none" # Napoleon configurations napoleon_google_docstring = False napoleon_numpy_docstring = True napoleon_use_param = False napoleon_use_rtype = False napoleon_preprocess_types = True napoleon_type_aliases = { # general terms "sequence": ":term:`sequence`", "iterable": ":term:`iterable`", "callable": ":py:func:`callable`", "dict_like": ":term:`dict-like `", "dict-like": ":term:`dict-like `", "path-like": ":term:`path-like `", "mapping": ":term:`mapping`", "file-like": ":term:`file-like `", # special terms # "same type as caller": "*same type as caller*", # does not work, yet # "same type as values": "*same type as values*", # does not work, yet # stdlib type aliases "MutableMapping": "~collections.abc.MutableMapping", "sys.stdout": ":obj:`sys.stdout`", "timedelta": "~datetime.timedelta", "string": ":class:`string `", # numpy terms "array_like": ":term:`array_like`", "array-like": ":term:`array-like `", "scalar": ":term:`scalar`", "array": ":term:`array`", "hashable": ":term:`hashable `", # matplotlib terms "color-like": ":py:func:`color-like `", "matplotlib colormap name": ":doc:`matplotlib colormap name `", "matplotlib axes object": ":py:class:`matplotlib axes object `", "colormap": ":py:class:`colormap `", # objects without namespace: xarray "DataArray": "~xarray.DataArray", "Dataset": "~xarray.Dataset", "Variable": "~xarray.Variable", "DatasetGroupBy": "~xarray.core.groupby.DatasetGroupBy", "DataArrayGroupBy": "~xarray.core.groupby.DataArrayGroupBy", # objects without namespace: numpy "ndarray": "~numpy.ndarray", "DaskArray": "~dask.array.Array", "MaskedArray": "~numpy.ma.MaskedArray", "dtype": "~numpy.dtype", "ComplexWarning": "~numpy.ComplexWarning", # objects without namespace: pandas "Index": "~pandas.Index", "MultiIndex": "~pandas.MultiIndex", "CategoricalIndex": "~pandas.CategoricalIndex", "TimedeltaIndex": "~pandas.TimedeltaIndex", "DatetimeIndex": "~pandas.DatetimeIndex", "Series": "~pandas.Series", "DataFrame": "~pandas.DataFrame", "Categorical": "~pandas.Categorical", "Path": "~~pathlib.Path", # objects with abbreviated namespace (from pandas) "pd.Index": "~pandas.Index", "pd.NaT": "~pandas.NaT", } flox-0.10.3/docs/source/engines.md000066400000000000000000000034031477552625700167710ustar00rootroot00000000000000(engines)= # Engines `flox` provides multiple options, using the `engine` kwarg, for computing the core GroupBy reduction on numpy or other array types other than dask. 1. `engine="numpy"` wraps `numpy_groupies.aggregate_numpy`. This uses indexing tricks and functions like `np.bincount`, or the ufunc `.at` methods (.e.g `np.maximum.at`) to provided reasonably performant aggregations. 1. `engine="numba"` wraps `numpy_groupies.aggregate_numba`. This uses `numba` kernels for the core aggregation. 1. 
`engine="flox"` uses the `ufunc.reduceat` method after first argsorting the array so that all group members occur sequentially. This was copied from a [gist by Stephan Hoyer](https://gist.github.com/shoyer/f538ac78ae904c936844) 1. `engine="numbagg"` uses the reductions available in [`numbagg.grouped`](https://github.com/numbagg/numbagg/blob/main/numbagg/grouped.py) from the [numbagg](https://github.com/numbagg/numbagg) project. See [](arrays) for more details. ## Tradeoffs For the common case of reducing a nD array by a 1D array of group labels (e.g. `groupby("time.month")`), `engine="numbagg"` is almost always faster, and `engine="flox"` _can_ be faster. The reason is that `numpy_groupies` converts all groupby problems to a 1D problem, this can involve [some overhead](https://github.com/ml31415/numpy-groupies/pull/46). It is possible to optimize this a bit in `flox` or `numpy_groupies`, but the work has not been done yet. The advantage of `engine="numpy"` is that it tends to work for more array types, since it appears to be more common to implement `np.bincount`, and not `np.add.reduceat`. ```{tip} One other potential engine we could add is [`datashader`](https://github.com/xarray-contrib/flox/issues/142). Contributions or discussion is very welcome! ``` flox-0.10.3/docs/source/implementation.md000066400000000000000000000366061477552625700204010ustar00rootroot00000000000000--- jupytext: text_representation: format_name: myst kernelspec: display_name: Python 3 name: python3 --- (algorithms)= # Parallel Algorithms `flox` outsources the core GroupBy operation to the vectorized implementations controlled by the [`engine` kwarg](engines.md). Applying these implementations on a parallel array type like dask can be hard. Performance strongly depends on how the groups are distributed amongst the blocks of an array. `flox` implements 4 strategies for grouped reductions, each is appropriate for a particular distribution of groups among the blocks of a dask array. ```{tip} By default, `flox >= 0.9.0` will use [heuristics](method-heuristics) to choose a `method`. ``` Switch between the various strategies by passing `method` and/or `reindex` to either {py:func}`flox.groupby_reduce` or {py:func}`flox.xarray.xarray_reduce`. Your options are: 1. [`method="map-reduce"` with `reindex=False`](map-reindex-false) 1. [`method="map-reduce"` with `reindex=True`](map-reindex-True) 1. [`method="blockwise"`](method-blockwise) 1. [`method="cohorts"`](method-cohorts) The most appropriate strategy for your problem will depend on the chunking of your dataset, and the distribution of group labels across those chunks. Currently these strategies are implemented for dask. We would like to generalize to other parallel array types as appropriate (e.g. Ramba, cubed, arkouda). Please open an issue to discuss if you are interested. (xarray-split)= ## Background Without `flox` installed, Xarray's GroupBy strategy is to find all unique group labels, index out each group, and then apply the reduction operation. Note that this only works if we know the group labels (i.e. you cannot use this strategy to group by a dask array), and is basically an unvectorized slow for-loop over groups. 
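In code, that naive strategy looks roughly like the following minimal numpy sketch (the `array` and `labels` names here are purely illustrative, not objects defined elsewhere in these docs):

```python
import numpy as np


def naive_groupby_mean(array, labels):
    # find all unique group labels ...
    uniques = np.unique(labels)
    # ... then index out each group and reduce it, one group at a time
    return {label: array[..., labels == label].mean(axis=-1) for label in uniques}


labels = np.array([0, 1, 0, 1, 2, 2, 0, 1, 2, 0])
array = np.arange(10.0)
naive_groupby_mean(array, labels)  # one mean per group: {0: ..., 1: ..., 2: ...}
```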
Schematically, this looks like (colors indicate group labels; separated groups of colors indicate different blocks of an array): ```{image} ../diagrams/new-split-apply-combine-annotated.svg --- alt: xarray-current-strategy width: 100% --- ``` The first step is to extract all members of a group, which involves a _lot_ of communication and is quite expensive (in dataframe terminology, this is a "shuffle"). This is fundamentally why many groupby reductions don't work well right now with big datasets. ## `method="map-reduce"` ![map-reduce-strategy-schematic](/../diagrams/map-reduce.png) The "map-reduce" strategy is inspired by `dask.dataframe.groupby`. The GroupBy reduction is first applied blockwise. Those intermediate results are combined by concatenating to form a new array, which is then reduced again. The combining of intermediate results uses dask's `_tree_reduce` until all group results are in one block. At that point the result is "finalized" and returned to the user. ### General Tradeoffs 1. This approach works well when either the initial blockwise reduction is effective, or if the reduction at the first combine step is effective. Here "effective" means we have multiple members of a single group in a block, so the blockwise application of groupby-reduce actually reduces values and releases some memory. 1. One downside is that the final result will only have one chunk along the new group axis. 1. We have two choices for how to construct the intermediate arrays. See below. (map-reindex-True)= ### `reindex=True` If we know all the group labels, we can reindex to them right at the blockwise step (`reindex=True`). This matches `dask.array.histogram` and `xhistogram`, where the bin edges, or group labels of the output, are known. The downside is the potential for large memory use if the number of output groups is much larger than the number of groups in a block. ```{image} ../diagrams/new-map-reduce-reindex-True-annotated.svg --- alt: map-reduce-reindex-True-strategy-schematic width: 100% --- ``` (map-reindex-False)= ### `reindex=False` We can `reindex` at the combine stage to groups present in the blocks being combined (`reindex=False`). This can limit memory use, at the cost of a performance reduction due to extra copies of the intermediate data during reindexing. ```{image} ../diagrams/new-map-reduce-reindex-False-annotated.svg --- alt: map-reduce-reindex-False-strategy-schematic width: 100% --- ``` This approach allows grouping by a dask array, so group labels can be discovered at compute time, similar to `dask.dataframe.groupby`. ### Reindexing to a sparse array For large numbers of groups, we might be reducing to a very sparse array (e.g. [this issue](https://github.com/xarray-contrib/flox/issues/428)). To control memory, we can instruct flox to reindex the intermediate results to a `sparse.COO` array using: ```python from flox import ReindexArrayType, ReindexStrategy ReindexStrategy( # do not reindex to the full output grid at the blockwise aggregation stage blockwise=False, # when combining intermediate results after blockwise aggregation, reindex to the # common grid using a sparse.COO array type array_type=ReindexArrayType.SPARSE_COO, ) ``` See [this user story](user-stories/large-zonal-stats) for more discussion. ### Example For example, consider `groupby("time.month")` with monthly frequency data and chunksize of 4 along `time`. 
![cohorts-schematic](/../diagrams/cohorts-month-chunk4.png) With `reindex=True`, each block will become 3x its original size at the blockwise step: input blocks have 4 timesteps while the output block has a value for all 12 months. One could use `reindex=False` to control memory usage, but also see [`method="cohorts"`](method-cohorts) below. (method-blockwise)= ## `method="blockwise"` One case where `method="map-reduce"` doesn't work well is the case of "resampling" reductions. An example here is resampling from daily frequency to monthly frequency data: `da.resample(time="M").mean()`. For resampling-type reductions, 1. Group members occur sequentially (all days in January 2001 occur one after the other). 1. Groups are not all of exactly equal length (31 days in January but 28 in most Februaries). 1. All members in a group are next to each other (if the time series is sorted, which it usually is). 1. Because there can be a large number of groups, concatenating results for all groups in a single chunk could be catastrophic. In this case, it makes sense to use the `dask.dataframe` resample strategy, which is to rechunk using {py:func}`flox.rechunk_for_blockwise` so that all members of a group are in a single block. Then, the groupby operation can be applied blockwise. ```{image} ../diagrams/new-blockwise-annotated.svg --- alt: blockwise-strategy-schematic width: 100% --- ``` _Tradeoffs_ 1. Only works for certain groupings. 1. Group labels must be known at graph construction time, so this only works for numpy arrays. 1. Currently the rechunking is only implemented for 1D arrays (being motivated by time resampling), but a nD generalization seems possible. 1. Only the `blockwise` strategy can be used for grouping by `nD` arrays. 1. Works better when multiple groups are already in a single block, so that the initial rechunking only involves a small amount of communication. (method-cohorts)= ## `method="cohorts"` The `map-reduce` strategy is quite effective but can involve some unnecessary communication. It is sometimes possible to exploit patterns in how group labels are distributed across chunks (similar to `method="blockwise"` above). Two cases are illustrative: 1. Group labels can be _approximately-periodic_: e.g. `time.dayofyear` (period 365 or 366) or `time.month` (period 12). Consider our earlier example, `groupby("time.month")` with monthly frequency data and chunksize of 4 along `time`. ![cohorts-schematic](/../diagrams/cohorts-month-chunk4.png) Because a chunksize of 4 evenly divides the number of groups (12), all we need to do is index out blocks 0, 3, 7 and then apply the `"map-reduce"` strategy to form the final result for months Jan-Apr. Repeat for the remaining groups of months (May-Aug; Sep-Dec) and then concatenate. 1. Groups can be _spatially localized_ like the blockwise case above, for example grouping by country administrative boundaries like counties or districts. In this case, concatenating the result for the northwesternmost county or district and the southeasternmost district can involve a lot of wasteful communication (again depending on chunking). For such cases, we can adapt xarray's shuffling or subsetting strategy by indexing out "cohorts" or group labels that tend to occur next to each other. ### A motivating example : time grouping One example is the construction of "climatologies", which is a climate science term for something like `groupby("time.month")` ("monthly climatology") or `groupby("time.dayofyear")` ("daily climatology"). In these cases, 1. 
Groups occur sequentially (day 2 is always after day 1; and February is always after January). 1. Groups are approximately periodic (some years have 365 days and others have 366). Consider our earlier example, `groupby("time.month")` with monthly frequency data and chunksize of 4 along `time`. ![cohorts-schematic](/../diagrams/cohorts-month-chunk4.png) With `method="map-reduce", reindex=True`, each block will become 3x its original size at the blockwise step: input blocks have 4 timesteps while the output block has a value for all 12 months. Note that the blockwise groupby-reduction _does not reduce_ the data since there is only one element in each group. In addition, since `map-reduce` will make the final result have only one chunk of size 12 along the new `month` dimension, the final result has chunk sizes 3x that of the input, which may not be ideal. However, because a chunksize of 4 evenly divides the number of groups (12), all we need to do is index out blocks 0, 3, 7 and then apply the `"map-reduce"` strategy to form the final result for months Jan-Apr. Repeat for the remaining groups of months (May-Aug; Sep-Dec) and then concatenate. This is the essence of `method="cohorts"`. ### Summary We can generalize this idea to more complicated problems (inspired by the `split_out` kwarg in `dask.dataframe.groupby`). We first apply the groupby-reduction blockwise, then split and reindex blocks to create a new array with which we complete the reduction using `map-reduce`. Because the split or shuffle step occurs after the blockwise reduction, we _sometimes_ communicate a significantly smaller amount of data than if we split or shuffled the input array. ```{image} /../diagrams/new-cohorts-annotated.svg --- alt: cohorts-strategy-schematic width: 100% --- ``` ### Tradeoffs 1. Group labels must be known at graph construction time, so this only works for numpy arrays. 1. This does require more tasks and a more complicated graph, but the communication overhead can be significantly lower. 1. The detection of "cohorts" is currently slow but could be improved. 1. The extra effort of detecting cohorts and multiple copying of intermediate blocks may be worthwhile only if the chunk sizes are small relative to the approximate period of group labels, or small relative to the size of spatially localized groups. ### Example : sensitivity to chunking One annoyance is that if the chunksize doesn't evenly divide the number of groups, we still end up splitting a number of chunks. Consider our earlier example, `groupby("time.month")` with monthly frequency data and chunksize of 4 along `time`. ![cohorts-schematic](/../diagrams/cohorts-month-chunk4.png) ```{code-cell} import flox import numpy as np labels = np.tile(np.arange(12), 12) chunks = (tuple(np.repeat(4, labels.size // 4)),) ``` `flox` can find these cohorts; below, it identifies the cohorts with labels `1,2,3,4`; `5,6,7,8`, and `9,10,11,12`. ```{code-cell} preferred_method, chunks_cohorts = flox.core.find_group_cohorts(labels, chunks) chunks_cohorts.values() ``` Now consider `chunksize=5`. ![cohorts-schematic](/../diagrams/cohorts-month-chunk5.png) ```{code-cell} labels = np.tile(np.arange(12), 12) chunks = (tuple(np.repeat(5, labels.size // 5)) + (4,),) preferred_method, chunks_cohorts = flox.core.find_group_cohorts(labels, chunks, merge=True) chunks_cohorts.values() ``` We find 7 cohorts (note the original xarray strategy is equivalent to constructing 12 cohorts). In this case, it seems better to rechunk to a size of `4` (or `6`) along `time`, as sketched below. 
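A minimal sketch of that rechunk-then-group approach, using plain dask rechunking (the array here is hypothetical: 12 years of monthly values; flox also provides {py:func}`flox.rechunk_for_cohorts` as a convenience for this kind of rechunking):

```python
import dask.array as da
import numpy as np

import flox

# hypothetical data: 12 years of monthly values, chunked by 5 along time
labels = np.tile(np.arange(12), 12)
array = da.random.random((10, labels.size), chunks=(10, 5))

# rechunk to a chunksize that evenly divides the 12 monthly groups ...
rechunked = array.rechunk({1: 4})

# ... so that each chunk holds whole groups and the cohorts are clean
result, groups = flox.groupby_reduce(rechunked, labels, func="mean", method="cohorts")
```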
Indeed, for the original `chunksize=5` layout, flox's heuristics think `"map-reduce"` is better: ```{code-cell} preferred_method ``` ### Example : spatial grouping Spatial groupings are particularly interesting for the `"cohorts"` strategy. Consider the problem of computing county-level aggregated statistics ([example blog post](https://xarray.dev/blog/flox)). There are ~3100 groups (counties), each marked by a different color. There are ~2300 chunks of size (350, 350) in (lat, lon). Many groups are confined to a small number of chunks: see the left panel, where the grid lines mark chunk boundaries. ![cohorts-schematic](/../diagrams/nwm-cohorts.png) This seems like a good fit for `'cohorts'`: to get the answer for a county in the Northwest US, we needn't look at values for the southwest US. How do we decide that automatically for the user? (method-heuristics)= ## Heuristics `flox >=0.9` will automatically choose `method` for you. To do so, we need to detect how each group label is distributed across the chunks of the array, and the degree to which the chunk distribution for a particular label overlaps with all other labels. The algorithm is as follows. 1. First determine which labels are present in each chunk. The distribution of labels across chunks is represented internally as a 2D boolean sparse array `S[chunks, labels]`. `S[i, j] = 1` when label `j` is present in chunk `i`. 1. Then we look for patterns in `S` to decide if we can use `"blockwise"`. The dark colored cells indicate a `1` at that cell in `S`. ![bitmask-patterns](/../diagrams/bitmask-patterns-perfect.png) - On the left is a monthly grouping for a monthly time series with chunk size 4. There are 3 non-overlapping cohorts, so `method="cohorts"` is perfect. - On the right is a resampling problem of a daily time series with chunk size 10 to 5-daily frequency. Two 5-day periods are exactly contained in one chunk, so `method="blockwise"` is perfect. 1. The metric used for determining the degree of overlap between the chunks occupied by different labels is [containment](http://ekzhu.com/datasketch/lshensemble.html). For each label `i` we can quickly compute containment against all other labels `j` as `C = S.T @ S / number_chunks_per_label`. Here is `C` for a range of chunk sizes from 1 to 12, for computing the monthly mean of a monthly time series problem, \[the title on each image is `(chunk size, sparsity)`\]. ```python chunks = np.arange(1, 13) labels = np.tile(np.arange(1, 13), 30) ``` ![cohorts-schematic](/../diagrams/containment.png) 1. To choose between `"map-reduce"` and `"cohorts"`, we need a summary measure of the degree to which the labels overlap with each other. We can use _sparsity_ --- the number of non-zero elements in `C` divided by the number of elements in `C`, `C.nnz/C.size`. We use sparsity(`S`) as an approximation for sparsity(`C`) to avoid a potentially expensive sparse matrix dot product when `S` isn't particularly sparse. When sparsity(`S`) > 0.4 (arbitrary), we choose `"map-reduce"` since there is decent overlap between (any) cohorts. Otherwise we use `"cohorts"`. Cool, isn't it?! For reference, here is `S` and `C` for the US county groupby problem: ![county-bitmask](/../diagrams/counties-bitmask-containment.png) The sparsity of `C` is 0.006, so `"cohorts"` seems a good strategy here. 
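The heuristic is only a default; as noted at the top of this page, you can always pass `method` (and `reindex`) explicitly to {py:func}`flox.groupby_reduce` or {py:func}`flox.xarray.xarray_reduce`. A minimal sketch, with an illustrative array and label vector:

```python
import dask.array as da
import numpy as np

import flox

labels = np.tile(np.arange(12), 30)  # 30 years of monthly labels
array = da.random.random((100, labels.size), chunks=(100, 4))

# let flox's heuristics pick between "map-reduce", "cohorts" and "blockwise" ...
result, groups = flox.groupby_reduce(array, labels, func="mean")

# ... or override the choice once you know your chunk/label layout
result, groups = flox.groupby_reduce(array, labels, func="mean", method="cohorts")
```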
flox-0.10.3/docs/source/index.md000066400000000000000000000075301477552625700164550ustar00rootroot00000000000000# flox: fast & furious GroupBy reductions for `dask.array` [![GitHub Workflow CI Status](https://img.shields.io/github/actions/workflow/status/xarray-contrib/flox/ci.yaml?branch=main&logo=github&style=flat)](https://github.com/xarray-contrib/flox/actions) [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/xarray-contrib/flox/main.svg)](https://results.pre-commit.ci/latest/github/xarray-contrib/flox/main) [![image](https://img.shields.io/codecov/c/github/xarray-contrib/flox.svg?style=flat)](https://codecov.io/gh/xarray-contrib/flox) [![Documentation Status](https://readthedocs.org/projects/flox/badge/?version=latest)](https://flox.readthedocs.io/en/latest/?badge=latest) [![PyPI](https://img.shields.io/pypi/v/flox.svg?style=flat)](https://pypi.org/project/flox/) [![Conda-forge](https://img.shields.io/conda/vn/conda-forge/flox.svg?style=flat)](https://anaconda.org/conda-forge/flox) [![NASA-80NSSC18M0156](https://img.shields.io/badge/NASA-80NSSC18M0156-blue)](https://earthdata.nasa.gov/esds/competitive-programs/access/pangeo-ml) [![NASA-80NSSC22K0345](https://img.shields.io/badge/NASA-80NSSC22K0345-blue)](https://science.nasa.gov/open-science-overview) ## Overview `flox` mainly provides strategies for fast GroupBy reductions with dask.array. `flox` uses the MapReduce paradigm (or a "tree reduction") to run the GroupBy operation in a parallel-native way totally avoiding a sort or shuffle operation. It was motivated by 1. Dask Dataframe GroupBy [blogpost](https://blog.dask.org/2019/10/08/df-groupby) 1. numpy_groupies in Xarray [issue](https://github.com/pydata/xarray/issues/4473) See a presentation ([video](https://discourse.pangeo.io/t/november-17-2021-flox-fast-furious-groupby-reductions-with-dask-at-pangeo-scale/2016), [slides](https://docs.google.com/presentation/d/1YubKrwu9zPHC_CzVBhvORuQBW-z148BvX3Ne8XcvWsQ/edit?usp=sharing)) about this package, from the Pangeo Showcase. ## Why flox? 1. {py:func}`flox.groupby_reduce` [wraps](engines.md) the `numpy-groupies` package for performant Groupby reductions on nD arrays. 1. {py:func}`flox.groupby_reduce` provides [parallel-friendly strategies](implementation.md) for GroupBy reductions by wrapping `numpy-groupies` for dask arrays. 1. `flox` [integrates with xarray](xarray.md) to provide more performant Groupby and Resampling operations. 1. {py:func}`flox.xarray.xarray_reduce` [extends](xarray.md) Xarray's GroupBy operations allowing lazy grouping by dask arrays, grouping by multiple arrays, as well as combining categorical grouping and histogram-style binning operations using multiple variables. 1. `flox` also provides utility functions for rechunking both dask arrays and Xarray objects along a single dimension using the group labels as a guide: 1. To rechunk for blockwise operations: {py:func}`flox.rechunk_for_blockwise`, {py:func}`flox.xarray.rechunk_for_blockwise`. 1. To rechunk so that "cohorts", or groups of labels, tend to occur in the same chunks: {py:func}`flox.rechunk_for_cohorts`, {py:func}`flox.xarray.rechunk_for_cohorts`. ## Installing ```shell $ pip install flox ``` ```shell $ conda install -c conda-forge flox ``` ## Acknowledgements This work was funded in part by 1. NASA-ACCESS 80NSSC18M0156 "Community tools for analysis of NASA Earth Observing System Data in the Cloud" (PI J. Hamman), 1. 
NASA-OSTFL 80NSSC22K0345 "Enhancing analysis of NASA data with the open-source Python Xarray Library" (PIs Scott Henderson, University of Washington; Deepak Cherian, NCAR; Jessica Scheick, University of New Hampshire), and 1. [NCAR's Earth System Data Science Initiative](https://ncar.github.io/esds/). It was motivated by many discussions in the [Pangeo](https://pangeo.io) community. ## Contents ```{eval-rst} .. toctree:: :maxdepth: 1 intro.md aggregations.md engines.md arrays.md implementation.md xarray.md user-stories.md api.rst ``` flox-0.10.3/docs/source/intro.md000066400000000000000000000073461477552625700165060ustar00rootroot00000000000000--- jupytext: text_representation: format_name: myst kernelspec: display_name: Python 3 name: python3 --- ```{eval-rst} .. currentmodule:: flox ``` # 10 minutes to flox ## GroupBy single variable ```{code-cell} import numpy as np import xarray as xr from flox.xarray import xarray_reduce labels = xr.DataArray( [1, 2, 3, 1, 2, 3, 0, 0, 0], dims="x", name="label", ) labels ``` ### With numpy ```{code-cell} da = xr.DataArray( np.ones((9,)), dims="x", name="array" ) ``` Apply the reduction using {py:func}`flox.xarray.xarray_reduce` specifying the reduction operation in `func` ```{code-cell} xarray_reduce(da, labels, func="sum") ``` ### With dask Let's first chunk `da` and `labels` ```{code-cell} da_chunked = da.chunk(x=2) labels_chunked = labels.chunk(x=3) ``` Grouping a dask array by a numpy array is unchanged ```{code-cell} xarray_reduce(da_chunked, labels, func="sum") ``` When grouping **by** a dask array, we need to specify the "expected group labels" on the output so we can construct the result DataArray. Without the `expected_groups` kwarg, an error is raised ```{code-cell} --- tags: [raises-exception] --- xarray_reduce(da_chunked, labels_chunked, func="sum") ``` Now we specify `expected_groups`: ```{code-cell} dask_result = xarray_reduce( da_chunked, labels_chunked, func="sum", expected_groups=[0, 1, 2, 3], ) dask_result ``` Note that any group labels not present in `expected_groups` will be ignored. You can also provide `expected_groups` for the pure numpy GroupBy. 
```{code-cell} numpy_result = xarray_reduce( da, labels, func="sum", expected_groups=[0, 1, 2, 3], ) numpy_result ``` The two are identical: ```{code-cell} numpy_result.identical(dask_result) ``` ## Binning by a single variable For binning, specify the bin edges in `expected_groups` using {py:class}`pandas.IntervalIndex`: ```{code-cell} import pandas as pd xarray_reduce( da, labels, func="sum", expected_groups=pd.IntervalIndex.from_breaks([0.5, 1.5, 2.5, 6]), ) ``` Similarly for dask inputs ```{code-cell} xarray_reduce( da_chunked, labels_chunked, func="sum", expected_groups=pd.IntervalIndex.from_breaks([0.5, 1.5, 2.5, 6]), ) ``` For more control over the binning (which edge is closed), pass the appropriate kwarg to {py:class}`pandas.IntervalIndex`: ```{code-cell} xarray_reduce( da_chunked, labels_chunked, func="sum", expected_groups=pd.IntervalIndex.from_breaks([0.5, 1.5, 2.5, 6], closed="left"), ) ``` ## Grouping by multiple variables ```{code-cell} arr = np.ones((4, 12)) labels1 = np.array(["a", "a", "c", "c", "c", "b", "b", "c", "c", "b", "b", "f"]) labels2 = np.array([1, 2, 2, 1]) da = xr.DataArray( arr, dims=("x", "y"), coords={"labels2": ("x", labels2), "labels1": ("y", labels1)} ) da ``` To group by multiple variables simply pass them as `*args`: ```{code-cell} xarray_reduce(da, "labels1", "labels2", func="sum") ``` ## Histogramming (Binning by multiple variables) An unweighted histogram is simply a groupby multiple variables with count. ```{code-cell} python arr = np.ones((4, 12)) labels1 = np.array(np.linspace(0, 10, 12)) labels2 = np.array([1, 2, 2, 1]) da = xr.DataArray( arr, dims=("x", "y"), coords={"labels2": ("x", labels2), "labels1": ("y", labels1)} ) da ``` Specify bins in `expected_groups` ```{code-cell} python xarray_reduce( da, "labels1", "labels2", func="count", expected_groups=( pd.IntervalIndex.from_breaks([-0.5, 4.5, 6.5, 8.9]), # labels1 pd.IntervalIndex.from_breaks([0.5, 1.5, 1.9]), # labels2 ), ) ``` ## Resampling Use the xarray interface i.e. `da.resample(time="M").mean()`. Optionally pass [`method="blockwise"`](method-blockwise): `da.resample(time="M").mean(method="blockwise")` flox-0.10.3/docs/source/user-stories.md000066400000000000000000000005121477552625700200030ustar00rootroot00000000000000# Tricks & Stories ```{eval-rst} .. toctree:: :maxdepth: 1 user-stories/overlaps.md user-stories/climatology.ipynb user-stories/climatology-hourly.ipynb user-stories/climatology-hourly-cubed.ipynb user-stories/custom-aggregations.ipynb user-stories/nD-bins.ipynb user-stories/large-zonal-stats.ipynb ``` flox-0.10.3/docs/source/user-stories/000077500000000000000000000000001477552625700174635ustar00rootroot00000000000000flox-0.10.3/docs/source/user-stories/climatology-hourly-cubed.ipynb000066400000000000000000000054751477552625700254640ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "0", "metadata": {}, "source": [ "# More climatology reductions using Cubed\n", "\n", "This is the Cubed equivalent of [More climatology reductions](climatology-hourly.ipynb).\n", "\n", "The task is to compute an hourly climatology from an hourly dataset with 744 hours in each chunk, using the \"map-reduce\" strategy." 
] }, { "cell_type": "code", "execution_count": null, "id": "1", "metadata": {}, "outputs": [], "source": [ "import cubed\n", "import cubed.array_api as xp\n", "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", "\n", "import flox.xarray" ] }, { "cell_type": "markdown", "id": "2", "metadata": {}, "source": [ "## Create data\n", "\n", "Note that we use fewer lat/long points so the computation can be run locally." ] }, { "cell_type": "code", "execution_count": null, "id": "3", "metadata": {}, "outputs": [], "source": [ "spec = cubed.Spec(allowed_mem=\"2GB\")\n", "ds = xr.Dataset(\n", " {\n", " \"tp\": (\n", " (\"time\", \"latitude\", \"longitude\"),\n", " xp.ones((8760, 72, 144), chunks=(744, 5, 144), dtype=np.float32, spec=spec),\n", " )\n", " },\n", " coords={\"time\": pd.date_range(\"2021-01-01\", \"2021-12-31 23:59\", freq=\"h\")},\n", ")\n", "ds" ] }, { "cell_type": "markdown", "id": "4", "metadata": {}, "source": [ "## Computation" ] }, { "cell_type": "code", "execution_count": null, "id": "5", "metadata": {}, "outputs": [], "source": [ "hourly = flox.xarray.xarray_reduce(ds.tp, ds.time.dt.hour, func=\"mean\", reindex=True)\n", "hourly" ] }, { "cell_type": "code", "execution_count": null, "id": "6", "metadata": {}, "outputs": [], "source": [ "hourly.compute()" ] }, { "cell_type": "markdown", "id": "7", "metadata": {}, "source": [ "## Other climatologies: resampling by month\n", "\n", "This uses the \"blockwise\" strategy." ] }, { "cell_type": "code", "execution_count": null, "id": "8", "metadata": {}, "outputs": [], "source": [ "monthly = ds.tp.resample(time=\"ME\").sum(method=\"blockwise\")\n", "monthly" ] }, { "cell_type": "code", "execution_count": null, "id": "9", "metadata": {}, "outputs": [], "source": [ "monthly.compute()" ] } ], "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3" } }, "nbformat": 4, "nbformat_minor": 5 } flox-0.10.3/docs/source/user-stories/climatology-hourly.ipynb000066400000000000000000002207661477552625700244060ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "0", "metadata": {}, "source": [ "# More climatology reductions\n", "\n", "This one is motivated by\n", "[this Pangeo Discourse post](https://discourse.pangeo.io/t/dask-xarray-and-swap-memory-polution-on-local-linux-cluster/2453/5)\n", "and follows\n", "[this notebook](https://nbviewer.ipython.org/gist/fmaussion/95d1b9c9a3113db2f987b91e842cb8e0)\n", "\n", "The task is to compute an hourly climatology from an hourly dataset with 744\n", "hours in each chunk.\n", "\n", "We choose the \"map-reduce\" strategy because:\n", "\n", "1. all hours (groups) are present in each chunk;\n", "2. a groupby reduction applied blockwise will result in arrays of shape (X,\n", " Y, 744) being reduced to (X, Y, 24) i.e. 
744/24=31x decrease in chunk size,\n", " so this should work well memory wise.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "1", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/dcherian/mambaforge/envs/dcpy/lib/python3.8/site-packages/distributed/node.py:180: UserWarning: Port 8787 is already in use.\n", "Perhaps you already have a cluster running?\n", "Hosting the HTTP server on port 51613 instead\n", " warnings.warn(\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "fc1dd8438def4d75acee8602c544248c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Tab(children=(HTML(value='
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:  (time: 8760, latitude: 721, longitude: 1440)\n",
       "Coordinates:\n",
       "  * time     (time) datetime64[ns] 2021-01-01 ... 2021-12-31T23:00:00\n",
       "Dimensions without coordinates: latitude, longitude\n",
       "Data variables:\n",
       "    tp       (time, latitude, longitude) float32 dask.array<chunksize=(744, 50, 1440), meta=np.ndarray>
" ], "text/plain": [ "\n", "Dimensions: (time: 8760, latitude: 721, longitude: 1440)\n", "Coordinates:\n", " * time (time) datetime64[ns] 2021-01-01 ... 2021-12-31T23:00:00\n", "Dimensions without coordinates: latitude, longitude\n", "Data variables:\n", " tp (time, latitude, longitude) float32 dask.array" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds = xr.Dataset(\n", " {\n", " \"tp\": (\n", " (\"time\", \"latitude\", \"longitude\"),\n", " dask.array.ones((8760, 721, 1440), chunks=(744, 50, 1440), dtype=np.float32),\n", " )\n", " },\n", " coords={\"time\": pd.date_range(\"2021-01-01\", \"2021-12-31 23:59\", freq=\"H\")},\n", ")\n", "ds" ] }, { "cell_type": "markdown", "id": "5", "metadata": {}, "source": [ "Here's just plain xarray: 10000 tasks and one chunk per hour in the output\n" ] }, { "cell_type": "code", "execution_count": null, "id": "6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray 'tp' (hour: 24, latitude: 721, longitude: 1440)>\n",
       "dask.array<stack, shape=(24, 721, 1440), dtype=float32, chunksize=(1, 50, 1440), chunktype=numpy.ndarray>\n",
       "Coordinates:\n",
       "  * hour     (hour) int64 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23\n",
       "Dimensions without coordinates: latitude, longitude
" ], "text/plain": [ "\n", "dask.array\n", "Coordinates:\n", " * hour (hour) int64 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23\n", "Dimensions without coordinates: latitude, longitude" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds.tp.groupby(\"time.hour\").mean()" ] }, { "cell_type": "markdown", "id": "7", "metadata": {}, "source": [ "And flox: 600 tasks and all hours in a single chunk\n" ] }, { "cell_type": "code", "execution_count": null, "id": "8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.DataArray 'tp' (hour: 24, latitude: 721, longitude: 1440)>\n",
       "dask.array<transpose, shape=(24, 721, 1440), dtype=float32, chunksize=(24, 50, 1440), chunktype=numpy.ndarray>\n",
       "Coordinates:\n",
       "  * hour     (hour) int64 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23\n",
       "Dimensions without coordinates: latitude, longitude
" ], "text/plain": [ "\n", "dask.array\n", "Coordinates:\n", " * hour (hour) int64 0 1 2 3 4 5 6 7 8 9 ... 14 15 16 17 18 19 20 21 22 23\n", "Dimensions without coordinates: latitude, longitude" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "hourly = flox.xarray.xarray_reduce(ds.tp, ds.time.dt.hour, func=\"mean\")\n", "hourly" ] }, { "cell_type": "code", "execution_count": null, "id": "9", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "10", "metadata": {}, "outputs": [], "source": [ "with performance_report(\"hourly-climatology.html\"):\n", " hourly.compute()" ] }, { "cell_type": "markdown", "id": "11", "metadata": {}, "source": [ "View the performance report\n", "[here](https://rawcdn.githack.com/dcherian/flox/592c46ba0bb859f732968b68426b6332caebc213/docs/source/user-stories/hourly-climatology.html),\n", "and a video of the dask dashboard\n", "[here](https://drive.google.com/file/d/1uY36DiTbv1w7TefbrCEyBcOli5NiaNUP/view?usp=sharing)\n" ] } ], "metadata": { "keep_output": true, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3" }, "mystnb": { "execution_mode": "off" } }, "nbformat": 4, "nbformat_minor": 5 } flox-0.10.3/docs/source/user-stories/climatology.ipynb000066400000000000000000000227761477552625700230670ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "0", "metadata": {}, "source": [ "# Strategies for climatology calculations\n", "\n", "This notebook is motivated by\n", "[this post](https://discourse.pangeo.io/t/understanding-optimal-zarr-chunking-scheme-for-a-climatology/2335)\n", "on the Pangeo discourse forum.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "1", "metadata": { "tags": [] }, "outputs": [], "source": [ "import dask.array\n", "import pandas as pd\n", "import xarray as xr\n", "\n", "import flox\n", "import flox.xarray" ] }, { "cell_type": "markdown", "id": "2", "metadata": {}, "source": [ "Let's first create an example Xarray Dataset representing the OISST dataset,\n", "with chunk sizes matching that in the post.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "3", "metadata": {}, "outputs": [], "source": [ "oisst = xr.DataArray(\n", " dask.array.ones((14532, 720, 1440), chunks=(20, -1, -1)),\n", " dims=(\"time\", \"lat\", \"lon\"),\n", " coords={\"time\": pd.date_range(\"1981-09-01 12:00\", \"2021-06-14 12:00\", freq=\"D\")},\n", " name=\"sst\",\n", ")\n", "oisst" ] }, { "cell_type": "markdown", "id": "4", "metadata": {}, "source": [ "To account for Feb-29 being present in some years, we'll construct a time vector to group by as \"mmm-dd\" string.\n", "\n", "```{seealso}\n", "For more options, see [this great website](https://strftime.org/).\n", "```" ] }, { "cell_type": "code", "execution_count": null, "id": "5", "metadata": {}, "outputs": [], "source": [ "day = oisst.time.dt.strftime(\"%h-%d\").rename(\"day\")\n", "day" ] }, { "cell_type": "markdown", "id": "6", "metadata": {}, "source": [ "## First, `method=\"map-reduce\"`\n", "\n", "The default\n", "[method=\"map-reduce\"](https://flox.readthedocs.io/en/latest/implementation.html#method-map-reduce)\n", "doesn't work so well. 
We aggregate all days in a single ~3GB chunk.\n", "\n", "For this to work well, we'd want smaller chunks in space and bigger chunks in\n", "time.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "7", "metadata": {}, "outputs": [], "source": [ "flox.xarray.xarray_reduce(\n", " oisst,\n", " day,\n", " func=\"mean\",\n", " method=\"map-reduce\",\n", ")" ] }, { "cell_type": "markdown", "id": "8", "metadata": {}, "source": [ "### Rechunking for map-reduce\n", "\n", "We can split each chunk along the `lat`, `lon` dimensions to make sure the\n", "output chunk sizes are more reasonable\n" ] }, { "cell_type": "code", "execution_count": null, "id": "9", "metadata": {}, "outputs": [], "source": [ "flox.xarray.xarray_reduce(\n", " oisst.chunk({\"lat\": -1, \"lon\": 120}),\n", " day,\n", " func=\"mean\",\n", " method=\"map-reduce\",\n", ")" ] }, { "cell_type": "markdown", "id": "10", "metadata": {}, "source": [ "But what if we didn't want to rechunk the dataset so drastically (note the 10x\n", "increase in tasks). For that let's try `method=\"cohorts\"`\n", "\n", "## `method=\"cohorts\"`\n", "\n", "We can take advantage of patterns in the groups here \"day of year\".\n", "Specifically:\n", "\n", "1. The groups at an approximately periodic interval, 365 or 366 days\n", "2. The chunk size 20 is smaller than the period of 365 or 366. This means, that\n", " to construct the mean for days 1-20, we just need to use the chunks that\n", " contain days 1-20.\n", "\n", "This strategy is implemented as\n", "[method=\"cohorts\"](https://flox.readthedocs.io/en/latest/implementation.html#method-cohorts)\n" ] }, { "cell_type": "code", "execution_count": null, "id": "11", "metadata": {}, "outputs": [], "source": [ "flox.xarray.xarray_reduce(\n", " oisst,\n", " day,\n", " func=\"mean\",\n", " method=\"cohorts\",\n", ")" ] }, { "cell_type": "markdown", "id": "12", "metadata": {}, "source": [ "By default cohorts doesn't work so well for this problem because the period\n", "isn't regular (365 vs 366) and the period isn't divisible by the chunk size. So\n", "the groups end up being \"out of phase\" (for a visual illustration\n", "[click here](https://flox.readthedocs.io/en/latest/implementation.html#method-cohorts)).\n", "Now we have the opposite problem: the chunk sizes on the output are too small.\n", "\n", "Let us inspect the cohorts" ] }, { "cell_type": "code", "execution_count": null, "id": "13", "metadata": {}, "outputs": [], "source": [ "# integer codes for each \"day\"\n", "codes, _ = pd.factorize(day.data)\n", "preferred_method, cohorts = flox.core.find_group_cohorts(\n", " labels=codes,\n", " chunks=(oisst.chunksizes[\"time\"],),\n", ")\n", "print(len(cohorts))" ] }, { "cell_type": "markdown", "id": "14", "metadata": {}, "source": [ "Looking more closely, we can see many cohorts with a single entry. " ] }, { "cell_type": "code", "execution_count": null, "id": "15", "metadata": {}, "outputs": [], "source": [ "cohorts.values()" ] }, { "cell_type": "markdown", "id": "16", "metadata": {}, "source": [ "## Rechunking data for cohorts\n", "\n", "Can we fix the \"out of phase\" problem by rechunking along time?\n", "\n", "First lets see where the current chunk boundaries are" ] }, { "cell_type": "code", "execution_count": null, "id": "17", "metadata": {}, "outputs": [], "source": [ "oisst.chunksizes[\"time\"][:10]" ] }, { "cell_type": "markdown", "id": "18", "metadata": {}, "source": [ "We'll choose to rechunk such that a single month in is a chunk. 
This is not too different from the current chunking but will help your periodicity problem" ] }, { "cell_type": "code", "execution_count": null, "id": "19", "metadata": {}, "outputs": [], "source": [ "newchunks = xr.ones_like(day).astype(int).resample(time=\"M\").count()" ] }, { "cell_type": "code", "execution_count": null, "id": "20", "metadata": {}, "outputs": [], "source": [ "rechunked = oisst.chunk(time=tuple(newchunks.data))" ] }, { "cell_type": "markdown", "id": "21", "metadata": {}, "source": [ "And now our cohorts contain more than one group, *and* there is a substantial reduction in number of cohorts **162 -> 12**\n" ] }, { "cell_type": "code", "execution_count": null, "id": "22", "metadata": {}, "outputs": [], "source": [ "preferred_method, new_cohorts = flox.core.find_group_cohorts(\n", " labels=codes,\n", " chunks=(rechunked.chunksizes[\"time\"],),\n", ")\n", "# one cohort per month!\n", "len(new_cohorts)" ] }, { "cell_type": "code", "execution_count": null, "id": "23", "metadata": {}, "outputs": [], "source": [ "preferred_method" ] }, { "cell_type": "code", "execution_count": null, "id": "24", "metadata": {}, "outputs": [], "source": [ "new_cohorts.values()" ] }, { "cell_type": "markdown", "id": "25", "metadata": {}, "source": [ "Now the groupby reduction **looks OK** in terms of number of tasks but remember\n", "that rechunking to get to this point involves some communication overhead.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "26", "metadata": {}, "outputs": [], "source": [ "flox.xarray.xarray_reduce(rechunked, day, func=\"mean\", method=\"cohorts\")" ] }, { "cell_type": "markdown", "id": "27", "metadata": {}, "source": [ "flox's heuristics will choose `\"cohorts\"` automatically!" ] }, { "cell_type": "code", "execution_count": null, "id": "28", "metadata": {}, "outputs": [], "source": [ "flox.xarray.xarray_reduce(rechunked, day, func=\"mean\")" ] }, { "cell_type": "markdown", "id": "29", "metadata": {}, "source": [ "## How about other climatologies?\n", "\n", "Let's try monthly\n" ] }, { "cell_type": "code", "execution_count": null, "id": "30", "metadata": {}, "outputs": [], "source": [ "flox.xarray.xarray_reduce(oisst, oisst.time.dt.month, func=\"mean\")" ] }, { "cell_type": "markdown", "id": "31", "metadata": {}, "source": [ "This looks great. Why?\n", "\n", "It's because each chunk (size 20) is smaller than number of days in a typical\n", "month. `flox` initially applies the groupby-reduction blockwise. 
For the chunk\n", "size of 20, we will have at most 2 groups in each chunk, so the initial\n", "blockwise reduction is quite effective - at least a 10x reduction in size from\n", "20 elements in time to at most 2 elements in time.\n", "\n", "For this kind of problem, `\"map-reduce\"` works quite well.\n" ] } ], "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3" } }, "nbformat": 4, "nbformat_minor": 5 } flox-0.10.3/docs/source/user-stories/custom-aggregations.ipynb000066400000000000000000000174031477552625700245150ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "0", "metadata": {}, "source": [ "# Custom Aggregations\n", "\n", "This notebook is motivated by a\n", "[post](https://discourse.pangeo.io/t/using-xhistogram-to-bin-measurements-at-particular-stations/2365/4)\n", "on the Pangeo discourse forum.\n", "\n", "> Even better would be a command that lets me simply do the following.\n", ">\n", "> A = da.groupby(['lon_bins', 'lat_bins']).mode()\n", "\n", "This notebook will describe how to accomplish this using a custom `Aggregation`.\n", "\n", "\n", "```{tip}\n", "flox now supports `mode`, `nanmode`, `quantile`, `nanquantile`, `median`, `nanmedian` using exactly the same \n", "approach as shown below\n", "```\n" ] }, { "cell_type": "code", "execution_count": null, "id": "1", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import numpy_groupies as npg\n", "import xarray as xr\n", "\n", "import flox.xarray\n", "from flox import Aggregation\n", "from flox.aggregations import mean\n", "\n", "# define latitude and longitude bins\n", "binsize = 1.0 # 1°x1° bins\n", "lon_min, lon_max, lat_min, lat_max = [-180, 180, -65, 65]\n", "lon_bins = np.arange(lon_min, lon_max, binsize)\n", "lat_bins = np.arange(lat_min, lat_max, binsize)\n", "\n", "size = 28397\n", "\n", "\n", "da = xr.DataArray(\n", " np.random.randint(0, 7, size=size),\n", " dims=\"profile\",\n", " coords={\n", " \"lat\": (\n", " \"profile\",\n", " (np.random.random(size) - 0.5) * (lat_max - lat_min),\n", " ),\n", " \"lon\": (\n", " \"profile\",\n", " (np.random.random(size) - 0.5) * (lon_max - lon_min),\n", " ),\n", " },\n", " name=\"label\",\n", ")\n", "da" ] }, { "cell_type": "markdown", "id": "2", "metadata": {}, "source": [ "## A built-in reduction\n", "\n", "First a simple example of lat-lon binning using a built-in reduction: mean\n" ] }, { "cell_type": "code", "execution_count": null, "id": "3", "metadata": {}, "outputs": [], "source": [ "binned_mean = flox.xarray.xarray_reduce(\n", " da,\n", " da.lat,\n", " da.lon,\n", " func=\"mean\", # built-in\n", " expected_groups=(lat_bins, lon_bins),\n", " isbin=(True, True),\n", ")\n", "binned_mean.plot()" ] }, { "cell_type": "markdown", "id": "4", "metadata": {}, "source": [ "## Aggregations\n", "\n", "flox knows how to interpret `func=\"mean\"` because it's been implemented in\n", "`aggregations.py` as an\n", "[Aggregation](https://flox.readthedocs.io/en/latest/generated/flox.aggregations.Aggregation.html)\n", "\n", "An `Aggregation` is a blueprint for computing an aggregation, with both numpy\n", "and dask data.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "5", "metadata": {}, "outputs": [], "source": [ "print(type(mean))\n", "mean" ] }, { "cell_type": "markdown", "id": "6", "metadata": {}, "source": [ "Here's how the mean Aggregation is created\n", 
"```python\n", "mean = Aggregation(\n", " name=\"mean\",\n", "\n", " # strings in the following are built-in grouped reductions\n", " # implemented by the underlying \"engine\": flox or numpy_groupies or numbagg\n", "\n", " # for pure numpy inputs\n", " numpy=\"mean\",\n", "\n", " # The next are for dask inputs and describe how to reduce\n", " # the data in parallel\n", " chunk=(\"sum\", \"nanlen\"), # first compute these blockwise : (grouped_sum, grouped_count)\n", " combine=(\"sum\", \"sum\"), # reduce intermediate results (sum the sums, sum the counts)\n", " finalize=lambda sum_, count: sum_ / count, # final mean value (divide sum by count)\n", "\n", " fill_value=(0, 0), # fill value for intermediate sums and counts when groups have no members\n", " dtypes=(None, np.intp), # optional dtypes for intermediates\n", " final_dtype=np.floating, # final dtype for output\n", ")\n", "```\n" ] }, { "cell_type": "markdown", "id": "7", "metadata": {}, "source": [ "## Defining a custom aggregation\n", "\n", "First we'll need a function that executes the grouped reduction given numpy\n", "inputs.\n", "\n", "Custom functions are required to have this signature (copied from\n", "numpy_groupies):\n", "\n", "```python\n", "\n", "def custom_grouped_reduction(\n", " group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None\n", "):\n", " \"\"\"\n", " Parameters\n", " ----------\n", "\n", " group_idx : np.ndarray, 1D\n", " integer codes for group labels (1D)\n", " array : np.ndarray, nD\n", " values to reduce (nD)\n", " axis : int\n", " axis of array along which to reduce. Requires array.shape[axis] == len(group_idx)\n", " size : int, optional\n", " expected number of groups. If none, output.shape[-1] == number of uniques in group_idx\n", " fill_value : optional\n", " fill_value for when the number of groups in group_idx is less than size\n", " dtype : optional\n", " dtype of output\n", "\n", " Returns\n", " -------\n", "\n", " np.ndarray with array.shape[-1] == size, containing a single value per group\n", " \"\"\"\n", " pass\n", "```\n", "\n", "Since numpy_groupies does not implement a median, we'll do it ourselves by\n", "passing `np.median` to `numpy_groupies.aggregate_numpy.aggregate`. This will\n", "loop over all groups, and then execute `np.median` on the group members in\n", "serial. 
It is not fast, but quite convenient.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "8", "metadata": {}, "outputs": [], "source": [ "def grouped_median(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None):\n", " return npg.aggregate_numpy.aggregate(\n", " group_idx,\n", " array,\n", " func=np.median,\n", " axis=axis,\n", " size=size,\n", " fill_value=fill_value,\n", " dtype=dtype,\n", " )" ] }, { "cell_type": "markdown", "id": "9", "metadata": {}, "source": [ "Now we create the `Aggregation`\n" ] }, { "cell_type": "code", "execution_count": null, "id": "10", "metadata": {}, "outputs": [], "source": [ "agg_median = Aggregation(\n", " name=\"median\",\n", " numpy=grouped_median,\n", " fill_value=-1,\n", " chunk=None,\n", " combine=None,\n", ")\n", "agg_median" ] }, { "cell_type": "markdown", "id": "11", "metadata": {}, "source": [ "And apply it!\n" ] }, { "cell_type": "code", "execution_count": null, "id": "12", "metadata": {}, "outputs": [], "source": [ "flox.xarray.xarray_reduce(\n", " da,\n", " da.lat,\n", " da.lon,\n", " func=agg_median,\n", " expected_groups=(lat_bins, lon_bins),\n", " isbin=(True, True),\n", " fill_value=np.nan,\n", ")" ] } ], "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3" } }, "nbformat": 4, "nbformat_minor": 5 } flox-0.10.3/docs/source/user-stories/hourly-climatology.html000066400000000000000000026312141477552625700242250ustar00rootroot00000000000000 Dask Performance Report
flox-0.10.3/docs/source/user-stories/large-zonal-stats.ipynb000066400000000000000000000202201477552625700240710ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "0", "metadata": {}, "source": [ "# Large Raster Zonal Statistics\n", "\n", "\"Zonal statistics\" spans a large range of problems. \n", "\n", "This one is inspired by [this issue](https://github.com/xarray-contrib/flox/issues/428), where a cell areas raster is aggregated over 6 different groupers and summed. Each array involved has a global extent on a 30m grid with shape 560_000 x 1440_000 and chunk size 10_000 x 10_000. Three of the groupers `tcl_year`, `drivers`, and `tcd_thresholds` have a small number of group labels (23, 5, and 7). \n", "\n", "The last 3 groupers are [GADM](https://gadm.org/) level 0, 1, 2 administrative area polygons rasterized to this grid; with 248, 86, and 854 unique labels respectively (arrays `adm0`, `adm1`, and `adm2`). These correspond to country-level, state-level, and county-level administrative boundaries. " ] }, { "cell_type": "markdown", "id": "1", "metadata": {}, "source": [ "## Example dataset" ] }, { "cell_type": "markdown", "id": "2", "metadata": {}, "source": [ "Here is a representative version of the dataset (in terms of size and chunk sizes)." ] }, { "cell_type": "code", "execution_count": null, "id": "3", "metadata": {}, "outputs": [], "source": [ "import dask.array\n", "import numpy as np\n", "import xarray as xr\n", "\n", "from flox.xarray import xarray_reduce\n", "\n", "sizes = {\"y\": 560_000, \"x\": 1440_000}\n", "chunksizes = {\"y\": 10_000, \"x\": 10_000}\n", "dims = (\"y\", \"x\")\n", "shape = tuple(sizes[d] for d in dims)\n", "chunks = tuple(chunksizes[d] for d in dims)\n", "\n", "ds = xr.Dataset(\n", " {\n", " \"areas\": (dims, dask.array.ones(shape, chunks=chunks, dtype=np.float32)),\n", " \"tcl_year\": (\n", " dims,\n", " 1 + dask.array.zeros(shape, chunks=chunks, dtype=np.float32),\n", " ),\n", " \"drivers\": (dims, 2 + dask.array.zeros(shape, chunks=chunks, dtype=np.float32)),\n", " \"tcd_thresholds\": (\n", " dims,\n", " 3 + dask.array.zeros(shape, chunks=chunks, dtype=np.float32),\n", " ),\n", " \"adm0\": (dims, 4 + dask.array.ones(shape, chunks=chunks, dtype=np.float32)),\n", " \"adm1\": (dims, 5 + dask.array.zeros(shape, chunks=chunks, dtype=np.float32)),\n", " \"adm2\": (dims, 6 + dask.array.zeros(shape, chunks=chunks, dtype=np.float32)),\n", " }\n", ")\n", "ds" ] }, { "cell_type": "markdown", "id": "4", "metadata": {}, "source": [ "## Zonal Statistics" ] }, { "cell_type": "markdown", "id": "5", "metadata": {}, "source": [ "Next define the grouper arrays and expected group labels" ] }, { "cell_type": "code", "execution_count": null, "id": "6", "metadata": {}, "outputs": [], "source": [ "by = (ds.tcl_year, ds.drivers, ds.tcd_thresholds, ds.adm0, ds.adm1, ds.adm2)\n", "expected_groups = (\n", " np.arange(23),\n", " np.arange(1, 6),\n", " np.arange(1, 8),\n", " np.arange(248),\n", " np.arange(86),\n", " np.arange(854),\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "7", "metadata": {}, "outputs": [], "source": [ "result = xarray_reduce(\n", " ds.areas,\n", " *by,\n", " expected_groups=expected_groups,\n", " func=\"sum\",\n", ")\n", "result" ] }, { "cell_type": "markdown", "id": "8", "metadata": {}, "source": [ "Formulating the three admin levels as orthogonal dimensions is quite wasteful --- not all countries have 86 states or 854 counties per state. 
The total number of GADM geometries for levels 0, 1, and 2 is ~48,000 which is much smaller than 23 x 5 x 7 x 248 x 86 x 854 = 14_662_360_160.\n", "\n", "We end up with one humongous 56GB chunk, that is mostly empty (sparsity ~ 48,000/14_662_360_160 ~ 0.2%).\n", "\n", "## We can do better using a sparse array\n", "\n", "Since the results are very sparse, we can instruct flox to construct sparse arrays of intermediate results on the full 23 x 5 x 7 x 248 x 86 x 854 output grid.\n", "\n", "```python\n", "ReindexStrategy(\n", " # do not reindex to the full output grid at the blockwise aggregation stage\n", " blockwise=False,\n", " # when combining intermediate results after blockwise aggregation, reindex to the\n", " # common grid using a sparse.COO array type\n", " array_type=ReindexArrayType.SPARSE_COO\n", ")\n", "```" ] }, { "cell_type": "code", "execution_count": null, "id": "9", "metadata": {}, "outputs": [], "source": [ "from flox import ReindexArrayType, ReindexStrategy\n", "\n", "result = xarray_reduce(\n", " ds.areas,\n", " *by,\n", " expected_groups=expected_groups,\n", " func=\"sum\",\n", " reindex=ReindexStrategy(\n", " blockwise=False,\n", " array_type=ReindexArrayType.SPARSE_COO,\n", " ),\n", " fill_value=0,\n", ")\n", "result" ] }, { "cell_type": "markdown", "id": "10", "metadata": {}, "source": [ "The output is a sparse array (see the **Data type** section)! Note that the size of this array cannot be estimated without computing it.\n", "\n", "The computation runs smoothly with low memory." ] }, { "cell_type": "markdown", "id": "11", "metadata": {}, "source": [ "## Why\n", "\n", "To understand why you might do this, here is how flox runs reductions. In the images below, the `areas` array on the left has 5 2D chunks. Each color represents a group, each square represents a value of the array; clearly there are different groups in each chunk. \n", "\n", "\n", "### reindex = True\n", "\n", "\n", "\n", "First, the grouped-reduction is run on each chunk independently, and the results are constructed as _dense_ arrays on the full 23 x 5 x 7 x 248 x 86 x 854 output grid. This means that every chunk balloons to ~50GB. This method cannot work well.\n", "\n", "### reindex = False with sparse intermediates\n", "\n", "\n", "\n", "First, the grouped-reduction is run on each chunk independently. Conceptually the result after this step is an array with differently sized chunks. \n", "\n", "Next results from neighbouring blocks are concatenated and a reduction is run again. These results are first aligned or reindexed to a common grid of group labels, termed \"reindexing\". At this stage, we instruct flox to construct a _sparse array_ during reindexing, otherwise we will eventually end up constructing _dense_ reindexed arrays of shape 23 x 5 x 7 x 248 x 86 x 854.\n", "\n", "\n", "## Can we do better?\n", "\n", "Yes. \n", "\n", "1. Using the reindexing machinery to convert intermediates to sparse is a little bit hacky. A better option would be to aggregate directly to sparse arrays, potentially using a new `engine=\"sparse\"` ([issue](https://github.com/xarray-contrib/flox/issues/346)).\n", "2. The total number of GADM geometries for levels 0, 1, and 2 is ~48,000. A much more sensible solution would be to allow grouping by these _geometries_ directly. 
This would allow us to be smart about the reduction, by exploiting the ideas underlying the [`method=\"cohorts\"` strategy](../implementation.md#method-cohorts).\n", "\n", "Regardless, the ability to do such reindexing allows flox to scale to much larger grouper arrays than previously possible.\n", "\n" ] } ], "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3" } }, "nbformat": 4, "nbformat_minor": 5 } flox-0.10.3/docs/source/user-stories/nD-bins.ipynb000066400000000000000000000215371477552625700220300ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "0", "metadata": { "tags": [], "user_expressions": [] }, "source": [ "# Binning with multi-dimensional bins\n", "\n", "```{warning}\n", "This post is a proof-of-concept for discussion. Expect APIs to change to enable this use case.\n", "```\n", "\n", "Here we explore a binning problem where the bins are multidimensional\n", "([xhistogram issue](https://github.com/xgcm/xhistogram/issues/28))\n", "\n", "> One of such multi-dim bin applications is the ranked probability score rps we\n", "> use in `xskillscore.rps`, where we want to know how many forecasts fell into\n", "> which bins. Bins are often defined as terciles of the forecast distribution\n", "> and the bins for these terciles\n", "> (`forecast_with_lon_lat_time_dims.quantile(q=[.33,.66],dim='time')`) depend on\n", "> `lon` and `lat`.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "1", "metadata": { "tags": [] }, "outputs": [], "source": [ "import math\n", "\n", "import numpy as np\n", "import pandas as pd\n", "import xarray as xr\n", "\n", "import flox\n", "import flox.xarray" ] }, { "cell_type": "markdown", "id": "2", "metadata": { "user_expressions": [] }, "source": [ "## Create test data\n" ] }, { "cell_type": "markdown", "id": "3", "metadata": { "user_expressions": [] }, "source": [ "Data to be reduced\n" ] }, { "cell_type": "code", "execution_count": null, "id": "4", "metadata": { "tags": [] }, "outputs": [], "source": [ "array = xr.DataArray(\n", " np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]),\n", " dims=(\"space\", \"time\"),\n", " name=\"array\",\n", ")\n", "array" ] }, { "cell_type": "markdown", "id": "5", "metadata": { "user_expressions": [] }, "source": [ "Array to group by\n" ] }, { "cell_type": "code", "execution_count": null, "id": "6", "metadata": { "tags": [] }, "outputs": [], "source": [ "by = xr.DataArray(\n", " np.array([[1, 2, 3], [3, 4, 5], [5, 6, 7], [6, 7, 9]]),\n", " dims=(\"space\", \"time\"),\n", " name=\"by\",\n", ")\n", "by" ] }, { "cell_type": "markdown", "id": "7", "metadata": { "tags": [], "user_expressions": [] }, "source": [ "Multidimensional bins:\n" ] }, { "cell_type": "code", "execution_count": null, "id": "8", "metadata": { "tags": [] }, "outputs": [], "source": [ "bins = by + 0.5\n", "bins = xr.DataArray(\n", " np.concatenate([bins, bins[:, [-1]] + 1], axis=-1)[:, :-1].T,\n", " dims=(\"time\", \"nbins\"),\n", " name=\"bins\",\n", ")\n", "bins" ] }, { "cell_type": "markdown", "id": "9", "metadata": { "user_expressions": [] }, "source": [ "## Concept\n", "\n", "The key idea is that GroupBy is two steps:\n", "\n", "1. Factorize (a.k.a \"digitize\") : convert the `by` data to a set of integer\n", " codes representing the bins.\n", "2. 
Apply the reduction.\n", "\n", "We treat multi-dimensional binning as a slightly complicated factorization\n", "problem. Assume that bins are a function of `time`. So we\n", "\n", "1. generate a set of appropriate integer codes by:\n", " 1. Loop over \"time\" and factorize the data appropriately.\n", " 2. Add an offset to these codes so that \"bin 0\" for `time=0` is different\n", " from \"bin 0\" for `time=1`\n", "2. apply the groupby reduction to the \"offset codes\"\n", "3. reshape the output to the right shape\n", "\n", "We will work at the xarray level, so it's easy to keep track of the different\n", "dimensions.\n", "\n", "### Factorizing\n", "\n", "The core `factorize_` function (which wraps `pd.cut`) only handles 1D bins, so\n", "we use `xr.apply_ufunc` to vectorize it for us.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "10", "metadata": { "tags": [] }, "outputs": [], "source": [ "factorize_loop_dim = \"time\"" ] }, { "cell_type": "code", "execution_count": null, "id": "11", "metadata": { "tags": [] }, "outputs": [], "source": [ "def factorize_nd_bins_core(by, bins):\n", " group_idx, *_, props = flox.core.factorize_(\n", " (by,),\n", " axes=(-1,),\n", " expected_groups=(pd.IntervalIndex.from_breaks(bins),),\n", " )\n", " # Use -1 as the NaN sentinel value\n", " group_idx[props.nanmask] = -1\n", " return group_idx\n", "\n", "\n", "codes = xr.apply_ufunc(\n", " factorize_nd_bins_core,\n", " by,\n", " bins,\n", " # TODO: avoid hardcoded dim names\n", " input_core_dims=[[\"space\"], [\"nbins\"]],\n", " output_core_dims=[[\"space\"]],\n", " vectorize=True,\n", ")\n", "codes" ] }, { "cell_type": "markdown", "id": "12", "metadata": { "user_expressions": [] }, "source": [ "### Offset the codes\n", "\n", "These are integer codes appropriate for a single timestep.\n", "\n", "We now add an offset that changes in time, to make sure \"bin 0\" for `time=0` is\n", "different from \"bin 0\" for `time=1` (taken from\n", "[this StackOverflow thread](https://stackoverflow.com/questions/46256279/bin-elements-per-row-vectorized-2d-bincount-for-numpy)).\n" ] }, { "cell_type": "code", "execution_count": null, "id": "13", "metadata": { "tags": [] }, "outputs": [], "source": [ "N = math.prod([codes.sizes[d] for d in codes.dims if d != factorize_loop_dim])\n", "offset = xr.DataArray(np.arange(codes.sizes[factorize_loop_dim]), dims=factorize_loop_dim)\n", "# TODO: think about N-1 here\n", "offset_codes = (codes + offset * (N - 1)).rename(by.name)\n", "offset_codes.data[codes == -1] = -1\n", "offset_codes" ] }, { "cell_type": "markdown", "id": "14", "metadata": { "user_expressions": [] }, "source": [ "### Reduce\n", "\n", "Now that we have appropriate codes, let's apply the reduction\n" ] }, { "cell_type": "code", "execution_count": null, "id": "15", "metadata": { "tags": [] }, "outputs": [], "source": [ "interim = flox.xarray.xarray_reduce(\n", " array,\n", " offset_codes,\n", " func=\"sum\",\n", " # We use RangeIndex to indicate that `-1` code can be safely ignored\n", " # (it indicates values outside the bins)\n", " # TODO: Avoid hardcoding 9 = sizes[\"time\"] x (sizes[\"nbins\"] - 1)\n", " expected_groups=pd.RangeIndex(9),\n", ")\n", "interim" ] }, { "cell_type": "markdown", "id": "16", "metadata": { "user_expressions": [] }, "source": [ "## Make final result\n", "\n", "Now reshape that 1D result appropriately.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "17", "metadata": { "tags": [] }, "outputs": [], "source": [ "final = (\n", " interim.coarsen(by=3)\n", " # bin_number 
dimension is last, this makes sense since it is the core dimension\n", " # and we vectorize over the loop dims.\n", " # So the first (Nbins-1) elements are for the first index of the loop dim\n", " .construct({\"by\": (factorize_loop_dim, \"bin_number\")})\n", " .transpose(..., factorize_loop_dim)\n", " .drop_vars(\"by\")\n", ")\n", "final" ] }, { "cell_type": "markdown", "id": "18", "metadata": { "user_expressions": [] }, "source": [ "I think this is the expected answer.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "19", "metadata": { "tags": [] }, "outputs": [], "source": [ "array.isel(space=slice(1, None)).rename({\"space\": \"bin_number\"}).identical(final)" ] }, { "cell_type": "markdown", "id": "20", "metadata": { "tags": [], "user_expressions": [] }, "source": [ "## TODO\n", "\n", "This could be extended to:\n", "\n", "1. handle multiple `factorize_loop_dim`\n", "2. avoid hard coded dimension names in the `apply_ufunc` call for factorizing\n", "3. avoid hard coded number of output elements in the `xarray_reduce` call.\n", "4. Somehow propagate the bin edges to the final output.\n" ] } ], "metadata": { "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3" } }, "nbformat": 4, "nbformat_minor": 5 } flox-0.10.3/docs/source/user-stories/overlaps.md000066400000000000000000000044641477552625700216500ustar00rootroot00000000000000--- jupytext: text_representation: format_name: myst kernelspec: display_name: Python 3 name: python3 --- ```{eval-rst} .. currentmodule:: flox ``` # Overlapping Groups This post is motivated by the problem of computing the [Meridional Overturning Circulation](https://en.wikipedia.org/wiki/Atlantic_meridional_overturning_circulation). One of the steps is a binned average over latitude, over regions of the World Ocean. Commonly we want to average globally, as well as over the Atlantic, and the Indo-Pacific. Generally group-by problems involve non-overlapping groups. In this example, the "global" group overlaps with the "Indo-Pacific" and "Atlantic" groups. Below we consider a simplified version of this problem. Consider the following labels: ```{code-cell} import numpy as np import xarray as xr from flox.xarray import xarray_reduce labels = xr.DataArray( [1, 2, 3, 1, 2, 3, 0, 0, 0], dims="x", name="label", ) labels ``` These labels are non-overlapping. So when we reduce this data array over those labels along `x` ```{code-cell} da = xr.ones_like(labels) da ``` we get (note the reduction over `x` is implicit here): ```{code-cell} xarray_reduce(da, labels, func="sum") ``` Now let's _also_ calculate the `sum` where `labels` is either `1` or `2`. We could easily compute this using the grouped result but here we use this simple example for illustration. The trick is to add a new dimension with new labels (here `4`) in the appropriate locations. ```{code-cell} # assign 4 where label == 1 or 2, and -1 otherwise newlabels = xr.where(labels.isin([1, 2]), 4, -1) # concatenate along a new dimension y; # note y is not present on da expanded = xr.concat([labels, newlabels], dim="y") expanded ``` Now we reduce over `x` _and_ the new dimension `y` (again implicitly) to get the appropriate sum under `label=4` (and `label=-1`). We can discard the value accumulated under `label=-1` later. 
```{code-cell} xarray_reduce(da, expanded, func="sum") ``` This way we compute all the reductions we need, in a single pass over the data. This technique generalizes to more complicated aggregations. The trick is to - generate appropriate labels - concatenate these new labels along a new dimension (`y`) absent on the object being reduced (`da`), and - reduce over that new dimension in addition to any others. flox-0.10.3/docs/source/xarray.md000066400000000000000000000021071477552625700166470ustar00rootroot00000000000000(xarray)= # Xarray Xarray will use flox by default (if installed) for DataArrays containing numpy and dask arrays. The default choice is `method="cohorts"` which generalizes the best. Pass flox-specific kwargs to the specific reduction method: ```python ds.groupby("time.month").mean(method="map-reduce", engine="flox") ds.groupby_bins("lon", bins=[0, 10, 20]).mean(method="map-reduce") ds.resample(time="M").mean(method="blockwise") ``` Xarray's GroupBy operations are currently limited: 1. One can only group by a single variable. 1. When grouping by a dask array, that array will be computed to discover the unique group labels, and their locations These limitations can be avoided by using {py:func}`flox.xarray.xarray_reduce` which allows grouping by multiple variables, lazy grouping by dask variables, as well as an arbitrary combination of categorical grouping and binning. For example, ```python flox.xarray.xarray_reduce( ds, ds.time.dt.month, ds.lon, func="mean", expected_groups=[None, [0, 10, 20]], isbin=[False, True], method="map-reduce", ) ``` flox-0.10.3/flox/000077500000000000000000000000001477552625700135375ustar00rootroot00000000000000flox-0.10.3/flox/__init__.py000066400000000000000000000007571477552625700156610ustar00rootroot00000000000000#!/usr/bin/env python # flake8: noqa """Top-level module for flox .""" from . import cache from .aggregations import Aggregation, Scan # noqa from .core import ( groupby_reduce, groupby_scan, rechunk_for_blockwise, rechunk_for_cohorts, ReindexStrategy, ReindexArrayType, ) # noqa def _get_version(): __version__ = "999" try: from ._version import __version__ except ImportError: pass return __version__ __version__ = _get_version() flox-0.10.3/flox/aggregate_flox.py000066400000000000000000000240051477552625700170700ustar00rootroot00000000000000from functools import partial import numpy as np from . import xrdtypes as dtypes from .xrutils import is_scalar, isnull, notnull def _prepare_for_flox(group_idx, array): """ Sort the input array once to save time. """ assert array.shape[-1] == group_idx.shape[0] issorted = (group_idx[:-1] <= group_idx[1:]).all() if issorted: ordered_array = array perm = slice(None) else: perm = group_idx.argsort(kind="stable") group_idx = group_idx[..., perm] ordered_array = array[..., perm] return group_idx, ordered_array, perm def _lerp(a, b, *, t, dtype, out=None): """ COPIED from numpy. Compute the linear interpolation weighted by gamma on each point of two same shape array. a : array_like Left bound. b : array_like Right bound. t : array_like The interpolation weight. 
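dtype : data-type
    dtype used to allocate the output when ``out`` is not provided.
out : np.ndarray, optional
    Array to write the interpolated result into; when None, a new array is
    allocated with ``np.empty_like(a, dtype=dtype)``.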
""" if out is None: out = np.empty_like(a, dtype=dtype) with np.errstate(invalid="ignore"): diff_b_a = np.subtract(b, a) # asanyarray is a stop-gap until gh-13105 np.add(a, diff_b_a * t, out=out) np.subtract(b, diff_b_a * (1 - t), out=out, where=t >= 0.5) return out def quantile_(array, inv_idx, *, q, axis, skipna, group_idx, dtype=None, out=None): inv_idx = np.concatenate((inv_idx, [array.shape[-1]])) array_validmask = notnull(array) actual_sizes = np.add.reduceat(array_validmask, inv_idx[:-1], axis=axis) newshape = (1,) * (array.ndim - 1) + (inv_idx.size - 1,) full_sizes = np.reshape(np.diff(inv_idx), newshape) nanmask = full_sizes != actual_sizes # The approach here is to use (complex_array.partition) because # 1. The full np.lexsort((array, labels), axis=-1) is slow and unnecessary # 2. Using record_array.partition(..., order=["labels", "array"]) is incredibly slow. # 3. For complex arrays, partition will first sort by real part, then by imaginary part, so it is a two element # lex-partition. # Therefore we use approach (3) and set # complex_array = group_idx + 1j * array # group_idx is an integer (guaranteed), but array can have NaNs. # Now the sort order of np.nan is bigger than np.inf # >>> c = (np.array([0, 1, 2, np.nan]) + np.array([np.nan, 2, 3, 4]) * 1j) # >>> c.partition(2) # >>> c # array([ 1. +2.j, 2. +3.j, nan +4.j, nan+nanj]) # So we determine which indices we need using the fact that NaNs get sorted to the end. # This *was* partly inspired by https://krstn.eu/np.nanpercentile()-there-has-to-be-a-faster-way/ # but not any more now that I use partition and avoid replacing NaNs qin = q q = np.atleast_1d(qin) q = np.reshape(q, (len(q),) + (1,) * array.ndim) # This is numpy's method="linear" # TODO: could support all the interpolations here offset = actual_sizes.cumsum(axis=-1) actual_sizes -= 1 virtual_index = q * actual_sizes # virtual_index is relative to group starts, so now offset that virtual_index[..., 1:] += offset[..., :-1] is_scalar_q = is_scalar(qin) if is_scalar_q: virtual_index = virtual_index.squeeze(axis=0) idxshape = array.shape[:-1] + (actual_sizes.shape[-1],) else: idxshape = (q.shape[0],) + array.shape[:-1] + (actual_sizes.shape[-1],) lo_ = np.floor( virtual_index, casting="unsafe", out=np.empty(virtual_index.shape, dtype=np.int64), ) hi_ = np.ceil( virtual_index, casting="unsafe", out=np.empty(virtual_index.shape, dtype=np.int64), ) kth = np.unique(np.concatenate([lo_.reshape(-1), hi_.reshape(-1)])) # partition the complex array in-place labels_broadcast = np.broadcast_to(group_idx, array.shape) with np.errstate(invalid="ignore"): cmplx = 1j * (array.view(int) if array.dtype.kind in "Mm" else array) # This is a very intentional way of handling `array` with -inf/+inf values :/ # a simple (labels + 1j * array) will yield `nan+inf * 1j` instead of `0 + inf * j` cmplx.real = labels_broadcast cmplx.partition(kth=kth, axis=-1) if is_scalar_q: a_ = cmplx.imag else: a_ = np.broadcast_to(cmplx.imag, (q.shape[0],) + array.shape) # get bounds, Broadcast to (num quantiles, ..., num labels) loval = np.take_along_axis(a_, np.broadcast_to(lo_, idxshape), axis=axis) hival = np.take_along_axis(a_, np.broadcast_to(hi_, idxshape), axis=axis) # TODO: could support all the interpolations here gamma = np.broadcast_to(virtual_index, idxshape) - lo_ result = _lerp(loval, hival, t=gamma, out=out, dtype=dtype) if not skipna and np.any(nanmask): result[..., nanmask] = np.nan return result def _np_grouped_op( group_idx, array, op, axis=-1, size=None, fill_value=None, dtype=None, 
out=None, **kwargs, ): """ most of this code is from shoyer's gist https://gist.github.com/shoyer/f538ac78ae904c936844 """ # assumes input is sorted, which I do in core._prepare_for_flox aux = group_idx flag = np.concatenate((np.array([True], like=array), aux[1:] != aux[:-1])) uniques = aux[flag] (inv_idx,) = flag.nonzero() if size is None: # This is sorted, so the last value is the largest label size = uniques[-1] + 1 if dtype is None: dtype = array.dtype if out is None: q = kwargs.get("q", None) if q is None: out = np.full(array.shape[:-1] + (size,), fill_value=fill_value, dtype=dtype) else: nq = len(np.atleast_1d(q)) out = np.full((nq,) + array.shape[:-1] + (size,), fill_value=fill_value, dtype=dtype) kwargs["group_idx"] = group_idx if (len(uniques) == size) and (uniques == np.arange(size, like=array)).all(): # The previous version of this if condition # ((uniques[1:] - uniques[:-1]) == 1).all(): # does not work when group_idx is [1, 2] for e.g. # This happens during binning op(array, inv_idx, axis=axis, dtype=dtype, out=out, **kwargs) else: out[..., uniques] = op(array, inv_idx, axis=axis, dtype=dtype, **kwargs) return out def _nan_grouped_op(group_idx, array, func, fillna, *args, **kwargs): if fillna in [dtypes.INF, dtypes.NINF]: fillna = dtypes._get_fill_value(kwargs.get("dtype", None) or array.dtype, fillna) result = func(group_idx, np.where(isnull(array), fillna, array), *args, **kwargs) # np.nanmax([np.nan, np.nan]) = np.nan # To recover this behaviour, we need to search for the fillna value # (either np.inf or -np.inf), and replace with NaN # Our choice of fillna does the right thing for sum, prod if fillna in (np.inf, -np.inf): allnangroups = result == fillna if allnangroups.any(): result[allnangroups] = kwargs["fill_value"] return result sum = partial(_np_grouped_op, op=np.add.reduceat) nansum = partial(_nan_grouped_op, func=sum, fillna=0) prod = partial(_np_grouped_op, op=np.multiply.reduceat) nanprod = partial(_nan_grouped_op, func=prod, fillna=1) max = partial(_np_grouped_op, op=np.maximum.reduceat) nanmax = partial(_nan_grouped_op, func=max, fillna=dtypes.NINF) min = partial(_np_grouped_op, op=np.minimum.reduceat) nanmin = partial(_nan_grouped_op, func=min, fillna=dtypes.INF) quantile = partial(_np_grouped_op, op=partial(quantile_, skipna=False)) nanquantile = partial(_np_grouped_op, op=partial(quantile_, skipna=True)) median = partial(partial(_np_grouped_op, q=0.5), op=partial(quantile_, skipna=False)) nanmedian = partial(partial(_np_grouped_op, q=0.5), op=partial(quantile_, skipna=True)) # TODO: all, any def sum_of_squares(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): return sum( group_idx, array**2, axis=axis, size=size, fill_value=fill_value, dtype=dtype, ) def nansum_of_squares(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): return sum_of_squares( group_idx, np.where(isnull(array), 0, array), size=size, fill_value=fill_value, axis=axis, dtype=dtype, ) def nanlen(group_idx, array, *args, **kwargs): return sum(group_idx, (notnull(array)).astype(int), *args, **kwargs) def mean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): if fill_value is None: fill_value = 0 out = sum(group_idx, array, axis=axis, size=size, dtype=dtype, fill_value=fill_value) with np.errstate(invalid="ignore", divide="ignore"): out /= nanlen(group_idx, array, size=size, axis=axis, fill_value=0) return out def nanmean(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): if fill_value is None: fill_value = 0 out = 
nansum(group_idx, array, size=size, axis=axis, dtype=dtype, fill_value=fill_value) with np.errstate(invalid="ignore", divide="ignore"): out /= nanlen(group_idx, array, size=size, axis=axis, fill_value=0) return out def ffill(group_idx, array, *, axis, **kwargs): group_idx, array, perm = _prepare_for_flox(group_idx, array) shape = array.shape ndim = array.ndim assert axis == (ndim - 1), (axis, ndim - 1) flag = np.concatenate((np.array([True], like=array), group_idx[1:] != group_idx[:-1])) (group_starts,) = flag.nonzero() # https://stackoverflow.com/questions/41190852/most-efficient-way-to-forward-fill-nan-values-in-numpy-array mask = isnull(array).copy() # copy needed since we might have a broadcast-trick array # modified from the SO answer, just reset the index at the start of every group! mask[..., np.asarray(group_starts)] = False idx = np.where(mask, 0, np.arange(shape[axis])) np.maximum.accumulate(idx, axis=axis, out=idx) slc = [ np.arange(k)[tuple([slice(None) if dim == i else np.newaxis for dim in range(ndim)])] for i, k in enumerate(shape) ] slc[axis] = idx invert_perm = slice(None) if isinstance(perm, slice) else np.argsort(perm, kind="stable") return array[tuple(slc)][..., invert_perm] flox-0.10.3/flox/aggregate_npg.py000066400000000000000000000134361477552625700167120ustar00rootroot00000000000000from functools import partial import numpy as np import numpy_groupies as npg def _get_aggregate(engine): return npg.aggregate_numpy if engine == "numpy" else npg.aggregate_numba def _casting_wrapper(func, grp, dtype): """Used for generic aggregates. The group is dtype=object, need to cast back to fix weird bugs""" return func(grp.astype(dtype)) def sum_of_squares( group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dtype=None, ): return _get_aggregate(engine).aggregate( group_idx, array, axis=axis, func="sumofsquares", size=size, fill_value=fill_value, dtype=dtype, ) def nansum_of_squares( group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dtype=None, ): return _get_aggregate(engine).aggregate( group_idx, array, axis=axis, func="nansumofsquares", size=size, fill_value=fill_value, dtype=dtype, ) def nansum(group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dtype=None): # npg takes out NaNs before calling np.bincount # This means that all NaN groups are equivalent to absent groups # This behaviour does not work for xarray return _get_aggregate(engine).aggregate( group_idx, np.where(np.isnan(array), 0, array), axis=axis, func="sum", size=size, fill_value=fill_value, dtype=dtype, ) def nanprod(group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dtype=None): # npg takes out NaNs before calling np.bincount # This means that all NaN groups are equivalent to absent groups # This behaviour does not work for xarray return _get_aggregate(engine).aggregate( group_idx, np.where(np.isnan(array), 1, array), axis=axis, func="prod", size=size, fill_value=fill_value, dtype=dtype, ) def _len(group_idx, array, engine, *, func, axis=-1, size=None, fill_value=None, dtype=None): if array.dtype.kind in "US": array = np.broadcast_to(np.array([1]), array.shape) result = _get_aggregate(engine).aggregate( group_idx, array, axis=axis, func=func, size=size, fill_value=0, dtype=np.int64, ) if fill_value is not None: result = result.astype(np.array([fill_value]).dtype) result[result == 0] = fill_value return result len = partial(_len, func="len") nanlen = partial(_len, func="nanlen") def _var_std_wrapper(group_idx, array, engine, *, axis=-1, **kwargs): # Attempt 
to increase numerical stability by subtracting the first element. # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance # Cast any unsigned types first dtype = np.result_type(array, np.int8(-1) * array[0]) array = array.astype(dtype, copy=False) first = _get_aggregate(engine).aggregate(group_idx, array, func="nanfirst", axis=axis) array = array - first[..., group_idx] return _get_aggregate(engine).aggregate(group_idx, array, axis=axis, **kwargs) var = partial(_var_std_wrapper, func="var") nanvar = partial(_var_std_wrapper, func="nanvar") std = partial(_var_std_wrapper, func="std") nanstd = partial(_var_std_wrapper, func="nanstd") def median(group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dtype=None): return npg.aggregate_numpy.aggregate( group_idx, array, func=partial(_casting_wrapper, np.median, dtype=np.result_type(array.dtype)), axis=axis, size=size, fill_value=fill_value, dtype=dtype, ) def nanmedian(group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dtype=None): return npg.aggregate_numpy.aggregate( group_idx, array, func=partial(_casting_wrapper, np.nanmedian, dtype=np.result_type(array.dtype)), axis=axis, size=size, fill_value=fill_value, dtype=dtype, ) def quantile(group_idx, array, engine, *, q, axis=-1, size=None, fill_value=None, dtype=None): return npg.aggregate_numpy.aggregate( group_idx, array, func=partial( _casting_wrapper, partial(np.quantile, q=q), dtype=np.result_type(dtype, array.dtype), ), axis=axis, size=size, fill_value=fill_value, dtype=dtype, ) def nanquantile(group_idx, array, engine, *, q, axis=-1, size=None, fill_value=None, dtype=None): return npg.aggregate_numpy.aggregate( group_idx, array, func=partial( _casting_wrapper, partial(np.nanquantile, q=q), dtype=np.result_type(dtype, array.dtype), ), axis=axis, size=size, fill_value=fill_value, dtype=dtype, ) def mode_(array, nan_policy, dtype): from scipy.stats import mode # npg splits `array` into object arrays for each group # scipy.stats.mode does not like that # here we cast back return mode(array.astype(dtype, copy=False), nan_policy=nan_policy, axis=-1, keepdims=True).mode def mode(group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dtype=None): return npg.aggregate_numpy.aggregate( group_idx, array, func=partial(mode_, nan_policy="propagate", dtype=array.dtype), axis=axis, size=size, fill_value=fill_value, dtype=dtype, ) def nanmode(group_idx, array, engine, *, axis=-1, size=None, fill_value=None, dtype=None): return npg.aggregate_numpy.aggregate( group_idx, array, func=partial(mode_, nan_policy="omit", dtype=array.dtype), axis=axis, size=size, fill_value=fill_value, dtype=dtype, ) flox-0.10.3/flox/aggregate_numbagg.py000066400000000000000000000071561477552625700175500ustar00rootroot00000000000000from functools import partial import numbagg import numbagg.grouped import numpy as np from packaging.version import Version NUMBAGG_SUPPORTS_DDOF = Version(numbagg.__version__) >= Version("0.7.0") DEFAULT_FILL_VALUE = { "nansum": 0, "nanmean": np.nan, "nanvar": np.nan, "nanstd": np.nan, "nanmin": np.nan, "nanmax": np.nan, "nanany": False, "nanall": False, "nansum_of_squares": 0, "nanprod": 1, "nancount": 0, "nanargmax": np.nan, "nanargmin": np.nan, "nanfirst": np.nan, "nanlast": np.nan, } CAST_TO = { # "nansum": {np.bool_: np.int64}, "nanmean": {np.int_: np.float64}, "nanvar": {np.int_: np.float64}, "nanstd": {np.int_: np.float64}, "nanfirst": {np.datetime64: np.int64, np.timedelta64: np.int64}, "nanlast": {np.datetime64: np.int64, np.timedelta64: 
np.int64}, "nancount": {np.datetime64: np.int64, np.timedelta64: np.int64}, } FILLNA = {"nansum": 0, "nanprod": 1} def _numbagg_wrapper( group_idx, array, *, func, axis=-1, size=None, fill_value=None, dtype=None, **kwargs, ): cast_to = CAST_TO.get(func, None) if cast_to: for from_, to_ in cast_to.items(): if np.issubdtype(array.dtype, from_): array = array.astype(to_, copy=False) func_ = getattr(numbagg.grouped, f"group_{func}") result = func_( array, group_idx, axis=axis, num_labels=size, **kwargs, # The following are unsupported # fill_value=fill_value, # dtype=dtype, ).astype(dtype, copy=False) return result def nanvar(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None, ddof=0): kwargs = {} if NUMBAGG_SUPPORTS_DDOF: kwargs["ddof"] = ddof elif ddof != 1: raise ValueError("Need numbagg >= v0.7.0 to support ddof != 1") return _numbagg_wrapper( group_idx, array, axis=axis, size=size, func="nanvar", **kwargs, # fill_value=fill_value, # dtype=dtype, ) def nanstd(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None, ddof=0): kwargs = {} if NUMBAGG_SUPPORTS_DDOF: kwargs["ddof"] = ddof elif ddof != 1: raise ValueError("Need numbagg >= v0.7.0 to support ddof != 1") return _numbagg_wrapper( group_idx, array, axis=axis, size=size, func="nanstd", **kwargs, # fill_value=fill_value, # dtype=dtype, ) def nanlen(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): if array.dtype.kind in "US": array = np.broadcast_to(np.array([1]), array.shape) return _numbagg_wrapper( group_idx, array, axis=axis, size=size, func="nancount", # fill_value=fill_value, # dtype=dtype, ) nansum = partial(_numbagg_wrapper, func="nansum") nanmean = partial(_numbagg_wrapper, func="nanmean") nanprod = partial(_numbagg_wrapper, func="nanprod") nansum_of_squares = partial(_numbagg_wrapper, func="nansum_of_squares") nanprod = partial(_numbagg_wrapper, func="nanprod") nanfirst = partial(_numbagg_wrapper, func="nanfirst") nanlast = partial(_numbagg_wrapper, func="nanlast") # nanargmax = partial(_numbagg_wrapper, func="nanargmax) # nanargmin = partial(_numbagg_wrapper, func="nanargmin) nanmax = partial(_numbagg_wrapper, func="nanmax") nanmin = partial(_numbagg_wrapper, func="nanmin") any = partial(_numbagg_wrapper, func="nanany") all = partial(_numbagg_wrapper, func="nanall") # sum = nansum # mean = nanmean # sum_of_squares = nansum_of_squares flox-0.10.3/flox/aggregations.py000066400000000000000000000670311477552625700165720ustar00rootroot00000000000000from __future__ import annotations import copy import logging import warnings from collections.abc import Callable, Sequence from dataclasses import dataclass from functools import cached_property, partial from typing import TYPE_CHECKING, Any, Literal, TypedDict import numpy as np import pandas as pd from numpy.typing import ArrayLike, DTypeLike from . import aggregate_flox, aggregate_npg, xrutils from . import xrdtypes as dtypes if TYPE_CHECKING: FuncTuple = tuple[Callable | str, ...] OptionalFuncTuple = tuple[Callable | str | None, ...] logger = logging.getLogger("flox") T_ScanBinaryOpMode = Literal["apply_binary_op", "concat_then_scan"] def _is_arg_reduction(func: str | Aggregation) -> bool: if isinstance(func, str) and func in ["argmin", "argmax", "nanargmax", "nanargmin"]: return True if isinstance(func, Aggregation) and func.reduction_type == "argreduce": return True return False class AggDtypeInit(TypedDict): final: DTypeLike | None intermediate: tuple[DTypeLike, ...] 
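# Resolved dtypes for an aggregation, filled in by _initialize_aggregation: the dtype requested by the user, the final output dtype, and the dtypes used for the pure-numpy and intermediate (blockwise) results.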
class AggDtype(TypedDict): user: DTypeLike | None final: np.dtype numpy: tuple[np.dtype | type[np.intp], ...] intermediate: tuple[np.dtype | type[np.intp], ...] def get_npg_aggregation(func, *, engine): try: method_ = getattr(aggregate_npg, func) method = partial(method_, engine=engine) except AttributeError: aggregate = aggregate_npg._get_aggregate(engine).aggregate method = partial(aggregate, func=func) return method def generic_aggregate( group_idx, array, *, engine: str, func: str, axis=-1, size=None, fill_value=None, dtype=None, **kwargs, ): if func == "identity": return array if func in ["nanfirst", "nanlast"] and array.dtype.kind in "US": func = func[3:] if engine == "flox": try: method = getattr(aggregate_flox, func) except AttributeError: # logger.debug(f"Couldn't find {func} for engine='flox'. Falling back to numpy") method = get_npg_aggregation(func, engine="numpy") elif engine == "numbagg": from . import aggregate_numbagg try: if "var" in func or "std" in func: ddof = kwargs.get("ddof", 0) if aggregate_numbagg.NUMBAGG_SUPPORTS_DDOF or (ddof != 0): method = getattr(aggregate_numbagg, func) else: logger.debug(f"numbagg too old for ddof={ddof}. Falling back to numpy") method = get_npg_aggregation(func, engine="numpy") else: method = getattr(aggregate_numbagg, func) except AttributeError: # logger.debug(f"Couldn't find {func} for engine='numbagg'. Falling back to numpy") method = get_npg_aggregation(func, engine="numpy") elif engine in ["numpy", "numba"]: method = get_npg_aggregation(func, engine=engine) else: raise ValueError( f"Expected engine to be one of ['flox', 'numpy', 'numba', 'numbagg']. Received {engine} instead." ) group_idx = np.asarray(group_idx, like=array) with warnings.catch_warnings(): warnings.filterwarnings("ignore", r"All-NaN (slice|axis) encountered") result = method( group_idx, array, axis=axis, size=size, fill_value=fill_value, dtype=dtype, **kwargs, ) return result def _atleast_1d(inp, min_length: int = 1): if xrutils.is_scalar(inp): inp = (inp,) * min_length assert len(inp) >= min_length return inp def returns_empty_tuple(*args, **kwargs): return () @dataclass class Dim: values: ArrayLike name: str | None @cached_property def is_scalar(self) -> bool: return xrutils.is_scalar(self.values) @cached_property def size(self) -> int: return 0 if self.is_scalar else len(self.values) # type: ignore[arg-type] class Aggregation: def __init__( self, name: str, *, numpy: str | None = None, chunk: str | FuncTuple | None, combine: str | FuncTuple | None, preprocess: Callable | None = None, finalize: Callable | None = None, fill_value=None, final_fill_value=dtypes.NA, dtypes=None, final_dtype: DTypeLike | None = None, reduction_type: Literal["reduce", "argreduce"] = "reduce", new_dims_func: Callable | None = None, preserves_dtype: bool = False, ): """ Blueprint for computing grouped aggregations. See aggregations.py for examples on how to specify reductions. Attributes ---------- name : str Name of reduction. numpy : str or callable, optional Reduction function applied to numpy inputs. This function should compute the grouped reduction and must have a specific signature. If string, these must be "native" reductions implemented by the backend engines (numpy_groupies, flox, numbagg). If None, will be set to ``name``. chunk : None or str or tuple of str or callable or tuple of callable For dask inputs only. Either a single function or a list of functions to be applied blockwise on the input dask array. If None, will raise an error for dask inputs. 
combine : None or str or tuple of str or callable or tuple of callable For dask inputs only. Functions applied when combining intermediate results from the blockwise stage (see ``chunk``). If None, will raise an error for dask inputs. finalize : callable For dask inputs only. Function that combines intermediate results to compute final result. preprocess : callable For dask inputs only. Preprocess inputs before ``chunk`` stage. reduction_type : {"reduce", "argreduce"} Type of reduction. fill_value : number or tuple(number), optional Value to use when a group has no members. If single value will be converted to tuple of same length as chunk. If appropriate, provide a different fill_value per reduction in ``chunk`` as a tuple. final_fill_value : optional fill_value for final result. dtypes : DType or tuple(DType), optional dtypes for intermediate results. If single value, will be converted to a tuple of same length as chunk. If appropriate, provide a different dtype per reduction in ``chunk`` as a tuple. final_dtype : DType, optional DType for output. By default, uses dtype of array being reduced. new_dims_func: Callable Function that receives finalize_kwargs and returns a tuple of sizes of any new dimensions added by the reduction. For e.g. quantile for q=(0.5, 0.85) adds a new dimension of size 2, so returns (2,) preserves_dtype: bool, Whether a function preserves the dtype on return E.g. min, max, first, last, mode """ self.name = name # preprocess before blockwise self.preprocess = preprocess # Use "chunk_reduce" or "chunk_argreduce" self.reduction_type = reduction_type self.numpy: FuncTuple = (numpy,) if numpy is not None else (self.name,) # initialize blockwise reduction self.chunk: OptionalFuncTuple = _atleast_1d(chunk) # how to aggregate results after first round of reduction self.combine: OptionalFuncTuple = _atleast_1d(combine) # simpler reductions used with the "simple combine" algorithm self.simple_combine: OptionalFuncTuple = () # finalize results (see mean) self.finalize: Callable | None = finalize self.fill_value = {} # This is used for the final reindexing self.fill_value[name] = final_fill_value # Aggregation.fill_value is used to reindex to group labels # at the *intermediate* step. 
# They should make sense when aggregated together with results from other blocks self.fill_value["intermediate"] = self._normalize_dtype_fill_value(fill_value, "fill_value") self.dtype_init: AggDtypeInit = { "final": final_dtype, "intermediate": self._normalize_dtype_fill_value(dtypes, "dtype"), } self.dtype: AggDtype = None # type: ignore[assignment] # The following are set by _initialize_aggregation self.finalize_kwargs: dict[Any, Any] = {} self.min_count: int = 0 self.new_dims_func: Callable = returns_empty_tuple if new_dims_func is None else new_dims_func self.preserves_dtype = preserves_dtype @cached_property def new_dims(self) -> tuple[Dim]: return self.new_dims_func(**self.finalize_kwargs) @cached_property def num_new_vector_dims(self) -> int: return len(tuple(dim for dim in self.new_dims if not dim.is_scalar)) def _normalize_dtype_fill_value(self, value, name): value = _atleast_1d(value) if len(value) == 1 and len(value) < len(self.chunk): value = value * len(self.chunk) if len(value) != len(self.chunk): raise ValueError(f"Bad {name} specified for Aggregation {name}.") return value def __dask_tokenize__(self): return ( Aggregation, self.name, self.preprocess, self.reduction_type, self.numpy, self.chunk, self.combine, self.finalize, self.fill_value, self.dtype, ) def __repr__(self) -> str: return "\n".join( ( f"{self.name!r}, fill: {self.fill_value.values()!r}, dtype: {self.dtype}", f"chunk: {self.chunk!r}", f"combine: {self.combine!r}", f"finalize: {self.finalize!r}", f"min_count: {self.min_count!r}", ) ) count = Aggregation( "count", numpy="nanlen", chunk="nanlen", combine="sum", fill_value=0, final_fill_value=0, dtypes=np.intp, final_dtype=np.intp, ) # note that the fill values are the result of np.func([np.nan, np.nan]) # final_fill_value is used for groups that don't exist. 
This is usually np.nan sum_ = Aggregation("sum", chunk="sum", combine="sum", fill_value=0) nansum = Aggregation("nansum", chunk="nansum", combine="sum", fill_value=0) prod = Aggregation("prod", chunk="prod", combine="prod", fill_value=1, final_fill_value=1) nanprod = Aggregation("nanprod", chunk="nanprod", combine="prod", fill_value=1) def _mean_finalize(sum_, count): with np.errstate(invalid="ignore", divide="ignore"): return sum_ / count mean = Aggregation( "mean", chunk=("sum", "nanlen"), combine=("sum", "sum"), finalize=_mean_finalize, fill_value=(0, 0), dtypes=(None, np.intp), final_dtype=np.floating, ) nanmean = Aggregation( "nanmean", chunk=("nansum", "nanlen"), combine=("sum", "sum"), finalize=_mean_finalize, fill_value=(0, 0), dtypes=(None, np.intp), final_dtype=np.floating, ) # TODO: fix this for complex numbers def _var_finalize(sumsq, sum_, count, ddof=0): with np.errstate(invalid="ignore", divide="ignore"): result = (sumsq - (sum_**2 / count)) / (count - ddof) result[count <= ddof] = np.nan return result def _std_finalize(sumsq, sum_, count, ddof=0): return np.sqrt(_var_finalize(sumsq, sum_, count, ddof)) # var, std always promote to float, so we set nan var = Aggregation( "var", chunk=("sum_of_squares", "sum", "nanlen"), combine=("sum", "sum", "sum"), finalize=_var_finalize, fill_value=0, final_fill_value=np.nan, dtypes=(None, None, np.intp), final_dtype=np.floating, ) nanvar = Aggregation( "nanvar", chunk=("nansum_of_squares", "nansum", "nanlen"), combine=("sum", "sum", "sum"), finalize=_var_finalize, fill_value=0, final_fill_value=np.nan, dtypes=(None, None, np.intp), final_dtype=np.floating, ) std = Aggregation( "std", chunk=("sum_of_squares", "sum", "nanlen"), combine=("sum", "sum", "sum"), finalize=_std_finalize, fill_value=0, final_fill_value=np.nan, dtypes=(None, None, np.intp), final_dtype=np.floating, ) nanstd = Aggregation( "nanstd", chunk=("nansum_of_squares", "nansum", "nanlen"), combine=("sum", "sum", "sum"), finalize=_std_finalize, fill_value=0, final_fill_value=np.nan, dtypes=(None, None, np.intp), final_dtype=np.floating, ) min_ = Aggregation("min", chunk="min", combine="min", fill_value=dtypes.INF, preserves_dtype=True) nanmin = Aggregation( "nanmin", chunk="nanmin", combine="nanmin", fill_value=dtypes.INF, final_fill_value=dtypes.NA, preserves_dtype=True, ) max_ = Aggregation("max", chunk="max", combine="max", fill_value=dtypes.NINF, preserves_dtype=True) nanmax = Aggregation( "nanmax", chunk="nanmax", combine="nanmax", fill_value=dtypes.NINF, final_fill_value=dtypes.NA, preserves_dtype=True, ) def argreduce_preprocess(array, axis): """Returns a tuple of array, index along axis. Copied from dask.array.chunk.argtopk_preprocess """ import dask.array import numpy as np # TODO: arg reductions along multiple axes seems weird. assert len(axis) == 1 axis = axis[0] idx = dask.array.arange(array.shape[axis], chunks=array.chunks[axis], dtype=np.intp) # broadcast (TODO: is this needed?) 
idx = idx[tuple(slice(None) if i == axis else np.newaxis for i in range(array.ndim))] def _zip_index(array_, idx_): return (array_, idx_) return dask.array.map_blocks( _zip_index, array, idx, dtype=array.dtype, meta=array._meta, name="groupby-argreduce-preprocess", ) def _pick_second(*x): return x[1] argmax = Aggregation( "argmax", preprocess=argreduce_preprocess, chunk=("max", "argmax"), # order is important combine=("max", "argmax"), reduction_type="argreduce", fill_value=(dtypes.NINF, 0), final_fill_value=-1, finalize=_pick_second, dtypes=(None, np.intp), final_dtype=np.intp, ) argmin = Aggregation( "argmin", preprocess=argreduce_preprocess, chunk=("min", "argmin"), # order is important combine=("min", "argmin"), reduction_type="argreduce", fill_value=(dtypes.INF, 0), final_fill_value=-1, finalize=_pick_second, dtypes=(None, np.intp), final_dtype=np.intp, ) nanargmax = Aggregation( "nanargmax", preprocess=argreduce_preprocess, chunk=("nanmax", "nanargmax"), # order is important combine=("max", "argmax"), reduction_type="argreduce", fill_value=(dtypes.NINF, 0), final_fill_value=-1, finalize=_pick_second, dtypes=(None, np.intp), final_dtype=np.intp, ) nanargmin = Aggregation( "nanargmin", preprocess=argreduce_preprocess, chunk=("nanmin", "nanargmin"), # order is important combine=("min", "argmin"), reduction_type="argreduce", fill_value=(dtypes.INF, 0), final_fill_value=-1, finalize=_pick_second, dtypes=(None, np.intp), final_dtype=np.intp, ) first = Aggregation("first", chunk=None, combine=None, fill_value=None, preserves_dtype=True) last = Aggregation("last", chunk=None, combine=None, fill_value=None, preserves_dtype=True) nanfirst = Aggregation( "nanfirst", chunk="nanfirst", combine="nanfirst", fill_value=dtypes.NA, preserves_dtype=True, ) nanlast = Aggregation( "nanlast", chunk="nanlast", combine="nanlast", fill_value=dtypes.NA, preserves_dtype=True, ) all_ = Aggregation( "all", chunk="all", combine="all", fill_value=True, final_fill_value=False, dtypes=bool, final_dtype=bool, ) any_ = Aggregation( "any", chunk="any", combine="any", fill_value=False, final_fill_value=False, dtypes=bool, final_dtype=bool, ) # Support statistical quantities only blockwise # The parallel versions will be approximate and are hard to implement! median = Aggregation( name="median", fill_value=dtypes.NA, chunk=None, combine=None, final_dtype=np.floating, ) nanmedian = Aggregation( name="nanmedian", fill_value=dtypes.NA, chunk=None, combine=None, final_dtype=np.floating, ) def quantile_new_dims_func(q) -> tuple[Dim]: return (Dim(name="quantile", values=q),) # if the input contains integers or floats smaller than float64, # the output data-type is float64. Otherwise, the output data-type is the same as that # of the input. quantile = Aggregation( name="quantile", fill_value=dtypes.NA, chunk=None, combine=None, final_dtype=np.float64, new_dims_func=quantile_new_dims_func, ) nanquantile = Aggregation( name="nanquantile", fill_value=dtypes.NA, chunk=None, combine=None, final_dtype=np.float64, new_dims_func=quantile_new_dims_func, ) mode = Aggregation(name="mode", fill_value=dtypes.NA, chunk=None, combine=None, preserves_dtype=True) nanmode = Aggregation(name="nanmode", fill_value=dtypes.NA, chunk=None, combine=None, preserves_dtype=True) @dataclass class Scan: # This dataclass is separate from Aggregations since there's not much in common # between reductions and scans name: str # binary operation (e.g. 
np.add) # Must be None for mode="concat_then_scan" binary_op: Callable | None # in-memory grouped scan function (e.g. cumsum) scan: str # Grouped reduction that yields the last result of the scan (e.g. sum) reduction: str # Identity element identity: Any # dtype of result dtype: Any = None preserves_dtype: bool = False # "Mode" of applying binary op. # for np.add we apply the op directly to the `state` array and the `current` array. # for ffill, bfill we concat `state` to `current` and then run the scan again. mode: T_ScanBinaryOpMode = "apply_binary_op" preprocess: Callable | None = None finalize: Callable | None = None def concatenate(arrays: Sequence[AlignedArrays], axis=-1, out=None) -> AlignedArrays: group_idx = np.concatenate([a.group_idx for a in arrays], axis=axis) array = np.concatenate([a.array for a in arrays], axis=axis) return AlignedArrays(array=array, group_idx=group_idx) @dataclass class AlignedArrays: """Simple Xarray DataArray type data class with two aligned arrays.""" array: np.ndarray group_idx: np.ndarray def __post_init__(self): assert self.array.shape[-1] == self.group_idx.size def last(self) -> AlignedArrays: from flox.core import chunk_reduce reduced = chunk_reduce( self.array, self.group_idx, func=("nanlast",), axis=-1, # TODO: automate? engine="flox", dtype=self.array.dtype, fill_value=dtypes._get_fill_value(self.array.dtype, dtypes.NA), expected_groups=None, ) return AlignedArrays(array=reduced["intermediates"][0], group_idx=reduced["groups"]) @dataclass class ScanState: """Dataclass representing intermediates for scan.""" # last value of each group seen so far state: AlignedArrays | None # intermediate result result: AlignedArrays | None def __post_init__(self): assert (self.state is not None) or (self.result is not None) def reverse(a: AlignedArrays) -> AlignedArrays: a.group_idx = a.group_idx[..., ::-1] a.array = a.array[..., ::-1] return a def scan_binary_op(left_state: ScanState, right_state: ScanState, *, agg: Scan) -> ScanState: from .core import reindex_ assert left_state.state is not None left = left_state.state right = right_state.result if right_state.result is not None else right_state.state assert right is not None if agg.mode == "apply_binary_op": assert agg.binary_op is not None # Implements groupby binary operation. reindexed = reindex_( left.array, from_=pd.Index(left.group_idx), # can't use right.group_idx since we need to do the indexing later to=pd.RangeIndex(right.group_idx.max() + 1), fill_value=agg.identity, axis=-1, ) result = AlignedArrays( array=agg.binary_op(reindexed[..., right.group_idx], right.array), group_idx=right.group_idx, ) elif agg.mode == "concat_then_scan": # Implements the binary op portion of the scan as a concatenate-then-scan. # This is useful for `ffill`, and presumably more generalized scans. assert agg.binary_op is None concat = concatenate([left, right], axis=-1) final_value = generic_aggregate( concat.group_idx, concat.array, func=agg.scan, axis=concat.array.ndim - 1, engine="flox", fill_value=agg.identity, ) result = AlignedArrays(array=final_value[..., left.group_idx.size :], group_idx=right.group_idx) else: raise ValueError(f"Unknown binary op application mode: {agg.mode!r}") # This is quite important. We need to update the state seen so far and propagate that. # So we must account for what we know when entering this function: i.e. 
`left` # TODO: this is a bit wasteful since it will sort again, but for now let's focus on # correctness and DRY lasts = concatenate([left, result]).last() return ScanState( state=lasts, # The binary op is called on the results of the reduction too when building up the tree. # We need to be careful and assign those results only to `state` and not the final result. # Up above, `result` is privileged when it exists. result=None if right_state.result is None else result, ) # TODO: numpy_groupies cumsum is a broken when NaNs are present. # cumsum = Scan("cumsum", binary_op=np.add, reduction="sum", scan="cumsum", identity=0) nancumsum = Scan("nancumsum", binary_op=np.add, reduction="nansum", scan="nancumsum", identity=0) # ffill uses the identity for scan, and then at the binary-op state, # we concatenate the blockwise-reduced values with the original block, # and then execute the scan # TODO: consider adding chunk="identity" here, like with reductions as an optimization ffill = Scan( "ffill", binary_op=None, reduction="nanlast", scan="ffill", # Important: this must be NaN otherwise, ffill does not work. identity=dtypes.NA, mode="concat_then_scan", preserves_dtype=True, ) bfill = Scan( "bfill", binary_op=None, reduction="nanlast", scan="ffill", # Important: this must be NaN otherwise, bfill does not work. identity=dtypes.NA, preserves_dtype=True, mode="concat_then_scan", preprocess=reverse, finalize=reverse, ) # TODO: not implemented in numpy_groupies # cumprod = Scan("cumprod", binary_op=np.multiply, preop="prod", scan="cumprod") AGGREGATIONS: dict[str, Aggregation | Scan] = { "any": any_, "all": all_, "count": count, "sum": sum_, "nansum": nansum, "prod": prod, "nanprod": nanprod, "mean": mean, "nanmean": nanmean, "var": var, "nanvar": nanvar, "std": std, "nanstd": nanstd, "max": max_, "nanmax": nanmax, "min": min_, "nanmin": nanmin, "argmax": argmax, "nanargmax": nanargmax, "argmin": argmin, "nanargmin": nanargmin, "first": first, "nanfirst": nanfirst, "last": last, "nanlast": nanlast, "median": median, "nanmedian": nanmedian, "quantile": quantile, "nanquantile": nanquantile, "mode": mode, "nanmode": nanmode, # "cumsum": cumsum, "nancumsum": nancumsum, "ffill": ffill, "bfill": bfill, } def _initialize_aggregation( func: str | Aggregation, dtype, array_dtype, fill_value, min_count: int, finalize_kwargs: dict[Any, Any] | None, ) -> Aggregation: agg: Aggregation if not isinstance(func, Aggregation): try: # TODO: need better interface # we set dtype, fillvalue on reduction later. so deepcopy now agg_ = copy.deepcopy(AGGREGATIONS[func]) assert isinstance(agg_, Aggregation) agg = agg_ except KeyError: raise NotImplementedError(f"Reduction {func!r} not implemented yet") elif isinstance(func, Aggregation): # TODO: test that func is a valid Aggregation agg = copy.deepcopy(func) func = agg.name else: raise ValueError("Bad type for func. Expected str or Aggregation") # np.dtype(None) == np.dtype("float64")!!! 
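# --- illustrative aside (plain NumPy behaviour, standalone; not a flox API) ----
# A minimal demonstration of the gotcha noted here: passing None to np.dtype
# silently yields float64, which is why the code below tests `dtype is not None`
# explicitly before wrapping user input in np.dtype.
import numpy as np
assert np.dtype(None) == np.dtype("float64")
# --- end aside ------------------------------------------------------------------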
# so check for not None dtype_: np.dtype | None = ( np.dtype(dtype) if dtype is not None and not isinstance(dtype, np.dtype) else dtype ) final_dtype = dtypes._normalize_dtype( dtype_ or agg.dtype_init["final"], array_dtype, agg.preserves_dtype, fill_value ) agg.dtype = { "user": dtype, # Save to automatically choose an engine "final": final_dtype, "numpy": (final_dtype,), "intermediate": tuple( ( dtypes._normalize_dtype(int_dtype, np.result_type(array_dtype, final_dtype), int_fv) if int_dtype is None else np.dtype(int_dtype) ) for int_dtype, int_fv in zip(agg.dtype_init["intermediate"], agg.fill_value["intermediate"]) ), } # Replace sentinel fill values according to dtype agg.fill_value["user"] = fill_value agg.fill_value["intermediate"] = tuple( dtypes._get_fill_value(dt, fv) for dt, fv in zip(agg.dtype["intermediate"], agg.fill_value["intermediate"]) ) agg.fill_value[func] = dtypes._get_fill_value(agg.dtype["final"], agg.fill_value[func]) if _is_arg_reduction(agg): # this allows us to unravel_index easily. we have to do that nearly every time. agg.fill_value["numpy"] = (0,) else: agg.fill_value["numpy"] = (agg.fill_value[func],) if finalize_kwargs is not None: assert isinstance(finalize_kwargs, dict) agg.finalize_kwargs = finalize_kwargs # This is needed for the dask pathway. # Because we use intermediate fill_value since a group could be # absent in one block, but present in another block # We set it for numpy to get nansum, nanprod tests to pass # where the identity element is 0, 1 # Also needed for nanmin, nanmax where intermediate fill_value is +-np.inf, # but final_fill_value is dtypes.NA if ( # TODO: this is a total hack, setting a default fill_value # even though numpy doesn't define identity for nanmin, nanmax agg.name in ["nanmin", "nanmax"] and min_count == 0 ): min_count = 1 agg.fill_value["user"] = agg.fill_value["user"] or agg.fill_value[agg.name] if min_count > 0: agg.min_count = min_count agg.numpy += ("nanlen",) if agg.chunk != (None,): agg.chunk += ("nanlen",) agg.combine += ("sum",) agg.fill_value["intermediate"] += (0,) agg.fill_value["numpy"] += (0,) agg.dtype["intermediate"] += (np.intp,) agg.dtype["numpy"] += (np.intp,) else: agg.min_count = 0 simple_combine: list[Callable | None] = [] for combine in agg.combine: if isinstance(combine, str): if combine in ["nanfirst", "nanlast"]: simple_combine.append(getattr(xrutils, combine)) else: simple_combine.append(getattr(np, combine)) else: simple_combine.append(combine) agg.simple_combine = tuple(simple_combine) return agg flox-0.10.3/flox/cache.py000066400000000000000000000004131477552625700151520ustar00rootroot00000000000000from functools import partial try: import cachey import dask # 1MB cache cache = cachey.Cache(1e6) memoize = partial(cache.memoize, key=dask.base.tokenize) except ImportError: cache = {} memoize = lambda x: x # type: ignore[assignment] flox-0.10.3/flox/core.py000066400000000000000000003554561477552625700150630ustar00rootroot00000000000000from __future__ import annotations import copy import datetime import itertools import logging import math import operator import sys import warnings from collections import namedtuple from collections.abc import Callable, Sequence from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass from enum import Enum, auto from functools import partial, reduce from itertools import product from numbers import Integral from typing import ( TYPE_CHECKING, Any, Literal, TypeAlias, TypedDict, TypeVar, Union, cast, overload, ) import numpy as np import 
numpy_groupies as npg import pandas as pd import toolz as tlz from scipy.sparse import csc_array, csr_array from . import xrdtypes from .aggregate_flox import _prepare_for_flox from .aggregations import ( AGGREGATIONS, Aggregation, AlignedArrays, Scan, ScanState, _atleast_1d, _initialize_aggregation, generic_aggregate, quantile_new_dims_func, ) from .cache import memoize from .lib import ArrayLayer from .xrutils import ( _contains_cftime_datetimes, _to_pytimedelta, datetime_to_numeric, is_chunked_array, is_duck_array, is_duck_cubed_array, is_duck_dask_array, isnull, module_available, notnull, ) if module_available("numpy", minversion="2.0.0"): from numpy.lib.array_utils import normalize_axis_tuple else: from numpy.core.numeric import normalize_axis_tuple # type: ignore[no-redef] HAS_NUMBAGG = module_available("numbagg", minversion="0.3.0") HAS_SPARSE = module_available("sparse") if TYPE_CHECKING: try: if sys.version_info < (3, 11): from typing_extensions import Unpack else: from typing import Unpack except (ModuleNotFoundError, ImportError): Unpack: Any # type: ignore[no-redef] from .types import CubedArray, DaskArray, Graph T_DuckArray: TypeAlias = np.ndarray | DaskArray | CubedArray # Any ? T_By: TypeAlias = T_DuckArray T_Bys = tuple[T_By, ...] T_ExpectIndex = pd.Index T_ExpectIndexTuple = tuple[T_ExpectIndex, ...] T_ExpectIndexOpt = T_ExpectIndex | None T_ExpectIndexOptTuple = tuple[T_ExpectIndexOpt, ...] T_Expect = Sequence | np.ndarray | T_ExpectIndex T_ExpectTuple = tuple[T_Expect, ...] T_ExpectOpt = Sequence | np.ndarray | T_ExpectIndexOpt T_ExpectOptTuple = tuple[T_ExpectOpt, ...] T_ExpectedGroups = T_Expect | T_ExpectOptTuple T_ExpectedGroupsOpt = T_ExpectedGroups | None T_Func = str | Callable T_Funcs = T_Func | Sequence[T_Func] T_Agg = str | Aggregation T_Scan = str | Scan T_Axis = int T_Axes = tuple[T_Axis, ...] T_AxesOpt = T_Axis | T_Axes | None T_Dtypes = np.typing.DTypeLike | Sequence[np.typing.DTypeLike] | None T_FillValues = np.typing.ArrayLike | Sequence[np.typing.ArrayLike] | None T_Engine = Literal["flox", "numpy", "numba", "numbagg"] T_EngineOpt = None | T_Engine T_Method = Literal["map-reduce", "blockwise", "cohorts"] T_MethodOpt = None | Literal["map-reduce", "blockwise", "cohorts"] T_IsBins = bool | Sequence[bool] T = TypeVar("T") IntermediateDict = dict[str | Callable, Any] FinalResultsDict = dict[str, Union["DaskArray", "CubedArray", np.ndarray]] FactorProps = namedtuple("FactorProps", "offset_group nan_sentinel nanmask") # This dummy axis is inserted using np.expand_dims # and then reduced over during the combine stage by # _simple_combine. DUMMY_AXIS = -2 logger = logging.getLogger("flox") class ReindexArrayType(Enum): """ Enum describing which array type to reindex to. These are enumerated, rather than accepting a constructor, because we might want to optimize for specific array types, and because they don't necessarily have the same signature. For example, scipy.sparse.COO only supports a fill_value of 0. """ AUTO = auto() NUMPY = auto() SPARSE_COO = auto() # Sadly, scipy.sparse.coo_array only supports fill_value = 0 # SCIPY_SPARSE_COO = auto() # SPARSE_GCXS = auto() def is_same_type(self, other) -> bool: match self: case ReindexArrayType.AUTO: return True case ReindexArrayType.NUMPY: return isinstance(other, np.ndarray) case ReindexArrayType.SPARSE_COO: import sparse return isinstance(other, sparse.COO) @dataclass class ReindexStrategy: """ Strategy for reindexing. Attributes ---------- blockwise: bool, optional Whether to reindex at the blockwise step. 
Must be False for method="cohorts" array_type: ReindexArrayType, optional Whether to reindex to a different array type than array being reduced. """ # whether to reindex at the blockwise step blockwise: bool | None array_type: ReindexArrayType = ReindexArrayType.AUTO def __post_init__(self): if self.blockwise is True: if self.array_type not in (ReindexArrayType.AUTO, ReindexArrayType.NUMPY): raise ValueError("Setting reindex.blockwise=True not allowed for non-numpy array type.") def set_blockwise_for_numpy(self): self.blockwise = True if self.blockwise is None else self.blockwise def get_dask_meta(self, other, *, fill_value, dtype) -> Any: import dask if self.array_type is ReindexArrayType.AUTO: other_type = type(other._meta) if isinstance(other, dask.array.Array) else type(other) return other_type([], dtype=dtype) elif self.array_type is ReindexArrayType.NUMPY: return np.ndarray([], dtype=dtype) elif self.array_type is ReindexArrayType.SPARSE_COO: import sparse return sparse.COO.from_numpy(np.ones(shape=(0,) * other.ndim, dtype=dtype), fill_value=fill_value) class FactorizeKwargs(TypedDict, total=False): """Used in _factorize_multiple""" by: T_Bys axes: T_Axes fastpath: bool reindex: bool sort: bool def _postprocess_numbagg(result, *, func, fill_value, size, seen_groups): """Account for numbagg not providing a fill_value kwarg.""" from .aggregate_numbagg import DEFAULT_FILL_VALUE if not isinstance(func, str) or func not in DEFAULT_FILL_VALUE: return result # The condition needs to be # len(found_groups) < size; if so we mask with fill_value (?) default_fv = DEFAULT_FILL_VALUE[func] needs_masking = fill_value is not None and not np.array_equal(fill_value, default_fv, equal_nan=True) groups = np.arange(size) if needs_masking: mask = np.isin(groups, seen_groups, assume_unique=True, invert=True) if mask.any(): result[..., groups[mask]] = fill_value return result def identity(x: T) -> T: return x def _issorted(arr: np.ndarray) -> bool: return bool((arr[:-1] <= arr[1:]).all()) def _is_arg_reduction(func: T_Agg) -> bool: if isinstance(func, str) and func in ["argmin", "argmax", "nanargmax", "nanargmin"]: return True if isinstance(func, Aggregation) and func.reduction_type == "argreduce": return True return False def _is_minmax_reduction(func: T_Agg) -> bool: return not _is_arg_reduction(func) and (isinstance(func, str) and ("max" in func or "min" in func)) def _is_first_last_reduction(func: T_Agg) -> bool: if isinstance(func, Aggregation): func = func.name return func in ["nanfirst", "nanlast", "first", "last"] def _is_bool_supported_reduction(func: T_Agg) -> bool: if isinstance(func, Aggregation): func = func.name return ( func in ["all", "any"] # TODO: enable in npg # or _is_first_last_reduction(func) # or _is_minmax_reduction(func) ) def _is_sparse_supported_reduction(func: T_Agg) -> bool: if isinstance(func, Aggregation): func = func.name return HAS_SPARSE and all(f not in func for f in ["first", "last", "prod", "var", "std"]) def _get_expected_groups(by: T_By, sort: bool) -> T_ExpectIndex: if is_duck_dask_array(by): raise ValueError("Please provide expected_groups if not grouping by a numpy array.") flatby = by.reshape(-1) expected = pd.unique(flatby[notnull(flatby)]) return _convert_expected_groups_to_index((expected,), isbin=(False,), sort=sort)[0] def _get_chunk_reduction(reduction_type: Literal["reduce", "argreduce"]) -> Callable: if reduction_type == "reduce": return chunk_reduce elif reduction_type == "argreduce": return chunk_argreduce else: raise ValueError(f"Unknown reduction type: 
{reduction_type}") def is_nanlen(reduction: T_Func) -> bool: return isinstance(reduction, str) and reduction == "nanlen" def _move_reduce_dims_to_end(arr: np.ndarray, axis: T_Axes) -> np.ndarray: """Transpose `arr` by moving `axis` to the end.""" axis = tuple(axis) order = tuple(ax for ax in np.arange(arr.ndim) if ax not in axis) + axis arr = arr.transpose(order) return arr def _collapse_axis(arr: np.ndarray, naxis: int) -> np.ndarray: """Reshape so that the last `naxis` axes are collapsed to one axis.""" newshape = arr.shape[:-naxis] + (math.prod(arr.shape[-naxis:]),) return arr.reshape(newshape) @memoize def _get_optimal_chunks_for_groups(chunks, labels): chunkidx = np.cumsum(chunks) - 1 # what are the groups at chunk boundaries labels_at_chunk_bounds = _unique(labels[chunkidx]) # what's the last index of all groups last_indexes = npg.aggregate_numpy.aggregate(labels, np.arange(len(labels)), func="last") # what's the last index of groups at the chunk boundaries. lastidx = last_indexes[labels_at_chunk_bounds] if len(chunkidx) == len(lastidx) and (chunkidx == lastidx).all(): return chunks first_indexes = npg.aggregate_numpy.aggregate(labels, np.arange(len(labels)), func="first") firstidx = first_indexes[labels_at_chunk_bounds] newchunkidx = [0] for c, f, l in zip(chunkidx, firstidx, lastidx): # noqa Δf = abs(c - f) Δl = abs(c - l) if c == 0 or newchunkidx[-1] > l: continue if Δf < Δl and f > newchunkidx[-1]: newchunkidx.append(f) else: newchunkidx.append(l + 1) if newchunkidx[-1] != chunkidx[-1] + 1: newchunkidx.append(chunkidx[-1] + 1) newchunks = np.diff(newchunkidx) assert sum(newchunks) == sum(chunks) return tuple(newchunks) def _unique(a: np.ndarray) -> np.ndarray: """Much faster to use pandas unique and sort the results. np.unique sorts before uniquifying and is slow.""" return np.sort(pd.unique(a.reshape(-1))) def slices_from_chunks(chunks): """slightly modified from dask.array.core.slices_from_chunks to be lazy""" cumdims = [tlz.accumulate(operator.add, bds, 0) for bds in chunks] slices = ( (slice(s, s + dim) for s, dim in zip(starts, shapes)) for starts, shapes in zip(cumdims, chunks) ) return product(*slices) def _compute_label_chunk_bitmask(labels, chunks, nlabels): def make_bitmask(rows, cols): data = np.broadcast_to(np.array(1, dtype=np.uint8), rows.shape) return csc_array((data, (rows, cols)), dtype=bool, shape=(nchunks, nlabels)) assert isinstance(labels, np.ndarray) shape = tuple(sum(c) for c in chunks) nchunks = math.prod(len(c) for c in chunks) approx_chunk_size = math.prod(c[0] for c in chunks) # Shortcut for 1D with size-1 chunks if shape == (nchunks,): rows_array = np.arange(nchunks) cols_array = labels mask = labels >= 0 return make_bitmask(rows_array[mask], cols_array[mask]) labels = np.broadcast_to(labels, shape[-labels.ndim :]) cols = [] ilabels = np.arange(nlabels) def chunk_unique(labels, slicer, nlabels, label_is_present=None): if label_is_present is None: label_is_present = np.empty((nlabels + 1,), dtype=bool) label_is_present[:] = False subset = labels[slicer] # This is a quite fast way to find unique integers, when we know how many there are # inspired by a similar idea in numpy_groupies for first, last # instead of explicitly finding uniques, repeatedly write True to the same location label_is_present[subset.reshape(-1)] = True # skip the -1 sentinel by slicing # Faster than np.argwhere by a lot uniques = ilabels[label_is_present[:-1]] return uniques # TODO: refine this heuristic. 
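# --- illustrative sketch (standalone, pure NumPy; names with a _demo suffix are
# illustrative only, not flox internals) -----------------------------------------
# The presence-mask trick used by chunk_unique above: for factorized integer
# labels in [0, nlabels), uniques can be found without sorting by writing True
# into a boolean scratch array and reading back the hot indices.
import numpy as np

labels_demo = np.array([3, 0, 3, 5, 0, 5, 5])
nlabels_demo = 6  # assumed known, as it is after factorization
present = np.zeros(nlabels_demo, dtype=bool)
present[labels_demo] = True
assert (np.flatnonzero(present) == np.unique(labels_demo)).all()
# --- end sketch ------------------------------------------------------------------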
# The general idea is that with the threadpool, we repeatedly allocate memory # for `label_is_present`. We trade that off against the parallelism across number of chunks. # For large enough number of chunks (relative to number of labels), it makes sense to # suffer the extra allocation in exchange for parallelism. THRESHOLD = 2 if nlabels < THRESHOLD * approx_chunk_size: logger.debug( "Using threadpool since num_labels %s < %d * chunksize %s", nlabels, THRESHOLD, approx_chunk_size, ) with ThreadPoolExecutor() as executor: futures = [ executor.submit(chunk_unique, labels, slicer, nlabels) for slicer in slices_from_chunks(chunks) ] cols = tuple(f.result() for f in futures) else: logger.debug( "Using serial loop since num_labels %s > %d * chunksize %s", nlabels, THRESHOLD, approx_chunk_size, ) cols = [] # Add one to handle the -1 sentinel value label_is_present = np.empty((nlabels + 1,), dtype=bool) for region in slices_from_chunks(chunks): uniques = chunk_unique(labels, region, nlabels, label_is_present) cols.append(uniques) rows_array = np.repeat(np.arange(nchunks), tuple(len(col) for col in cols)) cols_array = np.concatenate(cols) return make_bitmask(rows_array, cols_array) # @memoize def find_group_cohorts( labels, chunks, expected_groups: None | pd.RangeIndex = None, merge: bool = False ) -> tuple[T_Method, dict]: """ Finds groups labels that occur together aka "cohorts" If available, results are cached in a 1MB cache managed by `cachey`. This allows us to be quick when repeatedly calling groupby_reduce for arrays with the same chunking (e.g. an xarray Dataset). Parameters ---------- labels : np.ndarray mD Array of integer group codes, factorized so that -1 represents NaNs. chunks : tuple chunks of the array being reduced expected_groups: pd.RangeIndex (optional) Used to extract the largest label expected merge: bool (optional) Whether to merge cohorts or not. Set to True if a user specifies "cohorts" but other methods are preferable. Returns ------- preferred_method: {"blockwise", cohorts", "map-reduce"} cohorts: dict_values Iterable of cohorts """ # To do this, we must have values in memory so casting to numpy should be safe labels = np.asarray(labels) shape = tuple(sum(c) for c in chunks) nchunks = math.prod(len(c) for c in chunks) # assumes that `labels` are factorized if expected_groups is None: nlabels = labels.max() + 1 else: nlabels = expected_groups[-1] + 1 # 1. Single chunk, blockwise always if nchunks == 1: return "blockwise", {(0,): list(range(nlabels))} labels = np.broadcast_to(labels, shape[-labels.ndim :]) bitmask = _compute_label_chunk_bitmask(labels, chunks, nlabels) CHUNK_AXIS, LABEL_AXIS = 0, 1 chunks_per_label = bitmask.sum(axis=CHUNK_AXIS) # can happen when `expected_groups` is passed but not all labels are present # (binning, resampling) present_labels = np.arange(bitmask.shape[LABEL_AXIS]) present_labels_mask = chunks_per_label != 0 if not present_labels_mask.all(): present_labels = present_labels[present_labels_mask] bitmask = bitmask[..., present_labels_mask] chunks_per_label = chunks_per_label[present_labels_mask] label_chunks = { present_labels[idx].item(): bitmask.indices[slice(bitmask.indptr[idx], bitmask.indptr[idx + 1])] for idx in range(bitmask.shape[LABEL_AXIS]) } # Invert the label_chunks mapping so we know which labels occur together. def invert(x) -> tuple[np.ndarray, ...]: arr = label_chunks[x] return tuple(arr.tolist()) chunks_cohorts = tlz.groupby(invert, label_chunks.keys()) # 2. Every group is contained to one block, use blockwise here. 
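# --- illustrative sketch (standalone, pure NumPy; names with a _demo suffix are
# illustrative only, not flox internals) -----------------------------------------
# What "cohorts" means concretely: labels that always occur in the same set of
# chunks are grouped together. For a 1-D array chunked as (3, 3, 3), labels 0
# and 1 both live in chunks {0, 2} while label 2 lives only in chunk {1},
# giving two cohorts.
import numpy as np

labels_demo = np.array([0, 1, 0, 2, 2, 2, 1, 0, 1])
chunks_demo = (3, 3, 3)
bounds = np.cumsum((0,) + chunks_demo)
chunks_per_label_demo = {}
for chunk_id, (start, stop) in enumerate(zip(bounds[:-1], bounds[1:])):
    for lab in np.unique(labels_demo[start:stop]):
        chunks_per_label_demo.setdefault(int(lab), set()).add(chunk_id)
# invert: labels seen in identical chunk-sets form a cohort
cohorts_demo = {}
for lab, chunkset in chunks_per_label_demo.items():
    cohorts_demo.setdefault(tuple(sorted(chunkset)), []).append(lab)
print(cohorts_demo)  # {(0, 2): [0, 1], (1,): [2]}
# --- end sketch ------------------------------------------------------------------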
if bitmask.shape[CHUNK_AXIS] == 1 or (chunks_per_label == 1).all(): logger.debug("find_group_cohorts: blockwise is preferred.") return "blockwise", chunks_cohorts # 3. Perfectly chunked so there is only a single cohort if len(chunks_cohorts) == 1: logger.debug("Only found a single cohort. 'map-reduce' is preferred.") return "map-reduce", chunks_cohorts if merge else {} # 4. Our dataset has chunksize one along the axis, single_chunks = all(all(a == 1 for a in ac) for ac in chunks) # 5. Every chunk only has a single group, but that group might extend across multiple chunks one_group_per_chunk = (bitmask.sum(axis=LABEL_AXIS) == 1).all() # 6. Existing cohorts don't overlap, great for time grouping with perfect chunking no_overlapping_cohorts = (np.bincount(np.concatenate(tuple(chunks_cohorts.keys()))) == 1).all() if one_group_per_chunk or single_chunks or no_overlapping_cohorts: logger.debug("find_group_cohorts: cohorts is preferred, chunking is perfect.") return "cohorts", chunks_cohorts # We'll use containment to measure degree of overlap between labels. # Containment C = |Q & S| / |Q| # - |X| is the cardinality of set X # - Q is the query set being tested # - S is the existing set # The bitmask matrix S allows us to calculate this pretty efficiently using a dot product. # S.T @ S / chunks_per_label # # We treat the sparsity(C) = (nnz/size) as a summary measure of the net overlap. # 1. For high enough sparsity, there is a lot of overlap and we should use "map-reduce". # 2. When labels are uniformly distributed amongst all chunks # (and number of labels < chunk size), sparsity is 1. # 3. Time grouping cohorts (e.g. dayofyear) appear as lines in this matrix. # 4. When there are no overlaps at all between labels, containment is a block diagonal matrix # (approximately). # # However computing S.T @ S can still be the slowest step, especially if S # is not particularly sparse. Empirically the sparsity( S.T @ S ) > min(1, 2 x sparsity(S)). # So we use sparsity(S) as a shortcut. MAX_SPARSITY_FOR_COHORTS = 0.4 # arbitrary sparsity = bitmask.nnz / math.prod(bitmask.shape) preferred_method: Literal["map-reduce"] | Literal["cohorts"] logger.debug( "sparsity of bitmask is {}, threshold is {}".format( # noqa sparsity, MAX_SPARSITY_FOR_COHORTS ) ) # 7. Groups seem fairly randomly distributed, use "map-reduce". if sparsity > MAX_SPARSITY_FOR_COHORTS: if not merge: logger.debug( "find_group_cohorts: bitmask sparsity={}, merge=False, choosing 'map-reduce'".format( # noqa sparsity ) ) return "map-reduce", {} preferred_method = "map-reduce" else: preferred_method = "cohorts" # Note: While A.T @ A is a symmetric matrix, the division by chunks_per_label # makes it non-symmetric. asfloat = bitmask.astype(float) containment = csr_array(asfloat.T @ asfloat / chunks_per_label) logger.debug( "sparsity of containment matrix is {}".format( # noqa containment.nnz / math.prod(containment.shape) ) ) # Next we for-loop over groups and merge those that are quite similar. # Use a threshold on containment to always force some merging. # Note that we do not use the filtered containment matrix for estimating "sparsity" # because it is a bit hard to reason about. MIN_CONTAINMENT = 0.75 # arbitrary mask = containment.data < MIN_CONTAINMENT # Now we also know "exact cohorts" -- cohorts whose constituent groups # occur in exactly the same chunks. We only need examine one member of each group. # Skip the others by first looping over the exact cohorts, and zero out those rows. 
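# --- illustrative sketch (standalone, pure NumPy on a dense toy matrix; names
# with a _demo suffix are illustrative only, not flox internals) ------------------
# The containment measure above in miniature: for a chunk-by-label presence
# matrix S, (S.T @ S)[i, j] counts chunks shared by labels i and j; dividing by
# the per-label chunk counts normalizes that overlap, and entries near 1 suggest
# two labels belong in the same cohort while entries near 0 suggest they do not.
import numpy as np

S_demo = np.array([[1, 1, 0],
                   [0, 0, 1],
                   [1, 1, 0]], dtype=float)  # 3 chunks x 3 labels
chunks_per_label_demo = S_demo.sum(axis=0)   # [2., 2., 1.]
containment_demo = (S_demo.T @ S_demo) / chunks_per_label_demo
print(containment_demo)
# [[1. 1. 0.]
#  [1. 1. 0.]
#  [0. 0. 1.]]
# --- end sketch ------------------------------------------------------------------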
repeated = np.concatenate([v[1:] for v in chunks_cohorts.values()]).astype(int) repeated_idx = np.searchsorted(present_labels, repeated) for i in repeated_idx: mask[containment.indptr[i] : containment.indptr[i + 1]] = True containment.data[mask] = 0 containment.eliminate_zeros() # Figure out all the labels we need to loop over later n_overlapping_labels = containment.astype(bool).sum(axis=1) order = np.argsort(n_overlapping_labels, kind="stable")[::-1] # Order is such that we iterate over labels, beginning with those with most overlaps # Also filter out any "exact" cohorts order = order[n_overlapping_labels[order] > 0] logger.debug("find_group_cohorts: merging cohorts") merged_cohorts = {} merged_keys = set() for rowidx in order: if present_labels[rowidx] in merged_keys: continue cohidx = containment.indices[slice(containment.indptr[rowidx], containment.indptr[rowidx + 1])] cohort_ = present_labels[cohidx] cohort = [elem.item() for elem in cohort_ if elem not in merged_keys] if not cohort: continue merged_keys.update(cohort) allchunks = (label_chunks[member].tolist() for member in cohort) chunk = tuple(set(itertools.chain(*allchunks))) merged_cohorts[chunk] = cohort actual_ngroups = np.concatenate(tuple(merged_cohorts.values())).size expected_ngroups = present_labels.size assert len(merged_keys) == actual_ngroups assert expected_ngroups == actual_ngroups, (expected_ngroups, actual_ngroups) # sort by first label in cohort # This will help when sort=True (default) # and we have to resort the dask array as_sorted = dict(sorted(merged_cohorts.items(), key=lambda kv: kv[1][0])) return preferred_method, as_sorted def rechunk_for_cohorts( array: DaskArray, axis: T_Axis, labels: np.ndarray, force_new_chunk_at: Sequence, chunksize: int | None = None, ignore_old_chunks: bool = False, debug: bool = False, ) -> DaskArray: """ Rechunks array so that each new chunk contains groups that always occur together. Parameters ---------- array : dask.array.Array array to rechunk axis : int Axis to rechunk labels : np.ndarray 1D Group labels to align chunks with. This routine works well when ``labels`` has repeating patterns: e.g. ``1, 2, 3, 1, 2, 3, 4, 1, 2, 3`` though there is no requirement that the pattern must contain sequences. force_new_chunk_at : Sequence Labels at which we always start a new chunk. For the example ``labels`` array, this would be `1`. chunksize : int, optional nominal chunk size. Chunk size is exceeded when the label in ``force_new_chunk_at`` is less than ``chunksize//2`` elements away. If None, uses median chunksize along axis. Returns ------- dask.array.Array rechunked array """ if chunksize is None: chunksize = np.median(array.chunks[axis]).astype(int) if len(labels) != array.shape[axis]: raise ValueError( "labels must be equal to array.shape[axis]. " f"Received length {len(labels)}. 
Expected length {array.shape[axis]}" ) force_new_chunk_at = _atleast_1d(force_new_chunk_at) oldchunks = array.chunks[axis] oldbreaks = np.insert(np.cumsum(oldchunks), 0, 0) if debug: labels_at_breaks = labels[oldbreaks[:-1]] print(labels_at_breaks[:40]) isbreak = np.isin(labels, force_new_chunk_at) if not np.any(isbreak): raise ValueError("One or more labels in ``force_new_chunk_at`` not present in ``labels``.") divisions = [] counter = 1 for idx, lab in enumerate(labels): if lab in force_new_chunk_at or idx == 0: divisions.append(idx) counter = 1 continue next_break = np.nonzero(isbreak[idx:])[0] if next_break.any(): next_break_is_close = next_break[0] <= chunksize // 2 else: next_break_is_close = False if (not ignore_old_chunks and idx in oldbreaks) or (counter >= chunksize and not next_break_is_close): divisions.append(idx) counter = 1 continue counter += 1 divisions.append(len(labels)) if debug: labels_at_breaks = labels[divisions[:-1]] print(labels_at_breaks[:40]) newchunks = tuple(np.diff(divisions)) if debug: print(divisions[:10], newchunks[:10]) print(divisions[-10:], newchunks[-10:]) assert sum(newchunks) == len(labels) if newchunks == array.chunks[axis]: return array else: return array.rechunk({axis: newchunks}) def rechunk_for_blockwise(array: DaskArray, axis: T_Axis, labels: np.ndarray) -> DaskArray: """ Rechunks array so that group boundaries line up with chunk boundaries, allowing embarrassingly parallel group reductions. This only works when the groups are sequential (e.g. labels = ``[0,0,0,1,1,1,1,2,2]``). Such patterns occur when using ``.resample``. Parameters ---------- array : DaskArray Array to rechunk axis : int Axis along which to rechunk the array. labels : np.ndarray Group labels Returns ------- DaskArray Rechunked array """ # TODO: this should be unnecessary? labels = factorize_((labels,), axes=())[0] chunks = array.chunks[axis] newchunks = _get_optimal_chunks_for_groups(chunks, labels) if newchunks == chunks: return array else: return array.rechunk({axis: newchunks}) def reindex_numpy(array, from_: pd.Index, to: pd.Index, fill_value, dtype, axis: int): idx = from_.get_indexer(to) indexer = [slice(None, None)] * array.ndim indexer[axis] = idx reindexed = array[tuple(indexer)] if (idx == -1).any(): if fill_value is None: raise ValueError("Filling is required. fill_value cannot be None.") indexer[axis] = idx == -1 reindexed = reindexed.astype(dtype, copy=False) reindexed[tuple(indexer)] = fill_value return reindexed def reindex_pydata_sparse_coo(array, from_: pd.Index, to: pd.Index, fill_value, dtype, axis: int): import sparse assert axis == -1 # Are there any elements in `to` that are not in `from_`. if isinstance(to, pd.RangeIndex) and len(to) > len(from_): # 1. pandas optimizes set difference between two RangeIndexes only # 2. We want to avoid realizing a very large numpy array in to memory. # This happens in the `else` clause. # There are potentially other tricks we can play, but this is a simple # and effective one. If a user is reindexing to sparse, then len(to) is # almost guaranteed to be > len(from_). If len(to) <= len(from_), then realizing # another array of the same shape should be fine. needs_reindex = True else: needs_reindex = (from_.get_indexer(to) == -1).any() if needs_reindex and fill_value is None: raise ValueError("Filling is required. 
fill_value cannot be None.") idx = to.get_indexer(from_) mask = idx != -1 # indices along last axis to keep if mask.all(): mask = slice(None) shape = array.shape if isinstance(array, sparse.COO): subset = array[..., mask] data = subset.data coords = subset.coords if subset.nnz > 0: coords[-1, :] = idx[mask][coords[-1, :]] if fill_value is None: # no reindexing is actually needed (dense case) # preserve the fill_value fill_value = array.fill_value else: ranges = np.broadcast_arrays( *np.ix_(*(tuple(np.arange(size) for size in shape[:axis]) + (idx[mask],))) ) coords = np.stack(ranges, axis=0).reshape(array.ndim, -1) data = array[..., mask].reshape(-1) reindexed = sparse.COO( coords=coords, data=data.astype(dtype, copy=False), shape=(*array.shape[:axis], to.size), fill_value=fill_value, ) return reindexed def reindex_( array: np.ndarray, from_, to, *, array_type: ReindexArrayType = ReindexArrayType.AUTO, fill_value: Any = None, axis: T_Axis = -1, promote: bool = False, ) -> np.ndarray: if not isinstance(to, pd.Index): if promote: to = pd.Index(to) else: raise ValueError("reindex requires a pandas.Index or promote=True") if to.ndim > 1: raise ValueError(f"Cannot reindex to a multidimensional array: {to}") if array.shape[axis] == 0: # all groups were NaN shape = array.shape[:-1] + (len(to),) if array_type in (ReindexArrayType.AUTO, ReindexArrayType.NUMPY): reindexed = np.full(shape, fill_value, dtype=array.dtype) else: raise NotImplementedError return reindexed from_ = pd.Index(from_) # short-circuit for trivial case if from_.equals(to) and array_type.is_same_type(array): return array if from_.dtype.kind == "O" and isinstance(from_[0], tuple): raise NotImplementedError( "Currently does not support reindexing with object arrays of tuples. " "These occur when grouping by multi-indexed variables in xarray." ) if fill_value is xrdtypes.NA or isnull(fill_value): new_dtype, fill_value = xrdtypes.maybe_promote(array.dtype) else: new_dtype = array.dtype if array_type is ReindexArrayType.AUTO: # TODO: generalize here # Right now, we effectively assume NEP-18 I think # assert isinstance(array, np.ndarray) array_type = ReindexArrayType.NUMPY if array_type is ReindexArrayType.NUMPY: reindexed = reindex_numpy(array, from_, to, fill_value, new_dtype, axis) elif array_type is ReindexArrayType.SPARSE_COO: reindexed = reindex_pydata_sparse_coo(array, from_, to, fill_value, new_dtype, axis) return reindexed def offset_labels(labels: np.ndarray, ngroups: int) -> tuple[np.ndarray, int]: """ Offset group labels by dimension. This is used when we reduce over a subset of the dimensions of by. It assumes that the reductions dimensions have been flattened in the last dimension Copied from xhistogram & https://stackoverflow.com/questions/46256279/bin-elements-per-row-vectorized-2d-bincount-for-numpy """ assert labels.ndim > 1 offset: np.ndarray = ( labels + np.arange(math.prod(labels.shape[:-1])).reshape((*labels.shape[:-1], -1)) * ngroups ) # -1 indicates NaNs. preserve these otherwise we aggregate in the wrong groups! 
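# --- illustrative sketch (standalone, pure NumPy; names with a _demo suffix are
# illustrative only, not flox internals) -----------------------------------------
# The offsetting trick in miniature: shifting row r's labels by r * ngroups lets
# a single flat reduction (here np.bincount) compute an independent groupby per
# row, which is how reductions over a subset of `by`'s dimensions are handled.
import numpy as np

labels_demo = np.array([[0, 1, 0],
                        [1, 1, 0]])
values_demo = np.array([[10.0, 20.0, 30.0],
                        [1.0, 2.0, 3.0]])
ngroups_demo = 2
offset_demo = labels_demo + np.arange(labels_demo.shape[0])[:, None] * ngroups_demo
flat_sums = np.bincount(
    offset_demo.ravel(),
    weights=values_demo.ravel(),
    minlength=labels_demo.shape[0] * ngroups_demo,
)
print(flat_sums.reshape(labels_demo.shape[0], ngroups_demo))
# [[40. 20.]
#  [ 3.  3.]]
# --- end sketch ------------------------------------------------------------------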
offset[labels == -1] = -1 size: int = math.prod(labels.shape[:-1]) * ngroups return offset, size def _factorize_single(by, expect, *, sort: bool, reindex: bool) -> tuple[pd.Index, np.ndarray]: flat = by.reshape(-1) if isinstance(expect, pd.RangeIndex): # idx is a view of the original `by` array # copy here so we don't have a race condition with the # group_idx[nanmask] = nan_sentinel assignment later # this is important in shared-memory parallelism with dask # TODO: figure out how to avoid this idx = flat.copy() found_groups = cast(pd.Index, expect) # TODO: fix by using masked integers idx[idx > expect[-1]] = -1 elif isinstance(expect, pd.IntervalIndex): if expect.closed == "both": raise NotImplementedError bins = np.concatenate([expect.left.to_numpy(), expect.right.to_numpy()[[-1]]]) # digitize is 0 or idx.max() for values outside the bounds of all intervals # make it behave like pd.cut which uses -1: if len(bins) > 1: right = expect.closed_right idx = np.digitize( flat, bins=bins.view(np.int64) if bins.dtype.kind == "M" else bins, right=right, ) idx -= 1 within_bins = flat <= bins.max() if right else flat < bins.max() idx[~within_bins] = -1 else: idx = np.zeros_like(flat, dtype=np.intp) - 1 found_groups = cast(pd.Index, expect) else: if expect is not None and reindex: sorter = np.argsort(expect) groups = expect[(sorter,)] if sort else expect idx = np.searchsorted(expect, flat, sorter=sorter) mask = ~np.isin(flat, expect) | isnull(flat) | (idx == len(expect)) if not sort: # idx is the index in to the sorted array. # if we didn't want sorting, unsort it back idx[(idx == len(expect),)] = -1 idx = sorter[(idx,)] idx[mask] = -1 else: idx, groups = pd.factorize(flat, sort=sort) found_groups = cast(pd.Index, groups) return (found_groups, idx.reshape(by.shape)) def _ravel_factorized(*factorized: np.ndarray, grp_shape: tuple[int, ...]) -> np.ndarray: group_idx = np.ravel_multi_index(factorized, grp_shape, mode="wrap") # NaNs; as well as values outside the bins are coded by -1 # Restore these after the raveling nan_by_mask = reduce(np.logical_or, [(f == -1) for f in factorized]) group_idx[nan_by_mask] = -1 return group_idx @overload def factorize_( by: T_Bys, axes: T_Axes, *, fastpath: Literal[True], expected_groups: T_ExpectIndexOptTuple | None = None, reindex: bool = False, sort: bool = True, ) -> tuple[np.ndarray, tuple[pd.Index, ...], tuple[int, ...], int, int, None]: ... @overload def factorize_( by: T_Bys, axes: T_Axes, *, expected_groups: T_ExpectIndexOptTuple | None = None, reindex: bool = False, sort: bool = True, fastpath: Literal[False] = False, ) -> tuple[np.ndarray, tuple[pd.Index, ...], tuple[int, ...], int, int, FactorProps]: ... @overload def factorize_( by: T_Bys, axes: T_Axes, *, expected_groups: T_ExpectIndexOptTuple | None = None, reindex: bool = False, sort: bool = True, fastpath: bool = False, ) -> tuple[np.ndarray, tuple[pd.Index, ...], tuple[int, ...], int, int, FactorProps | None]: ... def factorize_( by: T_Bys, axes: T_Axes, *, expected_groups: T_ExpectIndexOptTuple | None = None, reindex: bool = False, sort: bool = True, fastpath: bool = False, ) -> tuple[np.ndarray, tuple[pd.Index, ...], tuple[int, ...], int, int, FactorProps | None]: """ Returns an array of integer codes for groups (and associated data) by wrapping pd.cut and pd.factorize (depending on isbin). This method handles reindex and sort so that we don't spend time reindexing / sorting a possibly large results array. 
Instead we set up the appropriate integer codes (group_idx) so that the results come out in the appropriate order. """ if expected_groups is None: expected_groups = (None,) * len(by) if len(by) > 2: with ThreadPoolExecutor() as executor: futures = [ executor.submit(partial(_factorize_single, sort=sort, reindex=reindex), groupvar, expect) for groupvar, expect in zip(by, expected_groups) ] results = tuple(f.result() for f in futures) else: results = tuple( _factorize_single(groupvar, expect, sort=sort, reindex=reindex) for groupvar, expect in zip(by, expected_groups) ) found_groups = tuple(r[0] for r in results) factorized = [r[1] for r in results] grp_shape = tuple(len(grp) for grp in found_groups) ngroups = math.prod(grp_shape) if len(by) > 1: group_idx = _ravel_factorized(*factorized, grp_shape=grp_shape) else: (group_idx,) = factorized if fastpath: return group_idx, found_groups, grp_shape, ngroups, ngroups, None if len(axes) == 1 and by[0].ndim > 1: # Not reducing along all dimensions of by # this is OK because for 3D by and axis=(1,2), # we collapse to a 2D by and axis=-1 offset_group = True group_idx, size = offset_labels(group_idx.reshape(by[0].shape), ngroups) else: size = ngroups offset_group = False # numpy_groupies cannot deal with group_idx = -1 # so we'll add use ngroups as the sentinel # note we cannot simply remove the NaN locations; # that would mess up argmax, argmin nan_sentinel = size if offset_group else ngroups nanmask = group_idx == -1 if nanmask.any(): # bump it up so there's a place to assign values to the nan_sentinel index size += 1 group_idx[nanmask] = nan_sentinel props = FactorProps(offset_group, nan_sentinel, nanmask) return group_idx, tuple(found_groups), grp_shape, ngroups, size, props def chunk_argreduce( array_plus_idx: tuple[np.ndarray, ...], by: np.ndarray, func: T_Funcs, expected_groups: pd.Index | None, axis: T_AxesOpt, fill_value: T_FillValues, dtype: T_Dtypes = None, reindex: bool = False, engine: T_Engine = "numpy", sort: bool = True, user_dtype=None, ) -> IntermediateDict: """ Per-chunk arg reduction. Expects a tuple of (array, index along reduction axis). Inspired by dask.array.reductions.argtopk """ array, idx = array_plus_idx by = np.broadcast_to(by, array.shape) results = chunk_reduce( array, by, func, expected_groups=None, axis=axis, fill_value=fill_value, dtype=dtype, engine=engine, sort=sort, user_dtype=user_dtype, ) if not all(isnull(results["groups"])): idx = np.broadcast_to(idx, array.shape) # array, by get flattened to 1D before passing to npg # so the indexes need to be unraveled newidx = np.unravel_index(results["intermediates"][1], array.shape) # Now index into the actual "global" indexes `idx` results["intermediates"][1] = idx[newidx] if reindex and expected_groups is not None: results["intermediates"][1] = reindex_( results["intermediates"][1], results["groups"].squeeze(), expected_groups, fill_value=0, ) assert results["intermediates"][0].shape == results["intermediates"][1].shape return results def chunk_reduce( array: np.ndarray, by: np.ndarray, func: T_Funcs, expected_groups: pd.Index | None, axis: T_AxesOpt = None, fill_value: T_FillValues = None, dtype: T_Dtypes = None, reindex: bool = False, engine: T_Engine = "numpy", kwargs: Sequence[dict] | None = None, sort: bool = True, user_dtype=None, ) -> IntermediateDict: """ Wrapper for numpy_groupies aggregate that supports nD ``array`` and mD ``by``. Core groupby reduction using numpy_groupies. Uses ``pandas.factorize`` to factorize ``by``. 
Offsets the groups if not reducing along all dimensions of ``by``. Always ravels ``by`` to 1D, flattens appropriate dimensions of array. When dask arrays are passed to groupby_reduce, this function is called on every block. Parameters ---------- array : numpy.ndarray Array of values to reduced by : numpy.ndarray Array to group by. func : str or Callable or Sequence[str] or Sequence[Callable] Name of reduction or function, passed to numpy_groupies. Supports multiple reductions. axis : (optional) int or Sequence[int] If None, reduce along all dimensions of array. Else reduce along specified axes. Returns ------- dict """ funcs = _atleast_1d(func) nfuncs = len(funcs) dtypes = _atleast_1d(dtype, nfuncs) fill_values = _atleast_1d(fill_value, nfuncs) kwargss = _atleast_1d({}, nfuncs) if kwargs is None else kwargs if isinstance(axis, Sequence): axes: T_Axes = axis nax = len(axes) else: nax = by.ndim axes = () if axis is None else (axis,) * nax assert by.ndim <= array.ndim final_array_shape = array.shape[:-nax] + (1,) * (nax - 1) final_groups_shape = (1,) * (nax - 1) if 1 < nax < by.ndim: # when axis is a tuple # collapse and move reduction dimensions to the end by = _collapse_axis(by, nax) array = _collapse_axis(array, nax) axes = (-1,) nax = 1 # if indices=[2,2,2], npg assumes groups are (0, 1, 2); # and will return a result that is bigger than necessary # avoid by factorizing again so indices=[2,2,2] is changed to # indices=[0,0,0]. This is necessary when combining block results # factorize can handle strings etc unlike digitize group_idx, grps, found_groups_shape, _, size, props = factorize_( (by,), axes, expected_groups=(expected_groups,), reindex=bool(reindex), sort=sort ) (groups,) = grps # do this *before* possible broadcasting below. # factorize_ has already taken care of offsetting if engine == "numbagg": seen_groups = _unique(group_idx) order = "C" if nax > 1: needs_broadcast = any( group_idx.shape[ax] != array.shape[ax] and group_idx.shape[ax] == 1 for ax in range(-nax, 0) ) if needs_broadcast: # This is the dim=... case, it's a lot faster to ravel group_idx # in fortran order since group_idx is then sorted # I'm seeing 400ms -> 23ms for engine="flox" # Of course we are slower to ravel `array` but we avoid argsorting # both `array` *and* `group_idx` in _prepare_for_flox group_idx = np.broadcast_to(group_idx, array.shape[-by.ndim :]) if engine == "flox": group_idx = group_idx.reshape(-1, order="F") order = "F" # always reshape to 1D along group dimensions newshape = array.shape[: array.ndim - by.ndim] + (math.prod(array.shape[-by.ndim :]),) array = array.reshape(newshape, order=order) # type: ignore[call-overload] group_idx = group_idx.reshape(-1) assert group_idx.ndim == 1 empty = np.all(props.nanmask) hasnan = np.any(props.nanmask) results: IntermediateDict = {"groups": [], "intermediates": []} if reindex and expected_groups is not None: # TODO: what happens with binning here? results["groups"] = expected_groups else: if empty: results["groups"] = np.array([np.nan]) else: results["groups"] = groups # npg's argmax ensures that index of first "max" is returned assuming there # are many elements equal to the "max". Sorting messes this up totally. 
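# --- illustrative sketch (standalone; uses pandas and numpy_groupies, both of
# which flox already depends on; names with a _demo suffix are illustrative) ------
# The essence of what this per-block reduction computes for a single function:
# factorize the labels to integer codes, then hand those codes to numpy_groupies.
import numpy as np
import numpy_groupies as npg
import pandas as pd

by_demo = np.array(["a", "b", "a", "c", "b"])
values_demo = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
codes, uniques = pd.factorize(by_demo, sort=True)
sums = npg.aggregate(codes, values_demo, func="sum", size=len(uniques))
print(uniques.tolist(), sums.tolist())  # ['a', 'b', 'c'] [4.0, 7.0, 4.0]
# --- end sketch ------------------------------------------------------------------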
# so we skip this for argreductions if engine == "flox": # is_arg_reduction = any("arg" in f for f in func if isinstance(f, str)) # if not is_arg_reduction: group_idx, array, _ = _prepare_for_flox(group_idx, array) final_array_shape += results["groups"].shape final_groups_shape += results["groups"].shape # we commonly have func=(..., "nanlen", "nanlen") when # counts are needed for the final result as well as for masking # optimize that out. previous_reduction: T_Func = "" for reduction, fv, kw, dt in zip(funcs, fill_values, kwargss, dtypes): if empty: result = np.full(shape=final_array_shape, fill_value=fv) elif is_nanlen(reduction) and is_nanlen(previous_reduction): result = results["intermediates"][-1] else: # fill_value here is necessary when reducing with "offset" groups kw_func = dict(size=size, dtype=dt, fill_value=fv) kw_func.update(kw) if callable(reduction): # passing a custom reduction for npg to apply per-group is really slow! # So this `reduction` has to do the groupby-aggregation result = reduction(group_idx, array, **kw_func) else: result = generic_aggregate( group_idx, array, axis=-1, engine=engine, func=reduction, **kw_func ).astype(dt, copy=False) if engine == "numbagg": result = _postprocess_numbagg( result, func=reduction, size=size, fill_value=fv, # Unfortunately, we cannot reuse found_groups, it has not # been "offset" and is really expected_groups in nearly all cases seen_groups=seen_groups, ) if hasnan: # remove NaN group label which should be last result = result[..., :-1] # TODO: Figure out how to generalize this if reduction in ("quantile", "nanquantile"): new_dims_shape = tuple(dim.size for dim in quantile_new_dims_func(**kw) if not dim.is_scalar) else: new_dims_shape = tuple() result = result.reshape(new_dims_shape + final_array_shape[:-1] + found_groups_shape) results["intermediates"].append(result) previous_reduction = reduction results["groups"] = np.broadcast_to(results["groups"], final_groups_shape) return results def _squeeze_results(results: IntermediateDict, axis: T_Axes) -> IntermediateDict: # at the end we squeeze out extra dims groups = results["groups"] newresults: IntermediateDict = {"groups": [], "intermediates": []} newresults["groups"] = np.squeeze( groups, axis=tuple(ax for ax in range(groups.ndim - 1) if groups.shape[ax] == 1) ) for v in results["intermediates"]: squeeze_ax = tuple(ax for ax in sorted(axis)[:-1] if v.shape[ax] == 1) newresults["intermediates"].append(np.squeeze(v, axis=squeeze_ax) if squeeze_ax else v) return newresults def _finalize_results( results: IntermediateDict, agg: Aggregation, axis: T_Axes, expected_groups: pd.Index | None, reindex: ReindexStrategy, ) -> FinalResultsDict: """Finalize results by 1. Squeezing out dummy dimensions 2. Calling agg.finalize with intermediate results 3. Mask using counts and fill with user-provided fill_value. 4. 
reindex to expected_groups """ squeezed = _squeeze_results(results, tuple(agg.num_new_vector_dims + ax for ax in axis)) min_count = agg.min_count if min_count > 0: counts = squeezed["intermediates"][-1] squeezed["intermediates"] = squeezed["intermediates"][:-1] # finalize step finalized: FinalResultsDict = {} if agg.finalize is None: finalized[agg.name] = squeezed["intermediates"][0] else: finalized[agg.name] = agg.finalize(*squeezed["intermediates"], **agg.finalize_kwargs) fill_value = agg.fill_value["user"] if min_count > 0: count_mask = counts < min_count if count_mask.any() or reindex.array_type is ReindexArrayType.SPARSE_COO: # For one count_mask.any() prevents promoting bool to dtype(fill_value) unless # necessary if fill_value is None: raise ValueError("Filling is required but fill_value is None.") # This allows us to match xarray's type promotion rules if fill_value is xrdtypes.NA: new_dtype, fill_value = xrdtypes.maybe_promote(finalized[agg.name].dtype) finalized[agg.name] = finalized[agg.name].astype(new_dtype) finalized[agg.name] = np.where(count_mask, fill_value, finalized[agg.name]) # Final reindexing has to be here to be lazy if not reindex.blockwise and expected_groups is not None: finalized[agg.name] = reindex_( finalized[agg.name], squeezed["groups"], expected_groups, fill_value=fill_value, array_type=reindex.array_type, ) finalized["groups"] = expected_groups else: finalized["groups"] = squeezed["groups"] finalized[agg.name] = finalized[agg.name].astype(agg.dtype["final"], copy=False) return finalized def _aggregate( x_chunk, combine: Callable, agg: Aggregation, expected_groups: pd.Index | None, axis: T_Axes, keepdims: bool, fill_value: Any, reindex: ReindexStrategy, ) -> FinalResultsDict: """Final aggregation step of tree reduction""" results = combine(x_chunk, agg, axis, keepdims, is_aggregate=True) return _finalize_results(results, agg, axis, expected_groups, reindex=reindex) def _expand_dims(results: IntermediateDict) -> IntermediateDict: results["intermediates"] = tuple(np.expand_dims(array, DUMMY_AXIS) for array in results["intermediates"]) return results def _find_unique_groups(x_chunk) -> np.ndarray: from dask.base import flatten from dask.utils import deepmap unique_groups = _unique(np.asarray(tuple(flatten(deepmap(listify_groups, x_chunk))))) unique_groups = unique_groups[notnull(unique_groups)] if len(unique_groups) == 0: unique_groups = np.array([np.nan]) return unique_groups def _simple_combine( x_chunk, agg: Aggregation, axis: T_Axes, keepdims: bool, reindex: ReindexStrategy, is_aggregate: bool = False, ) -> IntermediateDict: """ 'Simple' combination of blockwise results. 1. After the blockwise groupby-reduce, all blocks contain a value for all possible groups, and are of the same shape; i.e. reindex must have been True 2. _expand_dims was used to insert an extra axis DUMMY_AXIS 3. Here we concatenate along DUMMY_AXIS, and then call the combine function along DUMMY_AXIS 4. 
At the final aggregate step, we squeeze out DUMMY_AXIS """ from dask.array.core import deepfirst from dask.utils import deepmap if not reindex.blockwise: # We didn't reindex at the blockwise step # So now reindex before combining by reducing along DUMMY_AXIS unique_groups = _find_unique_groups(x_chunk) x_chunk = deepmap( partial( reindex_intermediates, agg=agg, unique_groups=unique_groups, array_type=reindex.array_type, ), x_chunk, ) else: unique_groups = deepfirst(x_chunk)["groups"] results: IntermediateDict = {"groups": unique_groups} results["intermediates"] = [] axis_ = axis[:-1] + (DUMMY_AXIS,) for idx, combine in enumerate(agg.simple_combine): array = _conc2(x_chunk, key1="intermediates", key2=idx, axis=axis_) assert array.ndim >= 2 with warnings.catch_warnings(): warnings.filterwarnings("ignore", r"All-NaN (slice|axis) encountered") assert callable(combine) result = combine(array, axis=axis_, keepdims=True) if is_aggregate: # squeeze out DUMMY_AXIS if this is the last step i.e. called from _aggregate # can't just pass DUMMY_AXIS, because of sparse.COO result = result.squeeze(range(result.ndim)[DUMMY_AXIS]) results["intermediates"].append(result) return results def _conc2(x_chunk, key1, key2=slice(None), axis: T_Axes | None = None) -> np.ndarray: """copied from dask.array.reductions.mean_combine""" from dask.array.core import _concatenate2 from dask.utils import deepmap mapped = deepmap(lambda x: x[key1][key2], x_chunk) return _concatenate2(mapped, axes=axis) # This doesn't seem to improve things at all; and some tests fail... # from dask.array.core import concatenate3 # for _ in range(mapped[0].ndim-1): # mapped = [mapped] # return concatenate3(mapped) def reindex_intermediates( x: IntermediateDict, agg: Aggregation, unique_groups, array_type ) -> IntermediateDict: new_shape = x["groups"].shape[:-1] + (len(unique_groups),) newx: IntermediateDict = {"groups": np.broadcast_to(unique_groups, new_shape)} newx["intermediates"] = tuple( reindex_( v, from_=np.atleast_1d(x["groups"].squeeze()), to=pd.Index(unique_groups), fill_value=f, array_type=array_type, ) for v, f in zip(x["intermediates"], agg.fill_value["intermediate"]) ) return newx def listify_groups(x: IntermediateDict): return list(np.atleast_1d(x["groups"].squeeze())) def _grouped_combine( x_chunk, agg: Aggregation, axis: T_Axes, keepdims: bool, engine: T_Engine, is_aggregate: bool = False, sort: bool = True, ) -> IntermediateDict: """Combine intermediates step of tree reduction.""" from dask.utils import deepmap combine = agg.combine if isinstance(x_chunk, dict): # Only one block at final step; skip one extra groupby return x_chunk if len(axis) != 1: # when there's only a single axis of reduction, we can just concatenate later, # reindexing is unnecessary # I bet we can minimize the amount of reindexing for mD reductions too, but it's complicated unique_groups = _find_unique_groups(x_chunk) x_chunk = deepmap( partial( reindex_intermediates, agg=agg, unique_groups=unique_groups, array_type=ReindexArrayType.AUTO ), x_chunk, ) # these are negative axis indices useful for concatenating the intermediates neg_axis = tuple(range(-len(axis), 0)) groups = _conc2(x_chunk, "groups", axis=neg_axis) if agg.reduction_type == "argreduce": # If "nanlen" was added for masking later, we need to account for that if agg.chunk[-1] == "nanlen": slicer = slice(None, -1) else: slicer = slice(None, None) # We need to send the intermediate array values & indexes at the same time # intermediates are (value e.g. max, index e.g. 
argmax, counts) array_idx = tuple(_conc2(x_chunk, key1="intermediates", key2=idx, axis=axis) for idx in (0, 1)) # for a single element along axis, we don't want to run the argreduction twice # This happens when we are reducing along an axis with a single chunk. avoid_reduction = array_idx[0].shape[axis[0]] == 1 if avoid_reduction: results: IntermediateDict = { "groups": groups, "intermediates": list(array_idx), } else: results = chunk_argreduce( array_idx, groups, # count gets treated specially next func=combine[slicer], # type: ignore[arg-type] axis=axis, expected_groups=None, fill_value=agg.fill_value["intermediate"][slicer], dtype=agg.dtype["intermediate"][slicer], engine=engine, sort=sort, ) if agg.chunk[-1] == "nanlen": counts = _conc2(x_chunk, key1="intermediates", key2=2, axis=axis) if avoid_reduction: results["intermediates"].append(counts) else: # sum the counts results["intermediates"].append( chunk_reduce( counts, groups, func="sum", axis=axis, expected_groups=None, fill_value=(0,), dtype=(np.intp,), engine=engine, sort=sort, user_dtype=agg.dtype["user"], )["intermediates"][0] ) elif agg.reduction_type == "reduce": # Here we reduce the intermediates individually results = {"groups": None, "intermediates": []} for idx, (combine_, fv, dtype) in enumerate( zip(combine, agg.fill_value["intermediate"], agg.dtype["intermediate"]) ): assert combine_ is not None array = _conc2(x_chunk, key1="intermediates", key2=idx, axis=axis) if array.shape[-1] == 0: # all empty when combined results["intermediates"].append(np.empty(shape=(1,) * (len(axis) - 1) + (0,), dtype=dtype)) results["groups"] = np.empty(shape=(1,) * (len(neg_axis) - 1) + (0,), dtype=groups.dtype) else: _results = chunk_reduce( array, groups, func=combine_, axis=axis, expected_groups=None, fill_value=(fv,), dtype=(dtype,), engine=engine, sort=sort, user_dtype=agg.dtype["user"], ) results["intermediates"].append(*_results["intermediates"]) results["groups"] = _results["groups"] return results def _reduce_blockwise( array, by, agg: Aggregation, *, axis: T_Axes, expected_groups, fill_value: Any, engine: T_Engine, sort: bool, reindex: ReindexStrategy, ) -> FinalResultsDict: """ Blockwise groupby reduction that produces the final result. This code path is also used for non-dask array aggregations. """ # for pure numpy grouping, we just use npg directly and avoid "finalizing" # (agg.finalize = None). We still need to do the reindexing step in finalize # so that everything matches the dask version. agg.finalize = None assert agg.finalize_kwargs is not None finalize_kwargs_: tuple[dict[Any, Any], ...] = (agg.finalize_kwargs,) + ({},) + ({},) results = chunk_reduce( array, by, func=agg.numpy, axis=axis, expected_groups=expected_groups, # This fill_value should only apply to groups that only contain NaN observations # BUT there is funkiness when axis is a subset of all possible values # (see below) fill_value=agg.fill_value["numpy"], dtype=agg.dtype["numpy"], kwargs=finalize_kwargs_, engine=engine, sort=sort, reindex=bool(reindex.blockwise), user_dtype=agg.dtype["user"], ) if _is_arg_reduction(agg): results["intermediates"][0] = np.unravel_index(results["intermediates"][0], array.shape)[-1] result = _finalize_results(results, agg, axis, expected_groups, reindex=reindex) return result def _normalize_indexes(ndim: int, flatblocks: Sequence[int], blkshape: tuple[int, ...]) -> tuple: """ .blocks accessor can only accept one iterable at a time, but can handle multiple slices. 
To minimize tasks and layers, we normalize to produce slices along as many axes as possible, and then repeatedly apply any remaining iterables in a loop. TODO: move this upstream """ unraveled = np.unravel_index(flatblocks, blkshape) normalized: list[int | slice | list[int]] = [] for ax, idx in enumerate(unraveled): i = _unique(idx).squeeze() if i.ndim == 0: normalized.append(i.item()) else: if len(i) == blkshape[ax] and np.array_equal(i, np.arange(blkshape[ax])): normalized.append(slice(None)) elif _issorted(i) and np.array_equal(i, np.arange(i[0], i[-1] + 1)): start = None if i[0] == 0 else i[0] stop = i[-1] + 1 stop = None if stop == blkshape[ax] else stop normalized.append(slice(start, stop)) else: normalized.append(list(i)) full_normalized = (slice(None),) * (ndim - len(normalized)) + tuple(normalized) # has no iterables noiter = list(i if not hasattr(i, "__len__") else slice(None) for i in full_normalized) # has all iterables alliter = {ax: i for ax, i in enumerate(full_normalized) if hasattr(i, "__len__")} mesh = dict(zip(alliter.keys(), np.ix_(*alliter.values()))) # type: ignore[arg-type, var-annotated] full_tuple = tuple(i if ax not in mesh else mesh[ax] for ax, i in enumerate(noiter)) return full_tuple def subset_to_blocks( array: DaskArray, flatblocks: Sequence[int], blkshape: tuple[int, ...] | None = None, reindexer=identity, chunks_as_array: tuple[np.ndarray, ...] | None = None, ) -> ArrayLayer: """ Advanced indexing of .blocks such that we always get a regular array back. Parameters ---------- array : dask.array flatblocks : flat indices of blocks to extract blkshape : shape of blocks with which to unravel flatblocks Returns ------- dask.array """ from dask.base import tokenize if blkshape is None: blkshape = array.blocks.shape if chunks_as_array is None: chunks_as_array = tuple(np.array(c) for c in array.chunks) index = _normalize_indexes(array.ndim, flatblocks, blkshape) # These rest is copied from dask.array.core.py with slight modifications index = tuple(slice(k, k + 1) if isinstance(k, Integral) else k for k in index) name = "groupby-cohort-" + tokenize(array, index) new_keys = array._key_array[index] squeezed = tuple(np.squeeze(i) if isinstance(i, np.ndarray) else i for i in index) chunks = tuple(tuple(c[i].tolist()) for c, i in zip(chunks_as_array, squeezed)) keys = itertools.product(*(range(len(c)) for c in chunks)) layer: Graph = {(name,) + key: (reindexer, tuple(new_keys[key].tolist())) for key in keys} return ArrayLayer(layer=layer, chunks=chunks, name=name) def _extract_unknown_groups(reduced, dtype) -> tuple[DaskArray]: import dask.array from dask.highlevelgraph import HighLevelGraph groups_token = f"group-{reduced.name}" first_block = reduced.ndim * (0,) layer: Graph = {(groups_token, 0): (operator.getitem, (reduced.name, *first_block), "groups")} groups: tuple[DaskArray] = ( dask.array.Array( HighLevelGraph.from_collections(groups_token, layer, dependencies=[reduced]), groups_token, chunks=((np.nan,),), meta=np.array([], dtype=dtype), ), ) return groups def _unify_chunks(array, by): from dask.array import from_array, unify_chunks inds = tuple(range(array.ndim)) # Unifying chunks is necessary for argreductions. 
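# --- illustrative sketch (standalone; requires dask, an optional flox dependency;
# names with a _demo suffix are illustrative only) --------------------------------
# What unify_chunks does here: rechunk both operands to the common refinement of
# their chunk boundaries, so the data and its positional index can be zipped
# together block-by-block for argreductions.
import dask.array as da
import numpy as np

data_demo = da.ones((6,), chunks=3)
index_demo = da.from_array(np.arange(6), chunks=2)
_, (data2, index2) = da.unify_chunks(data_demo, "i", index_demo, "i")
print(data2.chunks, index2.chunks)  # ((2, 1, 1, 2),) ((2, 1, 1, 2),)
# --- end sketch ------------------------------------------------------------------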
# We need to rechunk before zipping up with the index # let's always do it anyway if not is_duck_dask_array(by): # chunk numpy arrays like the input array # This removes an extra rechunk-merge layer that would be # added otherwise chunks = tuple(array.chunks[ax] if by.shape[ax] != 1 else (1,) for ax in range(-by.ndim, 0)) by = from_array(by, chunks=chunks) _, (array, by) = unify_chunks(array, inds, by, inds[-by.ndim :]) return array, by def dask_groupby_agg( array: DaskArray, by: T_By, *, agg: Aggregation, expected_groups: pd.RangeIndex | None, reindex: ReindexStrategy, axis: T_Axes = (), fill_value: Any = None, method: T_Method = "map-reduce", engine: T_Engine = "numpy", sort: bool = True, chunks_cohorts=None, ) -> tuple[DaskArray, tuple[pd.Index | np.ndarray | DaskArray]]: import dask.array from dask.array.core import slices_from_chunks from dask.highlevelgraph import HighLevelGraph from .dask_array_ops import _tree_reduce # I think _tree_reduce expects this assert isinstance(axis, Sequence) assert all(ax >= 0 for ax in axis) inds = tuple(range(array.ndim)) name = f"groupby_{agg.name}" if expected_groups is None and reindex.blockwise: raise ValueError("reindex.blockwise must be False-y if expected_groups is not provided.") if method == "cohorts" and reindex.blockwise: raise ValueError("reindex.blockwise must be False-y if method is 'cohorts'.") by_input = by array, by = _unify_chunks(array, by) # tokenize here since by has already been hashed if its numpy token = dask.base.tokenize(array, by, agg, expected_groups, axis, method) # preprocess the array: # - for argreductions, this zips the index together with the array block # - not necessary for blockwise with argreductions # - if this is needed later, we can fix this then if agg.preprocess and method != "blockwise": array = agg.preprocess(array, axis=axis) # 1. We first apply the groupby-reduction blockwise to generate "intermediates" # 2. These intermediate results are combined to generate the final result using a # "map-reduce" or "tree reduction" approach. # There are two ways: # a. "_simple_combine": Where it makes sense, we tree-reduce the reduction, # NOT the groupby-reduction for a speed boost. This is what xhistogram does (effectively), # It requires that all blocks contain all groups after the initial blockwise step (1) i.e. # reindex.blockwise=True, and we must know expected_groups # b. "_grouped_combine": A more general solution where we tree-reduce the groupby reduction. 
# This allows us to discover groups at compute time, support argreductions, lower intermediate # memory usage (but method="cohorts" would also work to reduce memory in some cases) labels_are_unknown = is_duck_dask_array(by_input) and expected_groups is None do_grouped_combine = ( _is_arg_reduction(agg) or labels_are_unknown or (_is_first_last_reduction(agg) and array.dtype.kind != "f") ) do_simple_combine = not do_grouped_combine if method == "blockwise": # use the "non dask" code path, but applied blockwise blockwise_method = partial(_reduce_blockwise, agg=agg, fill_value=fill_value, reindex=reindex) else: # choose `chunk_reduce` or `chunk_argreduce` blockwise_method = partial( _get_chunk_reduction(agg.reduction_type), func=agg.chunk, reindex=reindex.blockwise, fill_value=agg.fill_value["intermediate"], dtype=agg.dtype["intermediate"], user_dtype=agg.dtype["user"], ) if do_simple_combine: # Add a dummy dimension that then gets reduced over blockwise_method = tlz.compose(_expand_dims, blockwise_method) # apply reduction on chunk intermediate = dask.array.blockwise( partial( blockwise_method, axis=axis, expected_groups=expected_groups if reindex.blockwise else None, engine=engine, sort=sort, ), # output indices are the same as input indices # Unlike xhistogram, we don't always know what the size of the group # dimension will be unless reindex=True inds, array, inds, by, inds[-by.ndim :], concatenate=False, dtype=array.dtype, # this is purely for show meta=array._meta, align_arrays=False, name=f"{name}-chunk-{token}", ) group_chunks: tuple[tuple[int | float, ...]] if method in ["map-reduce", "cohorts"]: combine: Callable[..., IntermediateDict] = ( partial(_simple_combine, reindex=reindex) if do_simple_combine else partial(_grouped_combine, engine=engine, sort=sort) ) tree_reduce = partial( dask.array.reductions._tree_reduce, name=f"{name}-simple-reduce", dtype=array.dtype, axis=axis, keepdims=True, concatenate=False, ) aggregate = partial(_aggregate, combine=combine, agg=agg, fill_value=fill_value, reindex=reindex) # Each chunk of `reduced`` is really a dict mapping # 1. reduction name to array # 2. 
"groups" to an array of group labels # Note: it does not make sense to interpret axis relative to # shape of intermediate results after the blockwise call if method == "map-reduce": reduced = tree_reduce( intermediate, combine=partial(combine, agg=agg), aggregate=partial(aggregate, expected_groups=expected_groups), ) if labels_are_unknown: groups = _extract_unknown_groups(reduced, dtype=by.dtype) group_chunks = ((np.nan,),) else: assert expected_groups is not None groups = (expected_groups,) group_chunks = ((len(expected_groups),),) elif method == "cohorts": assert chunks_cohorts block_shape = array.blocks.shape[-len(axis) :] out_name = f"{name}-reduce-{method}-{token}" groups_ = [] chunks_as_array = tuple(np.array(c) for c in array.chunks) dsk: Graph = {} for icohort, (blks, cohort) in enumerate(chunks_cohorts.items()): cohort_index = pd.Index(cohort) reindexer = ( partial( reindex_intermediates, agg=agg, unique_groups=cohort_index, array_type=reindex.array_type, ) if do_simple_combine else identity ) subset = subset_to_blocks(intermediate, blks, block_shape, reindexer, chunks_as_array) dsk |= subset.layer # type: ignore[operator] # now that we have reindexed, we can set reindex=True explicitlly new_reindex = ReindexStrategy(blockwise=do_simple_combine, array_type=reindex.array_type) _tree_reduce( subset, out_dsk=dsk, name=out_name, block_index=icohort, axis=axis, combine=partial(combine, agg=agg, reindex=new_reindex, keepdims=True), aggregate=partial( aggregate, expected_groups=cohort_index, reindex=new_reindex, keepdims=True ), ) # This is done because pandas promotes to 64-bit types when an Index is created # So we use the index to generate the return value for consistency with "map-reduce" # This is important on windows groups_.append(cohort_index.values) graph = HighLevelGraph.from_collections(out_name, dsk, dependencies=[intermediate]) out_chunks = list(array.chunks) out_chunks[axis[-1]] = tuple(len(c) for c in chunks_cohorts.values()) for ax in axis[:-1]: out_chunks[ax] = (1,) reduced = dask.array.Array(graph, out_name, out_chunks, meta=array._meta) groups = (np.concatenate(groups_),) group_chunks = (tuple(len(cohort) for cohort in groups_),) elif method == "blockwise": reduced = intermediate if reindex.blockwise: if TYPE_CHECKING: assert expected_groups is not None # TODO: we could have `expected_groups` be a dask array with appropriate chunks # for now, we have a numpy array that is interpreted as listing all group labels # that are present in every chunk groups = (expected_groups,) group_chunks = ((len(expected_groups),),) else: # TODO: use chunks_cohorts here; hard because chunks_cohorts does not include all-NaN blocks # but the array after applying the blockwise op; does. We'd have to insert a subsetting op. 
# Here one input chunk → one output chunks # find number of groups in each chunk, this is needed for output chunks # along the reduced axis # TODO: this logic is very specialized for the resampling case slices = slices_from_chunks(tuple(array.chunks[ax] for ax in axis)) groups_in_block = tuple(_unique(by_input[slc]) for slc in slices) groups = (np.concatenate(groups_in_block),) ngroups_per_block = tuple(len(grp) for grp in groups_in_block) group_chunks = (ngroups_per_block,) else: raise ValueError(f"Unknown method={method}.") # Adjust output for any new dimensions added, example for multiple quantiles new_dims_shape = tuple(dim.size for dim in agg.new_dims if not dim.is_scalar) new_inds = tuple(range(-len(new_dims_shape), 0)) out_inds = new_inds + inds[: -len(axis)] + (inds[-1],) output_chunks = new_dims_shape + reduced.chunks[: -len(axis)] + group_chunks new_axes = dict(zip(new_inds, new_dims_shape)) if method == "blockwise" and len(axis) > 1: # The final results are available but the blocks along axes # need to be reshaped to axis=-1 # I don't know that this is possible with blockwise # All other code paths benefit from an unmaterialized Blockwise layer reduced = _collapse_blocks_along_axes(reduced, axis, group_chunks) # Can't use map_blocks because it forces concatenate=True along drop_axes, result = dask.array.blockwise( _extract_result, out_inds, reduced, inds, adjust_chunks=dict(zip(out_inds, output_chunks)), key=agg.name, name=f"{name}-{token}", concatenate=False, new_axes=new_axes, meta=reindex.get_dask_meta(array, dtype=agg.dtype["final"], fill_value=agg.fill_value[agg.name]), ) return (result, groups) def cubed_groupby_agg( array: CubedArray, by: T_By, agg: Aggregation, expected_groups: pd.Index | None, reindex: ReindexStrategy, axis: T_Axes = (), fill_value: Any = None, method: T_Method = "map-reduce", engine: T_Engine = "numpy", sort: bool = True, chunks_cohorts=None, ) -> tuple[CubedArray, tuple[pd.Index | np.ndarray | CubedArray]]: import cubed import cubed.core.groupby # I think _tree_reduce expects this assert isinstance(axis, Sequence) assert all(ax >= 0 for ax in axis) if method == "blockwise": assert by.ndim == 1 assert expected_groups is not None def _reduction_func(a, by, axis, start_group, num_groups): # adjust group labels to start from 0 for each chunk by_for_chunk = by - start_group expected_groups_for_chunk = pd.RangeIndex(num_groups) axis = (axis,) # convert integral axis to tuple blockwise_method = partial( _reduce_blockwise, agg=agg, axis=axis, expected_groups=expected_groups_for_chunk, fill_value=fill_value, engine=engine, sort=sort, reindex=reindex, ) out = blockwise_method(a, by_for_chunk) return out[agg.name] num_groups = len(expected_groups) result = cubed.core.groupby.groupby_blockwise( array, by, axis=axis, func=_reduction_func, num_groups=num_groups ) groups = (expected_groups,) return (result, groups) else: inds = tuple(range(array.ndim)) by_input = by # Unifying chunks is necessary for argreductions. # We need to rechunk before zipping up with the index # let's always do it anyway if not is_chunked_array(by): # chunk numpy arrays like the input array chunks = tuple(array.chunks[ax] if by.shape[ax] != 1 else (1,) for ax in range(-by.ndim, 0)) by = cubed.from_array(by, chunks=chunks, spec=array.spec) _, (array, by) = cubed.core.unify_chunks(array, inds, by, inds[-by.ndim :]) # Cubed's groupby_reduction handles the generation of "intermediates", and the # "map-reduce" combination step, so we don't have to do that here. 
# Only the equivalent of "_simple_combine" is supported, there is no # support for "_grouped_combine". labels_are_unknown = is_chunked_array(by_input) and expected_groups is None do_simple_combine = not _is_arg_reduction(agg) and not labels_are_unknown assert do_simple_combine assert method == "map-reduce" assert expected_groups is not None assert reindex.blockwise is True assert len(axis) == 1 # one axis/grouping def _groupby_func(a, by, axis, intermediate_dtype, num_groups): blockwise_method = partial( _get_chunk_reduction(agg.reduction_type), func=agg.chunk, fill_value=agg.fill_value["intermediate"], dtype=agg.dtype["intermediate"], reindex=reindex, user_dtype=agg.dtype["user"], axis=axis, expected_groups=expected_groups, engine=engine, sort=sort, ) out = blockwise_method(a, by) # Convert dict to one that cubed understands, dropping groups since they are # known, and the same for every block. return {f"f{idx}": intermediate for idx, intermediate in enumerate(out["intermediates"])} def _groupby_combine(a, axis, dummy_axis, dtype, keepdims): # this is similar to _simple_combine, except the dummy axis and concatenation is handled by cubed # only combine over the dummy axis, to preserve grouping along 'axis' dtype = dict(dtype) out = {} for idx, combine in enumerate(agg.simple_combine): field = f"f{idx}" out[field] = combine(a[field], axis=dummy_axis, keepdims=keepdims) return out def _groupby_aggregate(a, **kwargs): # Convert cubed dict to one that _finalize_results works with results = {"groups": expected_groups, "intermediates": a.values()} out = _finalize_results(results, agg, axis, expected_groups, reindex) return out[agg.name] # convert list of dtypes to a structured dtype for cubed intermediate_dtype = [(f"f{i}", dtype) for i, dtype in enumerate(agg.dtype["intermediate"])] dtype = agg.dtype["final"] num_groups = len(expected_groups) result = cubed.core.groupby.groupby_reduction( array, by, func=_groupby_func, combine_func=_groupby_combine, aggregate_func=_groupby_aggregate, axis=axis, intermediate_dtype=intermediate_dtype, dtype=dtype, num_groups=num_groups, ) groups = (expected_groups,) return (result, groups) def _collapse_blocks_along_axes(reduced: DaskArray, axis: T_Axes, group_chunks) -> DaskArray: import dask.array from dask.highlevelgraph import HighLevelGraph nblocks = tuple(reduced.numblocks[ax] for ax in axis) output_chunks = reduced.chunks[: -len(axis)] + ((1,) * (len(axis) - 1),) + group_chunks # extract results from the dict ochunks = tuple(range(len(chunks_v)) for chunks_v in output_chunks) layer2: dict[tuple, tuple] = {} name = f"reshape-{reduced.name}" for ochunk in itertools.product(*ochunks): inchunk = ochunk[: -len(axis)] + np.unravel_index(ochunk[-1], nblocks) layer2[(name, *ochunk)] = (reduced.name, *inchunk) layer2: Graph return dask.array.Array( HighLevelGraph.from_collections(name, layer2, dependencies=[reduced]), name, chunks=output_chunks, dtype=reduced.dtype, ) def _extract_result(result_dict: FinalResultsDict, key) -> np.ndarray: from dask.array.core import deepfirst # deepfirst should be not be needed here but sometimes we receive a list of dict? 
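    # Extract the finalized array for this aggregation (keyed by ``agg.name``) from the
    # results dict; this is the final blockwise step in ``dask_groupby_agg``.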
return deepfirst(result_dict)[key] def _validate_reindex( reindex: ReindexStrategy | bool | None, func, method: T_MethodOpt, expected_groups, any_by_dask: bool, is_dask_array: bool, array_dtype: Any, ) -> ReindexStrategy: # logger.debug("Entering _validate_reindex: reindex is {}".format(reindex)) # noqa def first_or_last(): return func in ["first", "last"] or (_is_first_last_reduction(func) and array_dtype.kind != "f") all_eager = not is_dask_array and not any_by_dask if reindex is True and not all_eager: if _is_arg_reduction(func): raise NotImplementedError if method == "cohorts" or (method == "blockwise" and not any_by_dask): raise ValueError("reindex=True is not a valid choice for method='blockwise' or method='cohorts'.") if first_or_last(): raise ValueError("reindex must be None or False when func is 'first' or 'last.") if isinstance(reindex, ReindexStrategy): reindex_ = reindex else: reindex_ = ReindexStrategy(blockwise=reindex) if reindex_.blockwise is None: if method is None: # logger.debug("Leaving _validate_reindex: method = None, returning None") return ReindexStrategy(blockwise=None) if all_eager: return ReindexStrategy(blockwise=True) if first_or_last(): # have to do the grouped_combine since there's no good fill_value # Also needed for nanfirst, nanlast with no-NaN dtypes return ReindexStrategy(blockwise=False) if method == "blockwise": # for grouping by dask arrays, we set reindex=True reindex_ = ReindexStrategy(blockwise=any_by_dask) elif _is_arg_reduction(func): reindex_ = ReindexStrategy(blockwise=False) elif method == "cohorts": reindex_ = ReindexStrategy(blockwise=False) elif method == "map-reduce": if expected_groups is None and any_by_dask: reindex_ = ReindexStrategy(blockwise=False) else: reindex_ = ReindexStrategy(blockwise=True) assert isinstance(reindex_, ReindexStrategy) # logger.debug("Leaving _validate_reindex: reindex is {}".format(reindex)) # noqa return reindex_ def _assert_by_is_aligned(shape: tuple[int, ...], by: T_Bys) -> None: assert all(b.ndim == by[0].ndim for b in by[1:]) for idx, b in enumerate(by): if not all(j in [i, 1] for i, j in zip(shape[-b.ndim :], b.shape)): raise ValueError( "`array` and `by` arrays must be 'aligned' " "so that such that by_ is broadcastable to array.shape[-by.ndim:] " "for every array `by_` in `by`. " "Either array.shape[-by_.ndim :] == by_.shape or the only differences " "should be size-1 dimensions in by_." f"Received array of shape {shape} but " f"array {idx} in `by` has shape {b.shape}." ) @overload def _convert_expected_groups_to_index( expected_groups: tuple[None, ...], isbin: Sequence[bool], sort: bool ) -> tuple[None, ...]: ... @overload def _convert_expected_groups_to_index( expected_groups: T_ExpectTuple, isbin: Sequence[bool], sort: bool ) -> T_ExpectIndexTuple: ... 
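# Implementation behind the overloads above: convert each entry of ``expected_groups``
# to a pandas Index (a pd.IntervalIndex when the corresponding ``isbin`` entry is True),
# sorting labels when ``sort=True``; ``None`` entries pass through unchanged.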
def _convert_expected_groups_to_index( expected_groups: T_ExpectOptTuple, isbin: Sequence[bool], sort: bool ) -> T_ExpectIndexOptTuple: out: list[T_ExpectIndexOpt] = [] for ex, isbin_ in zip(expected_groups, isbin): if isinstance(ex, pd.IntervalIndex) or (isinstance(ex, pd.Index) and not isbin_): if sort: out.append(ex.sort_values()) else: out.append(ex) elif ex is not None: if isbin_: out.append(pd.IntervalIndex.from_breaks(ex)) else: if sort: ex = np.sort(ex) out.append(pd.Index(ex)) else: assert ex is None out.append(None) return tuple(out) def _lazy_factorize_wrapper(*by: T_By, **kwargs) -> np.ndarray: group_idx, *_ = factorize_(by, **kwargs) return group_idx def _factorize_multiple( by: T_Bys, expected_groups: T_ExpectIndexOptTuple, any_by_dask: bool, sort: bool = True, ) -> tuple[tuple[np.ndarray], tuple[pd.Index, ...], tuple[int, ...]]: kwargs: FactorizeKwargs = dict( axes=(), # always (), we offset later if necessary. fastpath=True, # This is the only way it makes sense I think. # reindex controls what's actually allocated in chunk_reduce # At this point, we care about an accurate conversion to codes. reindex=True, sort=sort, ) if any_by_dask: import dask.array from . import dask_array_ops # noqa # unifying chunks will make sure all arrays in `by` are dask arrays # with compatible chunks, even if there was originally a numpy array inds = tuple(range(by[0].ndim)) for by_, expect in zip(by, expected_groups): if expect is None and is_duck_dask_array(by_): raise ValueError("Please provide expected_groups when grouping by a dask array.") found_groups = tuple( pd.Index(pd.unique(by_.reshape(-1))) if expect is None else expect for by_, expect in zip(by, expected_groups) ) grp_shape = tuple(map(len, found_groups)) chunks, by_chunked = dask.array.unify_chunks(*itertools.chain(*zip(by, (inds,) * len(by)))) group_idxs = [ dask.array.map_blocks( _lazy_factorize_wrapper, by_, expected_groups=(expect_,), meta=np.array((), dtype=np.int64), **kwargs, ) for by_, expect_ in zip(by_chunked, expected_groups) ] # This could be avoied but we'd use `np.where` # instead `_ravel_factorized` instead i.e. a copy. group_idx = dask.array.map_blocks( _ravel_factorized, *group_idxs, grp_shape=grp_shape, chunks=tuple(chunks.values()), dtype=np.int64 ) else: kwargs["by"] = by group_idx, found_groups, grp_shape, *_ = factorize_(**kwargs, expected_groups=expected_groups) return (group_idx,), found_groups, grp_shape @overload def _validate_expected_groups(nby: int, expected_groups: None) -> tuple[None, ...]: ... @overload def _validate_expected_groups(nby: int, expected_groups: T_ExpectedGroups) -> T_ExpectTuple: ... def _validate_expected_groups(nby: int, expected_groups: T_ExpectedGroupsOpt) -> T_ExpectOptTuple: if expected_groups is None: return (None,) * nby if nby == 1 and not isinstance(expected_groups, tuple): if isinstance(expected_groups, pd.Index | np.ndarray): return (expected_groups,) else: array = np.asarray(expected_groups) if np.issubdtype(array.dtype, np.integer): # preserve default dtypes # on pandas 1.5/2, on windows # when a list is passed array = array.astype(np.int64) return (array,) if nby > 1 and not isinstance(expected_groups, tuple): # TODO: test for list raise ValueError( "When grouping by multiple variables, expected_groups must be a tuple " "of either arrays or objects convertible to an array (like lists). " "For example `expected_groups=(np.array([1, 2, 3]), ['a', 'b', 'c'])`." f"Received a {type(expected_groups).__name__} instead. 
" "When grouping by a single variable, you can pass an array or something " "convertible to an array for convenience: `expected_groups=['a', 'b', 'c']`." ) if TYPE_CHECKING: assert isinstance(expected_groups, tuple) if len(expected_groups) != nby: raise ValueError( f"Must have same number of `expected_groups` (received {len(expected_groups)}) " f" and variables to group by (received {nby})." ) return expected_groups def _choose_method( method: T_MethodOpt, preferred_method: T_Method, agg: Aggregation, by, nax: int ) -> T_Method: if method is None: logger.debug("_choose_method: method is None") if agg.chunk == (None,): if preferred_method != "blockwise": raise ValueError( f"Aggregation {agg.name} is only supported for `method='blockwise'`, " "but the chunking is not right." ) logger.debug("_choose_method: choosing 'blockwise'") return "blockwise" if nax != by.ndim: logger.debug("_choose_method: choosing 'map-reduce'") return "map-reduce" if _is_arg_reduction(agg) and preferred_method == "blockwise": return "cohorts" logger.debug(f"_choose_method: choosing preferred_method={preferred_method}") # noqa return preferred_method else: return method def _choose_engine(by, agg: Aggregation): dtype = agg.dtype["user"] not_arg_reduce = not _is_arg_reduction(agg) if agg.name in ["quantile", "nanquantile", "median", "nanmedian"]: logger.debug(f"_choose_engine: Choosing 'flox' since {agg.name}") return "flox" # numbagg only supports nan-skipping reductions # without dtype specified has_blockwise_nan_skipping = (agg.chunk[0] is None and "nan" in agg.name) or any( (isinstance(func, str) and "nan" in func) for func in agg.chunk ) if HAS_NUMBAGG: if agg.name in ["all", "any"] or (not_arg_reduce and has_blockwise_nan_skipping and dtype is None): logger.debug("_choose_engine: Choosing 'numbagg'") return "numbagg" if not_arg_reduce and (not is_duck_dask_array(by) and _issorted(by)): logger.debug("_choose_engine: Choosing 'flox'") return "flox" else: logger.debug("_choose_engine: Choosing 'numpy'") return "numpy" def groupby_reduce( array: np.ndarray | DaskArray, *by: T_By, func: T_Agg, expected_groups: T_ExpectedGroupsOpt = None, sort: bool = True, isbin: T_IsBins = False, axis: T_AxesOpt = None, fill_value=None, dtype: np.typing.DTypeLike = None, min_count: int | None = None, method: T_MethodOpt = None, engine: T_EngineOpt = None, reindex: ReindexStrategy | bool | None = None, finalize_kwargs: dict[Any, Any] | None = None, ) -> tuple[DaskArray, Unpack[tuple[np.ndarray | DaskArray, ...]]]: """ GroupBy reductions using tree reductions for dask.array Parameters ---------- array : ndarray or DaskArray Array to be reduced, possibly nD *by : ndarray or DaskArray Array of labels to group over. Must be aligned with ``array`` so that ``array.shape[-by.ndim :] == by.shape`` or any disagreements in that equality check are for dimensions of size 1 in ``by``. func : {"all", "any", "count", "sum", "nansum", "mean", "nanmean", \ "max", "nanmax", "min", "nanmin", "argmax", "nanargmax", "argmin", "nanargmin", \ "quantile", "nanquantile", "median", "nanmedian", "mode", "nanmode", \ "first", "nanfirst", "last", "nanlast"} or Aggregation Single function name or an Aggregation instance expected_groups : (optional) Sequence Expected unique labels. isbin : bool, optional Are ``expected_groups`` bin edges? sort : bool, optional Whether groups should be returned in sorted order. Only applies for dask reductions when ``method`` is not ``"map-reduce"``. For ``"map-reduce"``, the groups are always sorted. 
axis : None or int or Sequence[int], optional If None, reduce across all dimensions of ``by``, else reduce across corresponding axes of array. Negative integers are normalized using ``array.ndim``. fill_value : Any Value to assign when a label in ``expected_groups`` is not present. dtype : data-type , optional DType for the output. Can be anything that is accepted by ``np.dtype``. min_count : int, default: None The required number of valid values to perform the operation. If fewer than ``min_count`` non-NA values are present the result will be NA. Only used if ``skipna`` is set to True or defaults to True for the array's dtype. method : {"map-reduce", "blockwise", "cohorts"}, optional Note that this arg is chosen by default using heuristics. Strategy for reduction of dask arrays only. * ``"map-reduce"``: First apply the reduction blockwise on ``array``, then combine a few newighbouring blocks, apply the reduction. Continue until finalizing. Usually, ``func`` will need to be an ``Aggregation`` instance for this method to work. Common aggregations are implemented. * ``"blockwise"``: Only reduce using blockwise and avoid aggregating blocks together. Useful for resampling-style reductions where group members are always together. If ``by`` is 1D, ``array`` is automatically rechunked so that chunk boundaries line up with group boundaries i.e. each block contains all members of any group present in that block. For nD ``by``, you must make sure that all members of a group are present in a single block. * ``"cohorts"``: Finds group labels that tend to occur together ("cohorts"), indexes out cohorts and reduces that subset using "map-reduce", repeat for all cohorts. This works well for many time groupings where the group labels repeat at regular intervals like 'hour', 'month', dayofyear' etc. Optimize chunking ``array`` for this method by first rechunking using ``rechunk_for_cohorts`` (for 1D ``by`` only). engine : {"flox", "numpy", "numba", "numbagg"}, optional Algorithm to compute the groupby reduction on non-dask arrays and on each dask chunk: * ``"numpy"``: Use the vectorized implementations in ``numpy_groupies.aggregate_numpy``. This is the default choice because it works for most array types. * ``"flox"``: Use an internal implementation where the data is sorted so that all members of a group occur sequentially, and then numpy.ufunc.reduceat is to used for the reduction. This will fall back to ``numpy_groupies.aggregate_numpy`` for a reduction that is not yet implemented. * ``"numba"``: Use the implementations in ``numpy_groupies.aggregate_numba``. * ``"numbagg"``: Use the reductions supported by ``numbagg.grouped``. This will fall back to ``numpy_groupies.aggregate_numpy`` for a reduction that is not yet implemented. reindex : ReindexStrategy | bool, optional Whether to "reindex" the blockwise reduced results to ``expected_groups`` (possibly automatically detected). If True, the intermediate result of the blockwise groupby-reduction has a value for all expected groups, and the final result is a simple reduction of those intermediates. In nearly all cases, this is a significant boost in computation speed. For cases like time grouping, this may result in large intermediates relative to the original block size. Avoid that by using ``method="cohorts"``. By default, it is turned off for argreductions. By default, the type of ``array`` is preserved. You may optionally reindex to a sparse array type to further control memory in the case of ``expected_groups`` being very large. 
Pass a ``ReindexStrategy`` instance with the appropriate ``array_type``, for example (``reindex=ReindexStrategy(blockwise=False, array_type=ReindexArrayType.SPARSE_COO)``). finalize_kwargs : dict, optional Kwargs passed to finalize the reduction such as ``ddof`` for var, std or ``q`` for quantile. Returns ------- result Aggregated result *groups Group labels See Also -------- xarray.xarray_reduce """ if engine == "flox" and _is_arg_reduction(func): raise NotImplementedError( "argreductions not supported for engine='flox' yet. Try engine='numpy' or engine='numba' instead." ) if engine == "numbagg" and dtype is not None: raise NotImplementedError( "numbagg does not support the `dtype` kwarg. Either cast your " "input arguments to `dtype` or use a different `engine`: " "'flox' or 'numpy' or 'numba'. " "See https://github.com/numbagg/numbagg/issues/121." ) if func in ["quantile", "nanquantile"]: if finalize_kwargs is None or "q" not in finalize_kwargs: raise ValueError("Please pass `q` for quantile calculations.") else: nq = len(_atleast_1d(finalize_kwargs["q"])) if nq > 1 and engine == "numpy": raise ValueError( "Multiple quantiles not supported with engine='numpy'." "Use engine='flox' instead (it is also much faster), " "or set engine=None to use the default." ) bys: T_Bys = tuple(np.asarray(b) if not is_duck_array(b) else b for b in by) nby = len(bys) by_is_dask = tuple(is_duck_dask_array(b) for b in bys) any_by_dask = any(by_is_dask) provided_expected = expected_groups is not None if engine == "numbagg" and _is_arg_reduction(func) and (any_by_dask or is_duck_dask_array(array)): # There is only one test that fails, but I can't figure # out why without deep debugging. # just disable for now. # test_groupby_reduce_axis_subset_against_numpy # for array is 3D dask, by is 3D dask, axis=2 # We are falling back to numpy for the arg reduction, # so presumably something is going wrong raise NotImplementedError( "argreductions not supported for engine='numbagg' yet." "Try engine='numpy' or engine='numba' instead." ) if method == "cohorts" and any_by_dask: raise ValueError(f"method={method!r} can only be used when grouping by numpy arrays.") if not is_duck_array(array): array = np.asarray(array) reindex = _validate_reindex( reindex, func, method, expected_groups, any_by_dask, is_duck_dask_array(array), array.dtype, ) is_bool_array = np.issubdtype(array.dtype, bool) and not _is_bool_supported_reduction(func) array = array.astype(np.int_) if is_bool_array else array isbins = _atleast_1d(isbin, nby) _assert_by_is_aligned(array.shape, bys) expected_groups = _validate_expected_groups(nby, expected_groups) for idx, (expect, is_dask) in enumerate(zip(expected_groups, by_is_dask)): if is_dask and (reindex.blockwise or nby > 1) and expect is None: raise ValueError( f"`expected_groups` for array {idx} in `by` cannot be None since it is a dask.array." 
) # We convert to pd.Index since that lets us know if we are binning or not # (pd.IntervalIndex or not) expected_groups = _convert_expected_groups_to_index(expected_groups, isbins, sort) # Don't factorize early only when # grouping by dask arrays, and not having expected_groups factorize_early = not ( # can't do it if we are grouping by dask array but don't have expected_groups any(is_dask and ex_ is None for is_dask, ex_ in zip(by_is_dask, expected_groups)) ) expected_: pd.RangeIndex | None if factorize_early: bys, final_groups, grp_shape = _factorize_multiple( bys, expected_groups, any_by_dask=any_by_dask, sort=sort, ) expected_ = pd.RangeIndex(math.prod(grp_shape)) else: assert expected_groups == (None,) expected_ = None assert len(bys) == 1 (by_,) = bys if axis is None: axis_ = tuple(array.ndim + np.arange(-by_.ndim, 0)) else: axis_ = normalize_axis_tuple(axis, array.ndim) nax = len(axis_) has_dask = is_duck_dask_array(array) or is_duck_dask_array(by_) has_cubed = is_duck_cubed_array(array) or is_duck_cubed_array(by_) is_first_last = _is_first_last_reduction(func) if is_first_last: if has_dask and nax != 1: raise ValueError( "For dask arrays: first, last, nanfirst, nanlast reductions are " "only supported along a single axis. Please reshape appropriately." ) elif nax not in [1, by_.ndim]: raise ValueError( "first, last, nanfirst, nanlast reductions are only supported " "along a single axis or when reducing across all dimensions of `by`." ) is_npdatetime = array.dtype.kind in "Mm" is_cftime = _contains_cftime_datetimes(array) requires_numeric = ( (func not in ["count", "any", "all"] and not is_first_last) # Flox's count works with non-numeric and its faster than converting. or (func == "count" and engine != "flox") # TODO: needed for npg, move to aggregate_npg or (is_first_last and is_cftime) ) if requires_numeric: if is_npdatetime: datetime_dtype = array.dtype array = array.view(np.int64) elif is_cftime: offset = array.min() array = datetime_to_numeric(array, offset, datetime_unit="us") if nax == 1 and by_.ndim > 1 and expected_ is None: # When we reduce along all axes, we are guaranteed to see all # groups in the final combine stage, so everything works. # This is not necessarily true when reducing along a subset of axes # (of by) # TODO: Does this depend on chunking of by? # For e.g., we could relax this if there is only one chunk along all # by dim != axis? raise NotImplementedError("Please provide ``expected_groups`` when not reducing along all axes.") assert nax <= by_.ndim if nax < by_.ndim: by_ = _move_reduce_dims_to_end(by_, tuple(-array.ndim + ax + by_.ndim for ax in axis_)) array = _move_reduce_dims_to_end(array, axis_) axis_ = tuple(array.ndim + np.arange(-nax, 0)) nax = len(axis_) # When axis is a subset of possible values; then npg will # apply the fill_value to groups that don't exist along a particular axis (for e.g.) # since these count as a group that is absent. thoo! # fill_value applies to all-NaN groups as well as labels in expected_groups that are not found. # The only way to do this consistently is mask out using min_count # Consider np.sum([np.nan]) = np.nan, np.nansum([np.nan]) = 0 if min_count is None: if nax < by_.ndim or (fill_value is not None and provided_expected): min_count_: int = 1 else: min_count_ = 0 else: min_count_ = min_count # TODO: set in xarray? 
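    # nansum/nanprod use identity fill values (0 and 1). When min_count is requested
    # without an explicit fill_value, switch to NaN so that groups failing the
    # min_count check come out as NaN rather than 0 or 1.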
if min_count_ > 0 and func in ["nansum", "nanprod"] and fill_value is None: # nansum, nanprod have fill_value=0, 1 # overwrite than when min_count is set fill_value = np.nan kwargs = dict(axis=axis_, fill_value=fill_value) agg = _initialize_aggregation(func, dtype, array.dtype, fill_value, min_count_, finalize_kwargs) # Need to set this early using `agg` # It cannot be done in the core loop of chunk_reduce # since we "prepare" the data for flox. kwargs["engine"] = _choose_engine(by_, agg) if engine is None else engine groups: tuple[np.ndarray | DaskArray, ...] if has_cubed: if method is None: method = "map-reduce" if method not in ("map-reduce", "blockwise"): raise NotImplementedError( "Reduction for Cubed arrays is only implemented for methods 'map-reduce' and 'blockwise'." ) partial_agg = partial(cubed_groupby_agg, **kwargs) result, groups = partial_agg( array=array, by=by_, expected_groups=expected_, agg=agg, reindex=reindex, method=method, sort=sort, ) return (result, groups) elif not has_dask: reindex.set_blockwise_for_numpy() results = _reduce_blockwise( array, by_, agg, expected_groups=expected_, reindex=reindex, sort=sort, **kwargs, ) groups = (results["groups"],) result = results[agg.name] else: if TYPE_CHECKING: # TODO: How else to narrow that array.chunks is there? assert isinstance(array, DaskArray) if (not any_by_dask and method is None) or method == "cohorts": preferred_method, chunks_cohorts = find_group_cohorts( by_, [array.chunks[ax] for ax in range(-by_.ndim, 0)], expected_groups=expected_, # when provided with cohorts, we *always* 'merge' merge=(method == "cohorts"), ) else: preferred_method = "map-reduce" chunks_cohorts = {} method = _choose_method(method, preferred_method, agg, by_, nax) if agg.chunk[0] is None and method != "blockwise": raise NotImplementedError( f"Aggregation {agg.name!r} is only implemented for dask arrays when method='blockwise'." f"Received method={method!r}" ) if ( _is_arg_reduction(agg) and method == "blockwise" and not all(nchunks == 1 for nchunks in array.numblocks[-nax:]) ): raise NotImplementedError( "arg-reductions are not supported with method='blockwise', use 'cohorts' instead." ) if nax != by_.ndim and method in ["blockwise", "cohorts"]: raise NotImplementedError( "Must reduce along all dimensions of `by` when method != 'map-reduce'." f"Received method={method!r}" ) # TODO: clean this up reindex = _validate_reindex( reindex, func, method, expected_, any_by_dask, is_duck_dask_array(array), array.dtype, ) if reindex.array_type is ReindexArrayType.SPARSE_COO: if not HAS_SPARSE: raise ImportError("Package 'sparse' must be installed to reindex to a sparse.COO array.") if not _is_sparse_supported_reduction(func): raise NotImplementedError( f"Aggregation {func=!r} is not supported when reindexing to a sparse array. 
" "Please raise an issue" ) if TYPE_CHECKING: assert isinstance(reindex, ReindexStrategy) assert method is not None # TODO: just do this in dask_groupby_agg # we always need some fill_value (see above) so choose the default if needed if kwargs["fill_value"] is None: kwargs["fill_value"] = agg.fill_value[agg.name] partial_agg = partial(dask_groupby_agg, **kwargs) # if preferred method is already blockwise, no need to rechunk if preferred_method != "blockwise" and method == "blockwise" and by_.ndim == 1: array = rechunk_for_blockwise(array, axis=-1, labels=by_) result, groups = partial_agg( array=array, by=by_, expected_groups=expected_, agg=agg, reindex=reindex, method=method, chunks_cohorts=chunks_cohorts, sort=sort, ) if sort and method != "map-reduce": assert len(groups) == 1 sorted_idx = np.argsort(groups[0]) # This optimization helps specifically with resampling if not _issorted(sorted_idx): result = result[..., sorted_idx] groups = (groups[0][sorted_idx],) if factorize_early: assert len(groups) == 1 (groups_,) = groups # nan group labels are factorized to -1, and preserved # now we get rid of them by reindexing # First, for "blockwise", we can have -1 repeated in different blocks # This breaks the reindexing so remove those first. if method == "blockwise" and (mask := groups_ == -1).sum(axis=-1) > 1: result = result[..., ~mask] groups_ = groups_[..., ~mask] # This reindex also handles bins with no data result = reindex_( result, from_=groups_, to=expected_, fill_value=fill_value, array_type=ReindexArrayType.AUTO, # just reindex the received array ).reshape(result.shape[:-1] + grp_shape) groups = final_groups if is_bool_array and (_is_minmax_reduction(func) or _is_first_last_reduction(func)): result = result.astype(bool) # Output of count has an int dtype. if requires_numeric and func != "count": if is_npdatetime: result = result.astype(datetime_dtype) elif is_cftime: asdelta = _to_pytimedelta(result, unit="us") nanmask = np.isnan(result) asdelta[nanmask] = datetime.timedelta(microseconds=0) result = asdelta + offset result[nanmask] = np.timedelta64("NaT") groups = map( lambda g: g.to_numpy() if isinstance(g, pd.Index) and not isinstance(g, pd.RangeIndex) else g, groups ) return (result, *groups) def groupby_scan( array: np.ndarray | DaskArray, *by: T_By, func: T_Scan, expected_groups: T_ExpectedGroupsOpt = None, axis: int | tuple[int] = -1, dtype: np.typing.DTypeLike = None, method: T_MethodOpt = None, engine: T_EngineOpt = None, ) -> np.ndarray | DaskArray: """ GroupBy reductions using parallel scans for dask.array Parameters ---------- array : ndarray or DaskArray Array to be reduced, possibly nD *by : ndarray or DaskArray Array of labels to group over. Must be aligned with ``array`` so that ``array.shape[-by.ndim :] == by.shape`` or any disagreements in that equality check are for dimensions of size 1 in `by`. func : {"nancumsum", "ffill", "bfill"} or Scan Single function name or a Scan instance expected_groups : (optional) Sequence Expected unique labels. axis : None or int or Sequence[int], optional If None, reduce across all dimensions of by Else, reduce across corresponding axes of array Negative integers are normalized using array.ndim. fill_value : Any Value to assign when a label in ``expected_groups`` is not present. dtype : data-type , optional DType for the output. Can be anything that is accepted by ``np.dtype``. 
method : {"blockwise", "cohorts"}, optional Strategy for reduction of dask arrays only: * ``"blockwise"``: Only scan using blockwise and avoid aggregating blocks together. Useful for resampling-style groupby problems where group members are always together. If `by` is 1D, `array` is automatically rechunked so that chunk boundaries line up with group boundaries i.e. each block contains all members of any group present in that block. For nD `by`, you must make sure that all members of a group are present in a single block. * ``"cohorts"``: Finds group labels that tend to occur together ("cohorts"), indexes out cohorts and reduces that subset using "map-reduce", repeat for all cohorts. This works well for many time groupings where the group labels repeat at regular intervals like 'hour', 'month', dayofyear' etc. Optimize chunking ``array`` for this method by first rechunking using ``rechunk_for_cohorts`` (for 1D ``by`` only). engine : {"flox", "numpy", "numba", "numbagg"}, optional Algorithm to compute the groupby reduction on non-dask arrays and on each dask chunk: * ``"numpy"``: Use the vectorized implementations in ``numpy_groupies.aggregate_numpy``. This is the default choice because it works for most array types. * ``"flox"``: Use an internal implementation where the data is sorted so that all members of a group occur sequentially, and then numpy.ufunc.reduceat is to used for the reduction. This will fall back to ``numpy_groupies.aggregate_numpy`` for a reduction that is not yet implemented. * ``"numba"``: Use the implementations in ``numpy_groupies.aggregate_numba``. * ``"numbagg"``: Use the reductions supported by ``numbagg.grouped``. This will fall back to ``numpy_groupies.aggregate_numpy`` for a reduction that is not yet implemented. Returns ------- result Aggregated result See Also -------- xarray.xarray_reduce """ axis = _atleast_1d(axis) if len(axis) > 1: raise NotImplementedError("Scans are only supported along a single dimension.") bys: T_Bys = tuple(np.asarray(b) if not is_duck_array(b) else b for b in by) nby = len(by) by_is_dask = tuple(is_duck_dask_array(b) for b in bys) any_by_dask = any(by_is_dask) axis_ = normalize_axis_tuple(axis, array.ndim) if engine is not None: raise NotImplementedError("Setting `engine` is not supported for scans yet.") if method is not None: raise NotImplementedError("Setting `method` is not supported for scans yet.") if engine is None: engine = "flox" assert engine == "flox" if not is_duck_array(array): array = np.asarray(array) if isinstance(func, str): agg = AGGREGATIONS[func] assert isinstance(agg, Scan) agg = copy.deepcopy(agg) if (agg == AGGREGATIONS["ffill"] or agg == AGGREGATIONS["bfill"]) and array.dtype.kind != "f": # nothing to do, no NaNs! 
return array if expected_groups is not None: raise NotImplementedError("Setting `expected_groups` and binning is not supported yet.") expected_groups = _validate_expected_groups(nby, expected_groups) expected_groups = _convert_expected_groups_to_index(expected_groups, isbin=(False,) * nby, sort=False) # Don't factorize early only when # grouping by dask arrays, and not having expected_groups factorize_early = not ( # can't do it if we are grouping by dask array but don't have expected_groups any(is_dask and ex_ is None for is_dask, ex_ in zip(by_is_dask, expected_groups)) ) if factorize_early: bys, final_groups, grp_shape = _factorize_multiple( bys, expected_groups, any_by_dask=any_by_dask, sort=False, ) else: raise NotImplementedError assert len(bys) == 1 by_: np.ndarray (by_,) = bys has_dask = is_duck_dask_array(array) or is_duck_dask_array(by_) if array.dtype.kind in "Mm": cast_to = array.dtype array = array.view(np.int64) elif array.dtype.kind == "b": array = array.view(np.int8) cast_to = None if agg.preserves_dtype: cast_to = bool else: cast_to = None # TODO: move to aggregate_npg.py if agg.name in ["cumsum", "nancumsum"] and array.dtype.kind in ["i", "u"]: # https://numpy.org/doc/stable/reference/generated/numpy.cumsum.html # it defaults to the dtype of a, unless a # has an integer dtype with a precision less than that of the default platform integer. if array.dtype.kind == "i": agg.dtype = np.result_type(array.dtype, np.int_) elif array.dtype.kind == "u": agg.dtype = np.result_type(array.dtype, np.uint) else: agg.dtype = array.dtype if dtype is None else dtype agg.identity = xrdtypes._get_fill_value(agg.dtype, agg.identity) (single_axis,) = axis_ # type: ignore[misc] # avoid some roundoff error when we can. if by_.shape[-1] == 1 or by_.shape == grp_shape: array = array.astype(agg.dtype) if cast_to is not None: array = array.astype(cast_to) return array # Made a design choice here to have `preprocess` handle both array and group_idx # Example: for reversing, we need to reverse the whole array, not just reverse # each block independently inp = AlignedArrays(array=array, group_idx=by_) if agg.preprocess: inp = agg.preprocess(inp) if not has_dask: final_state = chunk_scan(inp, axis=single_axis, agg=agg, dtype=agg.dtype) result = _finalize_scan(final_state, dtype=agg.dtype) else: result = dask_groupby_scan(inp.array, inp.group_idx, axes=axis_, agg=agg) # Made a design choice here to have `postprocess` handle both array and group_idx out = AlignedArrays(array=result, group_idx=by_) if agg.finalize: out = agg.finalize(out) if cast_to is not None: return out.array.astype(cast_to) return out.array def chunk_scan(inp: AlignedArrays, *, axis: int, agg: Scan, dtype=None, keepdims=None) -> ScanState: assert axis == inp.array.ndim - 1 # I don't think we need to re-factorize here unless we are grouping by a dask array accumulated = generic_aggregate( inp.group_idx, inp.array, axis=axis, engine="flox", func=agg.scan, dtype=dtype, fill_value=agg.identity, ) result = AlignedArrays(array=accumulated, group_idx=inp.group_idx) return ScanState(result=result, state=None) def grouped_reduce(inp: AlignedArrays, *, agg: Scan, axis: int, keepdims=None) -> ScanState: assert axis == inp.array.ndim - 1 reduced = chunk_reduce( inp.array, inp.group_idx, func=(agg.reduction,), axis=axis, engine="flox", dtype=inp.array.dtype, fill_value=agg.identity, expected_groups=None, ) return ScanState( state=AlignedArrays(array=reduced["intermediates"][0], group_idx=reduced["groups"]), result=None, ) def _zip(group_idx: 
np.ndarray, array: np.ndarray) -> AlignedArrays: return AlignedArrays(group_idx=group_idx, array=array) def _finalize_scan(block: ScanState, dtype) -> np.ndarray: assert block.result is not None return block.result.array.astype(dtype, copy=False) def dask_groupby_scan(array, by, axes: T_Axes, agg: Scan) -> DaskArray: from dask.array import map_blocks from dask.array.reductions import cumreduction as scan from flox.aggregations import scan_binary_op if len(axes) > 1: raise NotImplementedError("Scans are only supported along a single axis.") (axis,) = axes array, by = _unify_chunks(array, by) # 1. zip together group indices & array zipped = map_blocks( _zip, by, array, dtype=array.dtype, meta=array._meta, name="groupby-scan-preprocess", ) scan_ = partial(chunk_scan, agg=agg) # dask tokenizing error workaround scan_.__name__ = scan_.func.__name__ # type: ignore[attr-defined] # 2. Run the scan accumulated = scan( func=scan_, binop=partial(scan_binary_op, agg=agg), ident=agg.identity, x=zipped, axis=axis, # TODO: support method="sequential" here. method="blelloch", preop=partial(grouped_reduce, agg=agg), dtype=agg.dtype, ) # 3. Unzip and extract the final result array, discard groups result = map_blocks(partial(_finalize_scan, dtype=agg.dtype), accumulated, dtype=agg.dtype) assert result.chunks == array.chunks return result flox-0.10.3/flox/dask_array_ops.py000066400000000000000000000073661477552625700171260ustar00rootroot00000000000000import builtins import math from functools import lru_cache, partial from itertools import product from numbers import Integral import dask import pandas as pd from dask import config from dask.base import normalize_token from dask.blockwise import lol_tuples from packaging.version import Version from toolz import partition_all from .lib import ArrayLayer from .types import Graph if Version(dask.__version__) <= Version("2025.03.1"): # workaround for https://github.com/dask/dask/issues/11862 @normalize_token.register(pd.RangeIndex) def normalize_range_index(x): return normalize_token(type(x)), x.start, x.stop, x.step, x.dtype, x.name # _tree_reduce and partial_reduce are copied from dask.array.reductions # They have been modified to work purely with graphs, and without creating new Array layers # in the graph. 
The `block_index` kwarg is new and avoids a concatenation by simply setting the right # key initially def _tree_reduce( x: ArrayLayer, *, name: str, out_dsk: Graph, aggregate, axis: tuple[int, ...], block_index: int, split_every=None, combine=None, ): # Normalize split_every split_every = split_every or config.get("split_every", 4) if isinstance(split_every, dict): split_every = {k: split_every.get(k, 2) for k in axis} elif isinstance(split_every, Integral): n = builtins.max(int(split_every ** (1 / (len(axis) or 1))), 2) split_every = dict.fromkeys(axis, n) else: raise ValueError("split_every must be a int or a dict") numblocks = tuple(len(c) for c in x.chunks) out_chunks = x.chunks # Reduce across intermediates depth = 1 for i, n in enumerate(numblocks): if i in split_every and split_every[i] != 1: depth = int(builtins.max(depth, math.ceil(math.log(n, split_every[i])))) func = partial(combine or aggregate, axis=axis) agg_dep_name = x.name for level in range(depth - 1): newname = name + f"-{block_index}-partial-{level}" out_dsk, out_chunks = partial_reduce( func, out_dsk, chunks=out_chunks, split_every=split_every, name=newname, dep_name=agg_dep_name, axis=axis, ) agg_dep_name = newname func = partial(aggregate, axis=axis) return partial_reduce( func, out_dsk, chunks=out_chunks, split_every=split_every, name=name, dep_name=agg_dep_name, axis=axis, block_index=block_index, ) def partial_reduce( func, dsk, *, chunks: tuple[tuple[int, ...], ...], name: str, dep_name: str, split_every: dict[int, int], axis: tuple[int, ...], block_index: int | None = None, ): ndim = len(chunks) keys, parts, out_chunks = get_parts(tuple(split_every.items()), chunks) for k, p in zip(keys, product(*parts)): free = {i: j[0] for (i, j) in enumerate(p) if len(j) == 1 and i not in split_every} dummy = dict(i for i in enumerate(p) if i[0] in split_every) g = lol_tuples((dep_name,), range(ndim), free, dummy) assert dep_name != name if block_index is not None: k = (*k[:-1], block_index) dsk[(name,) + k] = (func, g) return dsk, out_chunks @lru_cache def get_parts(split_every_items, chunks): numblocks = tuple(len(c) for c in chunks) split_every = dict(split_every_items) parts = [list(partition_all(split_every.get(i, 1), range(n))) for (i, n) in enumerate(numblocks)] keys = tuple(product(*map(range, map(len, parts)))) out_chunks = tuple( tuple(1 for p in partition_all(split_every[i], c)) if i in split_every else c for (i, c) in enumerate(chunks) ) return keys, parts, out_chunks flox-0.10.3/flox/lib.py000066400000000000000000000007371477552625700146660ustar00rootroot00000000000000from dataclasses import dataclass from .types import DaskArray, Graph @dataclass class ArrayLayer: name: str layer: Graph chunks: tuple[tuple[int, ...], ...] 
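    # Wrap this layer's task graph in a HighLevelGraph (with ``dep`` as the only
    # dependency) and return it as a regular dask Array.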
def to_array(self, dep: DaskArray) -> DaskArray: from dask.array import Array from dask.highlevelgraph import HighLevelGraph graph = HighLevelGraph.from_collections(self.name, self.layer, dependencies=[dep]) return Array(graph, self.name, self.chunks, meta=dep._meta) flox-0.10.3/flox/py.typed000066400000000000000000000000001477552625700152240ustar00rootroot00000000000000flox-0.10.3/flox/types.py000066400000000000000000000004511477552625700152550ustar00rootroot00000000000000from typing import Any, TypeAlias try: import cubed.Array as CubedArray except ImportError: CubedArray = Any try: import dask.array.Array as DaskArray from dask.typing import Graph except ImportError: DaskArray = Any Graph: TypeAlias = Any # type: ignore[no-redef,misc] flox-0.10.3/flox/visualize.py000066400000000000000000000120501477552625700161220ustar00rootroot00000000000000import random from itertools import product import matplotlib as mpl import matplotlib.pyplot as plt import numpy as np import pandas as pd from .core import _unique, find_group_cohorts def draw_mesh( nrow, ncol, *, draw_line_at=None, nspaces=0, space_at=0, pxin=0.3, counter=None, colors=None, randomize=True, x0=0, y0=0, append=False, ): dx = 2 xpts = x0 + np.arange(0, (ncol + nspaces) * dx, dx) ypts = y0 + np.arange(0, nrow * dx, dx) if colors is None: colors = mpl.cm.Set2.colors[:4] if not append: plt.figure() ax = plt.axes() else: ax = plt.gca() ax.set_aspect(1) ax.set_axis_off() # ncolors = len(colors) if not randomize: colors = iter(colors) icolor = -1 for n, (y, x) in enumerate(product(ypts, xpts)): if space_at > 0 and (n % space_at) == 0: continue if randomize: fcolor = random.choice(colors) else: fcolor = next(colors) icolor += 1 if counter is not None: counter[fcolor] += 1 ax.add_patch( mpl.patches.Rectangle( (x, y), dx, dx, edgecolor="w", linewidth=1, facecolor=fcolor, ) ) if draw_line_at is not None and icolor > 0 and icolor % draw_line_at == 0: plt.plot([x, x], [y - 0.75 * dx, y + 0.75 * dx], color="k", lw=2) # assert n + 1 == ncolors, (n, ncolors) ax.set_xlim((0, max(xpts) + 2 * dx)) ax.set_ylim((-0.75 * dx + min(ypts), max(ypts) + 0.75 * dx)) if not append: plt.gcf().set_size_inches((ncol * pxin, (nrow + 2) * pxin)) def visualize_groups_1d(array, labels, axis=-1, colors=None, cmap=None, append=True, x0=0): """ Visualize group distribution for a 1D array of group labels. 
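    Each chunk of ``array`` along ``axis`` is drawn as a short row of cells,
    one cell per element, colored by group label, with a gap between chunks.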
""" labels = np.asarray(labels) assert labels.ndim == 1 factorized, unique_labels = pd.factorize(labels) assert np.array(labels).ndim == 1 chunks = array.chunks[axis] if colors is None: if cmap is None: colors = list(mpl.cm.tab20.colors) elif cmap is not None: colors = [cmap((num - 1) / len(unique_labels)) for num in unique_labels] if len(unique_labels) > len(colors): raise ValueError("Not enough unique colors") if not append: fig = plt.figure() i0 = 0 for i in chunks: lab = labels[i0 : i0 + i] col = [colors[label] for label in lab] + [(1, 1, 1)] draw_mesh( 1, len(lab) + 1, colors=col, randomize=False, append=append, x0=x0 + i0 * 2.3, # + (i0 - 1) * 0.025, ) i0 += i if not append: pxin = 0.8 fig.set_size_inches((len(labels) * pxin, 1 * pxin)) def get_colormap(N): cmap = mpl.cm.get_cmap("tab20_r").copy() ncolors = len(cmap.colors) q = N // ncolors r = N % ncolors cmap = mpl.colors.ListedColormap(np.concatenate([cmap.colors] * q + [cmap.colors[: r + 1]])) cmap.set_under(color="k") return cmap def factorize_cohorts(chunks, cohorts): chunk_grid = tuple(len(c) for c in chunks) nchunks = np.prod(chunk_grid) factorized = np.full((nchunks,), -1, dtype=np.int64) for idx, cohort in enumerate(cohorts): factorized[list(cohort)] = idx return factorized.reshape(chunk_grid) def visualize_cohorts_2d(by, chunks): assert by.ndim == 2 print("finding cohorts...") chunks = [chunks[ax] for ax in range(-by.ndim, 0)] _, chunks_cohorts = find_group_cohorts(by, chunks) print("finished cohorts...") xticks = np.cumsum(chunks[-1]) yticks = np.cumsum(chunks[-2]) f, ax = plt.subplots(1, 2, constrained_layout=True, sharex=False, sharey=False) ax = ax.ravel() # ax[1].set_visible(False) # ax = ax[[0, 2, 3]] ngroups = len(_unique(by)) h0 = ax[0].imshow(by, vmin=0, cmap=get_colormap(ngroups)) h2 = _visualize_cohorts(chunks, chunks_cohorts, ax=ax[1]) ax[0].grid(True, which="both") for axx in ax[:1]: axx.set_xticks(xticks) axx.set_yticks(yticks) for h, axx in zip([h0, h2], ax): f.colorbar(h, ax=axx, orientation="horizontal") ax[0].set_title(f"by: {ngroups} groups") ax[1].set_title(f"{len(chunks_cohorts)} cohorts") f.set_size_inches((9, 6)) def _visualize_cohorts(chunks, cohorts, ax=None): if ax is None: _, ax = plt.subplots(1, 1) data = factorize_cohorts(chunks, cohorts) return ax.imshow(data, vmin=0, cmap=get_colormap(len(cohorts))) def visualize_groups_2d(labels, y0=0, **kwargs): colors = mpl.cm.tab10_r for _i, chunk in enumerate(labels): chunk = np.atleast_2d(chunk) draw_mesh( *chunk.shape, colors=tuple(colors(label) for label in np.flipud(chunk).ravel()), randomize=False, append=True, y0=y0, **kwargs, ) y0 = y0 + 2 * chunk.shape[0] + 2 plt.ylim([-1, y0]) flox-0.10.3/flox/xarray.py000066400000000000000000000602371477552625700154270ustar00rootroot00000000000000from __future__ import annotations from collections.abc import Hashable, Iterable, Sequence from typing import TYPE_CHECKING, Any import numpy as np import pandas as pd import xarray as xr from packaging.version import Version from .aggregations import Aggregation, Dim, _atleast_1d, quantile_new_dims_func from .core import ( ReindexStrategy, _convert_expected_groups_to_index, _get_expected_groups, _validate_expected_groups, groupby_reduce, ) from .core import rechunk_for_blockwise as rechunk_array_for_blockwise from .core import rechunk_for_cohorts as rechunk_array_for_cohorts if TYPE_CHECKING: from xarray.core.types import T_DataArray, T_Dataset from .core import T_ExpectedGroupsOpt, T_ExpectIndex, T_ExpectOpt Dims = str | Iterable[Hashable] | None def 
_restore_dim_order(result, obj, by, no_groupby_reorder=False): def lookup_order(dimension): if dimension == by.name and by.ndim == 1: (dimension,) = by.dims if no_groupby_reorder: return -1e6 # some arbitrarily low value if dimension in obj.dims: axis = obj.get_axis_num(dimension) else: axis = 1e6 # some arbitrarily high value return axis new_order = sorted(result.dims, key=lookup_order) return result.transpose(*new_order) def _broadcast_size_one_dims(*arrays, core_dims): """Broadcast by adding size-1 dimensions in the right place. Workaround because apply_ufunc doesn't support this yet. https://github.com/pydata/xarray/issues/3032#issuecomment-503337637 Specialized to the groupby problem. """ array_dims = set(core_dims[0]) broadcasted = [arrays[0]] for dims, array in zip(core_dims[1:], arrays[1:]): assert set(dims).issubset(array_dims) order = [dims.index(d) for d in core_dims[0] if d in dims] array = array.transpose(*order) axis = [core_dims[0].index(d) for d in core_dims[0] if d not in dims] broadcasted.append(np.expand_dims(array, axis)) return broadcasted def xarray_reduce( obj: T_Dataset | T_DataArray, *by: T_DataArray | Hashable, func: str | Aggregation, expected_groups: T_ExpectedGroupsOpt = None, isbin: bool | Sequence[bool] = False, sort: bool = True, dim: Dims | ellipsis = None, fill_value=None, dtype: np.typing.DTypeLike = None, method: str | None = None, engine: str | None = None, keep_attrs: bool | None = True, skipna: bool | None = None, min_count: int | None = None, reindex: ReindexStrategy | bool | None = None, **finalize_kwargs, ): """GroupBy reduce operations on xarray objects using numpy-groupies. Parameters ---------- obj : DataArray or Dataset Xarray object to reduce *by : DataArray or iterable of str or iterable of DataArray Variables with which to group by ``obj`` func : {"all", "any", "count", "sum", "nansum", "mean", "nanmean", \ "max", "nanmax", "min", "nanmin", "argmax", "nanargmax", "argmin", "nanargmin", \ "quantile", "nanquantile", "median", "nanmedian", "mode", "nanmode", \ "first", "nanfirst", "last", "nanlast"} or Aggregation Single function name or an Aggregation instance expected_groups : str or sequence Expected group labels corresponding to each `by` variable isbin : iterable of bool If True, the corresponding entry in ``expected_groups`` gives the bin edges. If False, the entry in ``expected_groups`` is treated as a simple label. sort : (optional), bool Whether groups should be returned in sorted order. Only applies for dask reductions when ``method`` is not ``"map-reduce"``. For ``"map-reduce"``, the groups are always sorted. dim : hashable dimension name along which to reduce. If None, reduces across all dimensions of `by` fill_value : Any Value used for missing groups in the output i.e. when one of the labels in ``expected_groups`` is not actually present in ``by``. dtype : data-type, optional DType for the output. Can be anything that is accepted by ``np.dtype``. method : {"map-reduce", "blockwise", "cohorts"}, optional Note that this arg is chosen by default using heuristics. Strategy for reduction of dask arrays only. * ``"map-reduce"``: First apply the reduction blockwise on ``array``, then combine a few neighbouring blocks, apply the reduction. Continue until finalizing. Usually, ``func`` will need to be an ``Aggregation`` instance for this method to work. Common aggregations are implemented. * ``"blockwise"``: Only reduce using blockwise and avoid aggregating blocks together.
Useful for resampling-style reductions where group members are always together. If ``by`` is 1D, ``array`` is automatically rechunked so that chunk boundaries line up with group boundaries i.e. each block contains all members of any group present in that block. For nD ``by``, you must make sure that all members of a group are present in a single block. * ``"cohorts"``: Finds group labels that tend to occur together ("cohorts"), indexes out cohorts and reduces that subset using "map-reduce", repeat for all cohorts. This works well for many time groupings where the group labels repeat at regular intervals like 'hour', 'month', dayofyear' etc. Optimize chunking ``array`` for this method by first rechunking using ``rechunk_for_cohorts`` (for 1D ``by`` only). engine : {"flox", "numpy", "numba", "numbagg"}, optional Algorithm to compute the groupby reduction on non-dask arrays and on each dask chunk: * ``"numpy"``: Use the vectorized implementations in ``numpy_groupies.aggregate_numpy``. This is the default choice because it works for most array types. * ``"flox"``: Use an internal implementation where the data is sorted so that all members of a group occur sequentially, and then numpy.ufunc.reduceat is to used for the reduction. This will fall back to ``numpy_groupies.aggregate_numpy`` for a reduction that is not yet implemented. * ``"numba"``: Use the implementations in ``numpy_groupies.aggregate_numba``. * ``"numbagg"``: Use the reductions supported by ``numbagg.grouped``. This will fall back to ``numpy_groupies.aggregate_numpy`` for a reduction that is not yet implemented. keep_attrs : bool, optional Preserve attrs? skipna : bool, optional If True, skip missing values (as marked by NaN). By default, only skips missing values for float dtypes; other dtypes either do not have a sentinel missing value (int) or ``skipna=True`` has not been implemented (object, datetime64 or timedelta64). min_count : int, default: None The required number of valid values to perform the operation. If fewer than min_count non-NA values are present the result will be NA. Only used if skipna is set to True or defaults to True for the array's dtype. reindex : ReindexStrategy | bool, optional Whether to "reindex" the blockwise reduced results to ``expected_groups`` (possibly automatically detected). If True, the intermediate result of the blockwise groupby-reduction has a value for all expected groups, and the final result is a simple reduction of those intermediates. In nearly all cases, this is a significant boost in computation speed. For cases like time grouping, this may result in large intermediates relative to the original block size. Avoid that by using ``method="cohorts"``. By default, it is turned off for argreductions. By default, the type of ``array`` is preserved. You may optionally reindex to a sparse array type to further control memory in the case of ``expected_groups`` being very large. Pass a ``ReindexStrategy`` instance with the appropriate ``array_type``, for example (``reindex=ReindexStrategy(blockwise=False, array_type=ReindexArrayType.SPARSE_COO)``). **finalize_kwargs: dict, optional kwargs passed to the finalize function, like ``ddof`` for var, std or ``q`` for quantile. Returns ------- DataArray or Dataset Reduced object See Also -------- flox.core.groupby_reduce Raises ------ NotImplementedError ValueError Examples -------- >>> import xarray as xr >>> from flox.xarray import xarray_reduce >>> # Create a group index: >>> labels = xr.DataArray( ... [1, 2, 3, 1, 2, 3, 0, 0, 0], ... dims="x", ... 
name="label", ... ) >>> # Create a DataArray to apply the group index on: >>> da = da = xr.ones_like(labels) >>> # Sum all values in da that matches the elements in the group index: >>> xarray_reduce(da, labels, func="sum") Size: 32B array([3, 2, 2, 2]) Coordinates: * label (label) int64 32B 0 1 2 3 """ if skipna is not None and isinstance(func, Aggregation): raise ValueError("skipna must be None when func is an Aggregation.") nby = len(by) for b in by: if isinstance(b, xr.DataArray) and b.name is None: raise ValueError("Cannot group by unnamed DataArrays.") # TODO: move to GroupBy._flox_reduce if keep_attrs is None: keep_attrs = True if isinstance(isbin, Sequence): isbins = isbin else: isbins = (isbin,) * nby expected_groups_valid = _validate_expected_groups(nby, expected_groups) if not sort: raise NotImplementedError("sort must be True for xarray_reduce") # eventually drop the variables we are grouping by maybe_drop = {b for b in by if isinstance(b, Hashable)} unindexed_dims = tuple( b for b, isbin_ in zip(by, isbins) if isinstance(b, Hashable) and not isbin_ and b in obj.dims and b not in obj.indexes ) by_da = tuple(obj[g] if isinstance(g, Hashable) else g for g in by) grouper_dims = [] for g in by_da: for d in g.dims: if d not in grouper_dims: grouper_dims.append(d) if isinstance(obj, xr.Dataset): ds = obj else: ds = obj._to_temp_dataset() try: from xarray.indexes import PandasMultiIndex except ImportError: PandasMultiIndex = tuple() # type: ignore[assignment, misc] more_drop = set() for var in maybe_drop: maybe_midx = ds._indexes.get(var, None) if isinstance(maybe_midx, PandasMultiIndex): idx_coord_names = set(tuple(maybe_midx.index.names) + (maybe_midx.dim,)) idx_other_names = idx_coord_names - set(maybe_drop) more_drop.update(idx_other_names) maybe_drop.update(more_drop) if dim is Ellipsis: if nby > 1: raise NotImplementedError("Multiple by are not allowed when dim is Ellipsis.") name_ = by_da[0].name if name_ in ds.dims and not isbins[0]: dim_tuple = tuple(d for d in obj.dims if d != name_) else: dim_tuple = tuple(obj.dims) elif dim is not None: dim_tuple = _atleast_1d(dim) else: dim_tuple = tuple(grouper_dims) # broadcast to make sure grouper dimensions are present in the array. exclude_dims = tuple(d for d in ds.dims if d not in grouper_dims and d not in dim_tuple) if any(d not in grouper_dims and d not in obj.dims for d in dim_tuple): raise ValueError(f"Cannot reduce over absent dimensions {dim}.") try: xr.align(ds, *by_da, join="exact", copy=False) except ValueError as e: raise ValueError("Object being grouped must be exactly aligned with every array in `by`.") from e needs_broadcast = any( not set(grouper_dims).issubset(set(variable.dims)) for variable in ds.data_vars.values() ) if needs_broadcast: ds_broad = xr.broadcast(ds, *by_da, exclude=exclude_dims)[0] else: ds_broad = ds dims_not_in_groupers = tuple(d for d in dim_tuple if d not in grouper_dims) if dims_not_in_groupers == tuple(dim_tuple) and not any(isbins): # reducing along a dimension along which groups do not vary # This is really just a normal reduction. # This is not right when binning so we exclude. 
if isinstance(func, str) and func.startswith("nan"): raise ValueError(f"Specify func={func[3:]}, skipna=True instead of func={func}") elif isinstance(func, Aggregation): raise NotImplementedError( "func must be a string when reducing along a dimension not present in `by`" ) # skipna is not supported for all reductions # https://github.com/pydata/xarray/issues/8819 kwargs = {"skipna": skipna} if skipna is not None else {} kwargs.update(finalize_kwargs) result = getattr(ds_broad, func)(dim=dim_tuple, **kwargs) if isinstance(obj, xr.DataArray): return obj._from_temp_dataset(result) else: return result ds = ds.drop_vars([var for var in maybe_drop if var in ds.variables]) axis = tuple(range(-len(dim_tuple), 0)) # Set expected_groups and convert to index since we need coords, sizes # for output xarray objects expected_groups_valid_list: list[T_ExpectIndex] = [] group_names: tuple[Any, ...] = () group_sizes: dict[Any, int] = {} for idx, (b_, expect, isbin_) in enumerate(zip(by_da, expected_groups_valid, isbins)): group_name = f"{b_.name}_bins" if isbin_ or isinstance(expect, pd.IntervalIndex) else b_.name group_names += (group_name,) if isbin_ and isinstance(expect, int): raise NotImplementedError("flox does not support binning into an integer number of bins yet.") expect1: T_ExpectOpt if expect is None: if isbin_: raise ValueError( f"Please provide bin edges for group variable {idx} " f"named {group_name} in expected_groups." ) expect1 = _get_expected_groups(b_.data, sort=sort) else: expect1 = expect expect_index = _convert_expected_groups_to_index((expect1,), (isbin_,), sort=sort)[0] # The if-check is for type hinting mainly, it narrows down the return # type of _convert_expected_groups_to_index to pure pd.Index: if expect_index is not None: expected_groups_valid_list.append(expect_index) group_sizes[group_name] = len(expect_index) else: # This will never be reached raise ValueError("expect_index cannot be None") def wrapper(array, *by, func, skipna, core_dims, **kwargs): array, *by = _broadcast_size_one_dims(array, *by, core_dims=core_dims) # Handle skipna here because I need to know dtype to make a good default choice. # We cannot handle this easily for xarray Datasets in xarray_reduce if skipna and func in ["all", "any", "count"]: raise ValueError(f"skipna cannot be truthy for {func} reductions.") if skipna or (skipna is None and isinstance(func, str) and array.dtype.kind in "cfO"): if "nan" not in func and func not in ["all", "any", "count"]: func = f"nan{func}" result, *groups = groupby_reduce(array, *by, func=func, **kwargs) # Transpose the new quantile dimension to the end. This is ugly. # but new core dimensions are expected at the end :/ # but groupby_reduce inserts them at the beginning if func in ["quantile", "nanquantile"]: (newdim,) = quantile_new_dims_func(**finalize_kwargs) if not newdim.is_scalar: # NOTE: _restore_dim_order will move any new dims to the end anyway. # This transpose simply makes it easy to specify output_core_dims # output dim order: (*broadcast_dims, *group_dims, quantile_dim) result = np.moveaxis(result, 0, -1) return result # These data variables do not have any of the core dimension, # take them out to prevent errors.
# apply_ufunc can handle non-dim coordinate variables without core dimensions missing_dim = {} if isinstance(obj, xr.Dataset): # broadcasting means the group dim gets added to ds, so we check the original obj for k, v in obj.data_vars.items(): is_missing_dim = not (any(d in v.dims for d in dim_tuple)) if is_missing_dim: missing_dim[k] = v # dim_tuple contains dimensions we are reducing over. These need to be the last # core dimensions to be synchronized with axis. input_core_dims = [[d for d in grouper_dims if d not in dim_tuple] + list(dim_tuple)] input_core_dims += [list(b.dims) for b in by_da] newdims: tuple[Dim, ...] = ( quantile_new_dims_func(**finalize_kwargs) if func in ["quantile", "nanquantile"] else () ) output_core_dims = [d for d in input_core_dims[0] if d not in dim_tuple] output_core_dims.extend(group_names) vector_dims = [dim.name for dim in newdims if not dim.is_scalar] output_core_dims.extend(vector_dims) output_sizes = group_sizes output_sizes.update({dim.name: dim.size for dim in newdims if dim.size != 0}) actual = xr.apply_ufunc( wrapper, ds_broad.drop_vars(tuple(missing_dim)).transpose(..., *grouper_dims), *by_da, input_core_dims=input_core_dims, # for xarray's test_groupby_duplicate_coordinate_labels exclude_dims=set(dim_tuple), output_core_dims=[output_core_dims], dask="allowed", dask_gufunc_kwargs=dict( output_sizes=output_sizes, output_dtypes=[dtype] if dtype is not None else None, ), keep_attrs=keep_attrs, kwargs={ "func": func, "axis": axis, "sort": sort, "fill_value": fill_value, "method": method, "min_count": min_count, "skipna": skipna, "engine": engine, "reindex": reindex, "expected_groups": tuple(expected_groups_valid_list), "isbin": isbins, "finalize_kwargs": finalize_kwargs, "dtype": dtype, "core_dims": input_core_dims, }, ) # restore non-dim coord variables without the core dimension # TODO: shouldn't apply_ufunc handle this? for var in set(ds_broad._coord_names) - set(ds_broad._indexes) - set(ds_broad.dims): if all(d not in ds_broad[var].dims for d in dim_tuple): actual[var] = ds_broad[var] for newdim in newdims: actual.coords[newdim.name] = newdim.values if newdim.is_scalar else np.array(newdim.values) expect3: T_ExpectIndex | np.ndarray for name, expect2, by_ in zip(group_names, expected_groups_valid_list, by_da): # Can't remove this until xarray handles IntervalIndex: if isinstance(expect2, pd.IntervalIndex): # TODO: Only place where expect3 is an ndarray, remove the type if xarray # starts supporting IntervalIndex. expect3 = expect2.to_numpy() else: expect3 = expect2 if isinstance(actual, xr.Dataset) and name in actual: actual = actual.drop_vars(name) # When grouping by MultiIndex, expect is an pd.Index wrapping # an object array of tuples if ( name in ds_broad.indexes and isinstance(ds_broad.indexes[name], pd.MultiIndex) and not isinstance(expect3, pd.RangeIndex) ): levelnames = ds_broad.indexes[name].names if isinstance(expect3, np.ndarray): # TODO: workaround for IntervalIndex issue. 
raise NotImplementedError expect3 = pd.MultiIndex.from_tuples(expect3.values.tolist(), names=levelnames) actual[name] = expect3 if Version(xr.__version__) > Version("2022.03.0"): actual = actual.set_coords(levelnames) else: actual[name] = expect3 if keep_attrs: actual[name].attrs = by_.attrs if unindexed_dims: actual = actual.drop_vars(unindexed_dims) if nby == 1: for var in actual: if isinstance(obj, xr.Dataset): template = obj[var] else: template = obj if actual[var].ndim > 1 + len(vector_dims): no_groupby_reorder = isinstance(obj, xr.Dataset) # do not re-order dataarrays inside datasets actual[var] = _restore_dim_order( actual[var].variable, template, by_da[0], no_groupby_reorder=no_groupby_reorder, ) if missing_dim: for k, v in missing_dim.items(): missing_group_dims = {d: size for d, size in group_sizes.items() if d not in v.dims} # The expand_dims is for backward compat with xarray's questionable behaviour if missing_group_dims: actual[k] = v.expand_dims(missing_group_dims).variable else: actual[k] = v.variable if isinstance(obj, xr.DataArray): return obj._from_temp_dataset(actual) else: return actual def rechunk_for_cohorts( obj: T_DataArray | T_Dataset, dim: str, labels: T_DataArray, force_new_chunk_at, chunksize: int | None = None, ignore_old_chunks: bool = False, debug: bool = False, ): """ Rechunks array so that each new chunk contains groups that always occur together. Parameters ---------- obj : DataArray or Dataset array to rechunk dim : str Dimension to rechunk labels : DataArray 1D Group labels to align chunks with. This routine works well when ``labels`` has repeating patterns: e.g. ``1, 2, 3, 1, 2, 3, 4, 1, 2, 3`` though there is no requirement that the pattern must contain sequences. force_new_chunk_at : Sequence Labels at which we always start a new chunk. For the example ``labels`` array, this would be `1`. chunksize : int, optional nominal chunk size. Chunk size is exceeded when the label in ``force_new_chunk_at`` is less than ``chunksize//2`` elements away. If None, uses median chunksize along ``dim``. Returns ------- DataArray or Dataset Xarray object with rechunked arrays. """ return _rechunk( rechunk_array_for_cohorts, obj, dim, labels, force_new_chunk_at=force_new_chunk_at, chunksize=chunksize, ignore_old_chunks=ignore_old_chunks, debug=debug, ) def rechunk_for_blockwise(obj: T_DataArray | T_Dataset, dim: str, labels: T_DataArray): """ Rechunks array so that group boundaries line up with chunk boundaries, allowing embarrassingly parallel group reductions. This only works when the groups are sequential (e.g. labels = ``[0,0,0,1,1,1,1,2,2]``). Such patterns occur when using ``.resample``. Parameters ---------- obj : DataArray or Dataset Array to rechunk dim : hashable Name of dimension to rechunk labels : DataArray Group labels Returns ------- DataArray or Dataset Xarray object with rechunked arrays. 
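    Examples
    --------
    A minimal sketch (the DataArray and labels below are illustrative and assume
    a dask-backed object):

    >>> import numpy as np
    >>> import xarray as xr
    >>> labels = xr.DataArray([0, 0, 0, 1, 1, 1, 1, 2, 2], dims="x", name="label")
    >>> da = xr.DataArray(np.ones(9), dims="x").chunk({"x": 4})
    >>> rechunked = rechunk_for_blockwise(da, "x", labels)  # doctest: +SKIP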
""" return _rechunk(rechunk_array_for_blockwise, obj, dim, labels) def _rechunk(func, obj, dim, labels, **kwargs): """Common logic for rechunking xarray objects.""" obj = obj.copy(deep=True) if isinstance(obj, xr.Dataset): for var in obj: if obj[var].chunks is not None: obj[var] = obj[var].copy( data=func( obj[var].data, axis=obj[var].get_axis_num(dim), labels=labels.data, **kwargs, ) ) else: if obj.chunks is not None: obj = obj.copy(data=func(obj.data, axis=obj.get_axis_num(dim), labels=labels.data, **kwargs)) return obj flox-0.10.3/flox/xrdtypes.py000066400000000000000000000136321477552625700160000ustar00rootroot00000000000000import functools import numpy as np from numpy.typing import DTypeLike from . import xrutils as utils # Use as a sentinel value to indicate a dtype appropriate NA value. NA = utils.ReprObject("") @functools.total_ordering class AlwaysGreaterThan: def __gt__(self, other): return True def __eq__(self, other): return isinstance(other, type(self)) @functools.total_ordering class AlwaysLessThan: def __lt__(self, other): return True def __eq__(self, other): return isinstance(other, type(self)) # Equivalence to np.inf (-np.inf) for object-type INF = AlwaysGreaterThan() NINF = AlwaysLessThan() def maybe_promote(dtype): """Simpler equivalent of pandas.core.common._maybe_promote Parameters ---------- dtype : np.dtype Returns ------- dtype : Promoted dtype that can hold missing values. fill_value : Valid missing value for the promoted dtype. """ # N.B. these casting rules should match pandas if np.issubdtype(dtype, np.floating): fill_value = np.nan elif np.issubdtype(dtype, np.timedelta64): # See https://github.com/numpy/numpy/issues/10685 # np.timedelta64 is a subclass of np.integer # Check np.timedelta64 before np.integer fill_value = np.timedelta64("NaT") elif np.issubdtype(dtype, np.integer): dtype = np.float32 if dtype.itemsize <= 2 else np.float64 fill_value = np.nan elif np.issubdtype(dtype, np.complexfloating): fill_value = np.nan + np.nan * 1j elif np.issubdtype(dtype, np.datetime64): fill_value = np.datetime64("NaT") else: dtype = object fill_value = np.nan return np.dtype(dtype), fill_value NAT_TYPES = {np.datetime64("NaT").dtype, np.timedelta64("NaT").dtype} def get_fill_value(dtype): """Return an appropriate fill value for this dtype. Parameters ---------- dtype : np.dtype Returns ------- fill_value : Missing value corresponding to this dtype. """ _, fill_value = maybe_promote(dtype) return fill_value def get_pos_infinity(dtype, max_for_int=False): """Return an appropriate positive infinity for this dtype. Parameters ---------- dtype : np.dtype max_for_int : bool Return np.iinfo(dtype).max instead of np.inf Returns ------- fill_value : positive infinity value corresponding to this dtype. """ if issubclass(dtype.type, np.floating): return np.inf if issubclass(dtype.type, np.integer): if max_for_int: dtype = np.int64 if dtype.kind in "Mm" else dtype return np.iinfo(dtype).max else: return np.inf if issubclass(dtype.type, np.complexfloating): return np.inf + 1j * np.inf return INF def get_neg_infinity(dtype, min_for_int=False): """Return an appropriate positive infinity for this dtype. Parameters ---------- dtype : np.dtype min_for_int : bool Return np.iinfo(dtype).min instead of -np.inf Returns ------- fill_value : positive infinity value corresponding to this dtype. 
""" if is_datetime_like(dtype): unit, _ = np.datetime_data(dtype) return dtype.type(np.iinfo(np.int64).min + 1, unit) if issubclass(dtype.type, np.floating): return -np.inf if issubclass(dtype.type, np.integer): if min_for_int: return np.iinfo(dtype).min else: return -np.inf if issubclass(dtype.type, np.complexfloating): return -np.inf - 1j * np.inf return NINF def is_datetime_like(dtype): """Check if a dtype is a subclass of the numpy datetime types""" return np.issubdtype(dtype, np.datetime64) or np.issubdtype(dtype, np.timedelta64) def _normalize_dtype( dtype: DTypeLike, array_dtype: np.dtype, preserves_dtype: bool, fill_value=None ) -> np.dtype: if dtype is None: if not preserves_dtype: dtype = _maybe_promote_int(array_dtype) else: dtype = array_dtype if dtype is np.floating: # mean, std, var always result in floating # but we preserve the array's dtype if it is floating if array_dtype.kind in "fcmM": dtype = array_dtype else: dtype = np.dtype("float64") elif not isinstance(dtype, np.dtype): dtype = np.dtype(dtype) if fill_value not in [None, INF, NINF, NA]: dtype = np.result_type(dtype, fill_value) return dtype def _maybe_promote_int(dtype) -> np.dtype: # https://numpy.org/doc/stable/reference/generated/numpy.prod.html # The dtype of a is used by default unless a has an integer dtype of less precision # than the default platform integer. if not isinstance(dtype, np.dtype): dtype = np.dtype(dtype) if dtype.kind == "i": dtype = np.result_type(dtype, np.int_) elif dtype.kind == "u": dtype = np.result_type(dtype, np.uint) return dtype def _get_fill_value(dtype, fill_value): """Returns dtype appropriate infinity. Returns +Inf equivalent for None.""" if fill_value in [None, NA] and dtype.kind in "US": return "" if fill_value == INF or fill_value is None: return get_pos_infinity(dtype, max_for_int=True) if fill_value == NINF: return get_neg_infinity(dtype, min_for_int=True) if fill_value == NA: if np.issubdtype(dtype, np.floating) or np.issubdtype(dtype, np.complexfloating): return np.nan # This is madness, but npg checks that fill_value is compatible # with array dtype even if the fill_value is never used. elif np.issubdtype(dtype, np.integer): return get_neg_infinity(dtype, min_for_int=True) elif np.issubdtype(dtype, np.timedelta64): return np.timedelta64("NaT") elif np.issubdtype(dtype, np.datetime64): return np.datetime64("NaT") else: return None return fill_value flox-0.10.3/flox/xrutils.py000066400000000000000000000307571477552625700156370ustar00rootroot00000000000000# The functions defined here were copied based on the source code # defined in xarray import datetime import importlib from collections.abc import Iterable from typing import Any import numpy as np import pandas as pd from packaging.version import Version def module_available(module: str, minversion: str | None = None) -> bool: """Checks whether a module is installed without importing it. Use this for a lightweight check and lazy imports. Parameters ---------- module : str Name of the module. Returns ------- available : bool Whether the module is installed. 
""" has = importlib.util.find_spec(module) is not None if has: mod = importlib.import_module(module) return Version(mod.__version__) >= Version(minversion) if minversion is not None else True else: return False if module_available("numpy", minversion="2.0.0"): from numpy.lib.array_utils import normalize_axis_index else: from numpy.core.numeric import normalize_axis_index # type: ignore[no-redef] try: import cftime except ImportError: cftime = None try: import dask.array dask_array_type = dask.array.Array except ImportError: dask_array_type = () # type: ignore[assignment, misc] def asarray(data, xp=np): return data if is_duck_array(data) else xp.asarray(data) def is_duck_array(value: Any) -> bool: """Checks if value is a duck array.""" if isinstance(value, np.ndarray): return True return ( hasattr(value, "ndim") and hasattr(value, "shape") and hasattr(value, "dtype") and ( (hasattr(value, "__array_function__") and hasattr(value, "__array_ufunc__")) or hasattr(value, "__array_namespace__") ) ) def is_chunked_array(x) -> bool: """True if dask or cubed""" return is_duck_dask_array(x) or (is_duck_array(x) and hasattr(x, "chunks")) def is_dask_collection(x): try: import dask return dask.is_dask_collection(x) except ImportError: return False def is_duck_dask_array(x): return is_duck_array(x) and is_dask_collection(x) def is_duck_cubed_array(x): try: import cubed return is_duck_array(x) and isinstance(x, cubed.Array) except ImportError: return False class ReprObject: """Object that prints as the given value, for use with sentinel values.""" __slots__ = ("_value",) def __init__(self, value: str): self._value = value def __repr__(self) -> str: return self._value def __eq__(self, other) -> bool: if isinstance(other, ReprObject): return self._value == other._value return False def __hash__(self) -> int: return hash((type(self), self._value)) def __dask_tokenize__(self): from dask.base import normalize_token return normalize_token((type(self), self._value)) def is_scalar(value: Any, include_0d: bool = True) -> bool: """Whether to treat a value as a scalar. 
Any non-iterable, string, dict, or 0-D array """ NON_NUMPY_SUPPORTED_ARRAY_TYPES = (dask_array_type, pd.Index) if include_0d: include_0d = getattr(value, "ndim", None) == 0 return ( include_0d or isinstance(value, str | bytes | dict) or not ( isinstance(value, (Iterable,) + NON_NUMPY_SUPPORTED_ARRAY_TYPES) or hasattr(value, "__array_function__") ) ) def notnull(data): if not is_duck_array(data): data = np.asarray(data) scalar_type = data.dtype.type if issubclass(scalar_type, np.bool_ | np.integer | np.character | np.void): # these types cannot represent missing values return np.broadcast_to(np.array(True), data.shape) else: out = isnull(data) np.logical_not(out, out=out) return out def isnull(data: Any): if data is None: return False if not is_duck_array(data): data = np.asarray(data) scalar_type = data.dtype.type if issubclass(scalar_type, np.datetime64 | np.timedelta64): # datetime types use NaT for null # note: must check timedelta64 before integers, because currently # timedelta64 inherits from np.integer return np.isnat(data) elif issubclass(scalar_type, np.inexact): # float types use NaN for null return np.isnan(data) elif issubclass(scalar_type, np.bool_ | np.integer | np.character | np.void): # these types cannot represent missing values return np.broadcast_to(np.array(False), data.shape) else: # at this point, array should have dtype=object if isinstance(data, (np.ndarray, dask_array_type)): # noqa return pd.isnull(data) # type: ignore[arg-type] else: # Not reachable yet, but intended for use with other duck array # types. For full consistency with pandas, we should accept None as # a null value as well as NaN, but it isn't clear how to do this # with duck typing. return data != data def datetime_to_numeric(array, offset=None, datetime_unit=None, dtype=float): """Convert an array containing datetime-like data to numerical values. Convert the datetime array to a timedelta relative to an offset. Parameters ---------- array : array-like Input data offset : None, datetime or cftime.datetime Datetime offset. If None, this is set by default to the array's minimum value to reduce round off errors. datetime_unit : {None, Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as} If not None, convert output to a given datetime unit. Note that some conversions are not allowed due to non-linear relationships between units. dtype : dtype Output dtype. Returns ------- array Numerical representation of datetime object relative to an offset. Notes ----- Some datetime unit conversions won't work, for example from days to years, even though some calendars would allow for them (e.g. no_leap). This is because there is no `cftime.timedelta` object. """ # TODO: make this function dask-compatible? # Set offset to minimum if not given if offset is None: if array.dtype.kind in "Mm": offset = _datetime_nanmin(array) else: offset = array.min() # Compute timedelta object. # For np.datetime64, this can silently yield garbage due to overflow. # One option is to enforce 1970-01-01 as the universal offset. # This map_blocks call is for backwards compatibility. # dask == 2021.04.1 does not support subtracting object arrays # which is required for cftime if is_duck_dask_array(array) and np.issubdtype(array.dtype, object): array = array.map_blocks(lambda a, b: a - b, offset, meta=array._meta) else: array = array - offset # Scalar is converted to 0d-array if not hasattr(array, "dtype"): array = np.array(array) # Convert timedelta objects to float by first converting to microseconds. 
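    # Object-dtype timedeltas (e.g. the result of subtracting cftime dates) take the
    # pure-Python path below; NaT-aware numpy dtypes are handled afterwards, with
    # np.where mapping NaT -> NaN.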
if array.dtype.kind in "O": return py_timedelta_to_float(array, datetime_unit or "ns").astype(dtype) # Convert np.NaT to np.nan elif array.dtype.kind in "mM": # Convert to specified timedelta units. if datetime_unit: array = array / np.timedelta64(1, datetime_unit) return np.where(isnull(array), np.nan, array.astype(dtype)) def timedelta_to_numeric(value, datetime_unit="ns", dtype=float): """Convert a timedelta-like object to numerical values. Parameters ---------- value : datetime.timedelta, numpy.timedelta64, pandas.Timedelta, str Time delta representation. datetime_unit : {Y, M, W, D, h, m, s, ms, us, ns, ps, fs, as} The time units of the output values. Note that some conversions are not allowed due to non-linear relationships between units. dtype : type The output data type. """ import datetime as dt if isinstance(value, dt.timedelta): out = py_timedelta_to_float(value, datetime_unit) elif isinstance(value, np.timedelta64): out = np_timedelta64_to_float(value, datetime_unit) elif isinstance(value, pd.Timedelta): out = pd_timedelta_to_float(value, datetime_unit) elif isinstance(value, str): try: a = pd.to_timedelta(value) except ValueError: raise ValueError(f"Could not convert {value!r} to timedelta64 using pandas.to_timedelta") return py_timedelta_to_float(a, datetime_unit) else: raise TypeError( f"Expected value of type str, pandas.Timedelta, datetime.timedelta " f"or numpy.timedelta64, but received {type(value).__name__}" ) return out.astype(dtype) def _to_pytimedelta(array, unit="us"): return array.astype(f"timedelta64[{unit}]").astype(datetime.timedelta) def np_timedelta64_to_float(array, datetime_unit): """Convert numpy.timedelta64 to float. Notes ----- The array is first converted to microseconds, which is less likely to cause overflow errors. """ array = array.astype("timedelta64[ns]").astype(np.float64) conversion_factor = np.timedelta64(1, "ns") / np.timedelta64(1, datetime_unit) return conversion_factor * array def pd_timedelta_to_float(value, datetime_unit): """Convert pandas.Timedelta to float. Notes ----- Built on the assumption that pandas timedelta values are in nanoseconds, which is also the numpy default resolution. """ value = value.to_timedelta64() return np_timedelta64_to_float(value, datetime_unit) def _timedelta_to_seconds(array): return np.reshape([a.total_seconds() for a in array.ravel()], array.shape) * 1e6 def py_timedelta_to_float(array, datetime_unit): """Convert a timedelta object to a float, possibly at a loss of resolution.""" array = asarray(array) if is_duck_dask_array(array): array = array.map_blocks(_timedelta_to_seconds, meta=np.array([], dtype=np.float64)) else: array = _timedelta_to_seconds(array) conversion_factor = np.timedelta64(1, "us") / np.timedelta64(1, datetime_unit) return conversion_factor * array def _contains_cftime_datetimes(array) -> bool: """Check if an array contains cftime.datetime objects""" if cftime is None: return False else: if array.dtype == np.dtype("O") and array.size > 0: sample = array.ravel()[0] if is_duck_dask_array(sample): sample = sample.compute() if isinstance(sample, np.ndarray): sample = sample.item() return isinstance(sample, cftime.datetime) else: return False def _datetime_nanmin(array): """nanmin() function for datetime64. 
Caveats that this function deals with: - In numpy < 1.18, min() on datetime64 incorrectly ignores NaT - numpy nanmin() don't work on datetime64 (all versions at the moment of writing) - dask min() does not work on datetime64 (all versions at the moment of writing) """ from .xrdtypes import is_datetime_like dtype = array.dtype assert is_datetime_like(dtype) # (NaT).astype(float) does not produce NaN... array = np.where(pd.isnull(array), np.nan, array.astype(float)) array = np.nanmin(array) if isinstance(array, float): array = np.array(array) # ...but (NaN).astype("M8") does produce NaT return array.astype(dtype) def _select_along_axis(values, idx, axis): other_ind = np.ix_(*[np.arange(s) for s in idx.shape]) sl = other_ind[:axis] + (idx,) + other_ind[axis:] return values[sl] def nanfirst(values, axis, keepdims=False): if isinstance(axis, tuple): (axis,) = axis if not is_duck_array(values): values = np.asarray(values) axis = normalize_axis_index(axis, values.ndim) idx_first = np.argmax(~isnull(values), axis=axis) result = _select_along_axis(values, idx_first, axis) if keepdims: return np.expand_dims(result, axis=axis) else: return result def nanlast(values, axis, keepdims=False): if isinstance(axis, tuple): (axis,) = axis if not is_duck_array(values): values = np.asarray(values) axis = normalize_axis_index(axis, values.ndim) rev = (slice(None),) * axis + (slice(None, None, -1),) idx_last = -1 - np.argmax(~isnull(values)[rev], axis=axis) result = _select_along_axis(values, idx_last, axis) if keepdims: return np.expand_dims(result, axis=axis) else: return result flox-0.10.3/pyproject.toml000066400000000000000000000057761477552625700155220ustar00rootroot00000000000000[project] name = "flox" description = "GroupBy operations for dask.array" license = {file = "LICENSE"} readme = "README.md" requires-python = ">=3.10" keywords = ["xarray", "dask", "groupby"] classifiers = [ "Development Status :: 4 - Beta", "License :: OSI Approved :: Apache Software License", "Natural Language :: English", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] dependencies = [ "pandas>=1.5", "packaging>=21.3", "numpy>=1.22", "numpy_groupies>=0.9.19", "toolz", "scipy>=1.9", ] dynamic=["version"] [project.urls] homepage = "https://flox.readthedocs.io" documentation = "https://flox.readthedocs.io" repository = "https://github.com/xarray-contrib/flox.git" changelog = "https://github.com/xarray-contrib/flox/releases" [project.optional-dependencies] all = ["cachey", "dask", "numba", "numbagg", "xarray"] test = ["netCDF4"] [build-system] requires = [ "pandas>=1.5", "numpy>=1.22", "numpy_groupies>=0.9.19", "scipy>=1.9", "toolz", "setuptools>=61.0.0", "setuptools_scm[toml]>=7.0", ] build-backend = "setuptools.build_meta" [tool.setuptools] packages = ["flox"] [tool.setuptools.dynamic] version = {attr = "flox.__version__"} [tool.setuptools_scm] fallback_version = "999" write_to = "flox/_version.py" write_to_template= '__version__ = "{version}"' [tool.ruff] line-length = 110 target-version = "py310" builtins = ["ellipsis"] exclude = [ ".eggs", "doc", ] [tool.ruff.lint] # E402: module level import not at top of file # E501: line too long - let black worry about that # E731: do not assign a lambda expression, use a def ignore = [ "E402", "E501", "E731", ] select = [ # Bugbear # "B", # Pyflakes "F", # Pycodestyle "E", "W", # isort "I", # 
Pyupgrade "UP", ] [tool.ruff.lint.isort] known-first-party = ["flox"] known-third-party = [ "dask", "numpy", "numpy_groupies", "pandas", "pkg_resources", "pytest", "setuptools", "scipy", "xarray" ] [tool.ruff.format] # Enable reformatting of code snippets in docstrings. docstring-code-format = true [tool.mypy] allow_redefinition = true files = "**/*.py" show_error_codes = true warn_unused_ignores = true warn_unreachable = true enable_error_code = ["ignore-without-code", "redundant-expr", "truthy-bool"] exclude=["asv_bench/pkgs"] [[tool.mypy.overrides]] module=[ "asv_runner.*", "cachey", "cftime", "cubed.*", "dask.*", "importlib_metadata", "numba", "numbagg.*", "numpy_groupies.*", "matplotlib.*", "pandas", "setuptools", "scipy.*", "sparse.*", "toolz.*", ] ignore_missing_imports = true [tool.pytest.ini_options] addopts = "--tb=short" minversion = "7" testpaths = ["tests"] [tool.codespell] ignore-words-list = "nd,nax,coo" skip = "*.html" flox-0.10.3/setup.py000066400000000000000000000000751477552625700143030ustar00rootroot00000000000000#!/usr/bin/env python from setuptools import setup setup() flox-0.10.3/tests/000077500000000000000000000000001477552625700137315ustar00rootroot00000000000000flox-0.10.3/tests/__init__.py000066400000000000000000000205051477552625700160440ustar00rootroot00000000000000import importlib from contextlib import nullcontext import numpy as np import packaging.version import pandas as pd import pytest pd_types = (pd.Index,) try: import dask import dask.array as da dask_array_type = da.Array except ImportError: dask_array_type = () # type: ignore[assignment, misc] try: import sparse sparse_array_type = sparse.COO except ImportError: sparse_array_type = () try: import xarray as xr xr_types = (xr.DataArray, xr.Dataset) except ImportError: xr_types = () # type: ignore[assignment] def _importorskip(modname, minversion=None): try: mod = importlib.import_module(modname) has = True if minversion is not None: if LooseVersion(mod.__version__) < LooseVersion(minversion): raise ImportError("Minimum version not satisfied") except ImportError: has = False func = pytest.mark.skipif(not has, reason=f"requires {modname}") return has, func def LooseVersion(vstring): # Our development version is something like '0.10.9+aac7bfc' # This function just ignored the git commit id. vstring = vstring.split("+")[0] return packaging.version.Version(vstring) has_cftime, requires_cftime = _importorskip("cftime") has_cubed, requires_cubed = _importorskip("cubed") has_dask, requires_dask = _importorskip("dask") has_sparse, requires_sparse = _importorskip("sparse") has_numba, requires_numba = _importorskip("numba") has_numbagg, requires_numbagg = _importorskip("numbagg") has_scipy, requires_scipy = _importorskip("scipy") has_xarray, requires_xarray = _importorskip("xarray") class CountingScheduler: """Simple dask scheduler counting the number of computes. Reference: https://stackoverflow.com/questions/53289286/""" def __init__(self, max_computes=0): self.total_computes = 0 self.max_computes = max_computes def __call__(self, dsk, keys, **kwargs): self.total_computes += 1 if self.total_computes > self.max_computes: raise RuntimeError(f"Too many computes. 
Total: {self.total_computes} > max: {self.max_computes}.") return dask.get(dsk, keys, **kwargs) def raise_if_dask_computes(max_computes=0): # return a dummy context manager so that this can be used for non-dask objects if not has_dask: return nullcontext() scheduler = CountingScheduler(max_computes) return dask.config.set(scheduler=scheduler) def assert_equal(a, b, tolerance=None): __tracebackhide__ = True if isinstance(a, list): a = np.array(a) if isinstance(b, list): b = np.array(b) if isinstance(a, pd_types) or isinstance(b, pd_types): pd.testing.assert_index_equal(a, b) return if has_xarray and isinstance(a, xr_types) or isinstance(b, xr_types): xr.testing.assert_identical(a, b) return if tolerance is None: if np.issubdtype(a.dtype, np.float64) | np.issubdtype(b.dtype, np.float64): tolerance = {"atol": 1e-18, "rtol": 1e-15} else: tolerance = {} # Always run the numpy comparison first, so that we get nice error messages with dask. # sometimes it's nice to see values and shapes # rather than being dropped into some file in dask if a.dtype != b.dtype: raise AssertionError(f"a and b have different dtypes: (a: {a.dtype}, b: {b.dtype})") if has_dask: a_eager = a.compute() if isinstance(a, dask_array_type) else a b_eager = b.compute() if isinstance(b, dask_array_type) else b else: a_eager, b_eager = a, b if has_sparse: one_is_sparse = isinstance(a_eager, sparse_array_type) or isinstance(b_eager, sparse_array_type) a_eager = a_eager.todense() if isinstance(a_eager, sparse_array_type) else a_eager b_eager = b_eager.todense() if isinstance(b_eager, sparse_array_type) else b_eager else: one_is_sparse = False if a.dtype.kind in "SUMmO": np.testing.assert_equal(a_eager, b_eager) else: np.testing.assert_allclose(a_eager, b_eager, equal_nan=True, **tolerance) if has_dask and isinstance(a, dask_array_type) or isinstance(b, dask_array_type): # does some validation of the dask graph dask_assert_eq(a, b, equal_nan=True, check_type=not one_is_sparse) def assert_equal_tuple(a, b): """assert_equal for .blocks indexing tuples""" assert len(a) == len(b) for a_, b_ in zip(a, b): assert type(a_) is type(b_) if isinstance(a_, np.ndarray): np.testing.assert_array_equal(a_, b_) else: assert a_ == b_ SCIPY_STATS_FUNCS = ("mode", "nanmode") BLOCKWISE_FUNCS = ("median", "nanmedian", "quantile", "nanquantile") + SCIPY_STATS_FUNCS ALL_FUNCS = ( "sum", "nansum", "argmax", "nanfirst", "nanargmax", "prod", "nanprod", "mean", "nanmean", "var", "nanvar", "std", "nanstd", "max", "nanmax", "min", "nanmin", "argmin", "nanargmin", "any", "all", "nanlast", "median", "nanmedian", "quantile", "nanquantile", ) + tuple(SCIPY_STATS_FUNCS) def dask_assert_eq( a, b, check_shape=True, check_graph=True, check_meta=True, check_chunks=True, check_ndim=True, check_type=True, check_dtype=True, equal_nan=True, scheduler="sync", **kwargs, ): """dask.array.utils.assert_eq modified to skip value checks. Their code is buggy for some dtypes. 
We just check values through numpy and care about validating the graph in this function.""" from dask.array.utils import _get_dt_meta_computed a_original = a b_original = b if isinstance(a, list | int | float): a = np.array(a) if isinstance(b, list | int | float): b = np.array(b) a, adt, a_meta, a_computed = _get_dt_meta_computed( a, check_shape=check_shape, check_graph=check_graph, check_chunks=check_chunks, check_ndim=check_ndim, scheduler=scheduler, ) b, bdt, b_meta, b_computed = _get_dt_meta_computed( b, check_shape=check_shape, check_graph=check_graph, check_chunks=check_chunks, check_ndim=check_ndim, scheduler=scheduler, ) if check_type: _a = a if a.shape else a.item() _b = b if b.shape else b.item() assert type(_a) is type(_b), f"a and b have different types (a: {type(_a)}, b: {type(_b)})" if check_meta: if hasattr(a, "_meta") and hasattr(b, "_meta"): dask_assert_eq(a._meta, b._meta) if hasattr(a_original, "_meta"): msg = ( f"compute()-ing 'a' changes its number of dimensions " f"(before: {a_original._meta.ndim}, after: {a.ndim})" ) assert a_original._meta.ndim == a.ndim, msg if a_meta is not None: msg = ( f"compute()-ing 'a' changes its type " f"(before: {type(a_original._meta)}, after: {type(a_meta)})" ) assert type(a_original._meta) is type(a_meta), msg if not (np.isscalar(a_meta) or np.isscalar(a_computed)): msg = ( f"compute()-ing 'a' results in a different type than implied by its metadata " f"(meta: {type(a_meta)}, computed: {type(a_computed)})" ) assert type(a_meta) is type(a_computed), msg if hasattr(b_original, "_meta"): msg = ( f"compute()-ing 'b' changes its number of dimensions " f"(before: {b_original._meta.ndim}, after: {b.ndim})" ) assert b_original._meta.ndim == b.ndim, msg if b_meta is not None: msg = ( f"compute()-ing 'b' changes its type " f"(before: {type(b_original._meta)}, after: {type(b_meta)})" ) assert type(b_original._meta) is type(b_meta), msg if not (np.isscalar(b_meta) or np.isscalar(b_computed)): msg = ( f"compute()-ing 'b' results in a different type than implied by its metadata " f"(meta: {type(b_meta)}, computed: {type(b_computed)})" ) assert type(b_meta) is type(b_computed), msg flox-0.10.3/tests/__snapshots__/000077500000000000000000000000001477552625700165475ustar00rootroot00000000000000flox-0.10.3/tests/__snapshots__/test_cohorts.ambr000066400000000000000000004632371477552625700221510ustar00rootroot00000000000000# serializer version: 1 # name: test_snapshot_cohorts[ERA5DayOfYear] dict({ tuple( 0, 365, 183, ): list([ 0, ]), tuple( 0, 366, 183, ): list([ 1, ]), tuple( 8, 373, 191, ): list([ 16, ]), tuple( 8, 374, 191, ): list([ 17, ]), tuple( 16, 381, 199, ): list([ 32, ]), tuple( 16, 382, 199, ): list([ 33, ]), tuple( 24, 389, 207, ): list([ 48, ]), tuple( 24, 390, 207, ): list([ 49, ]), tuple( 32, 397, 215, ): list([ 64, ]), tuple( 32, 398, 215, ): list([ 65, ]), tuple( 40, 405, 223, ): list([ 80, ]), tuple( 40, 406, 223, ): list([ 81, ]), tuple( 48, 413, 231, ): list([ 96, ]), tuple( 48, 414, 231, ): list([ 97, ]), tuple( 56, 421, 239, ): list([ 112, ]), tuple( 56, 422, 239, ): list([ 113, ]), tuple( 64, 429, 247, ): list([ 128, ]), tuple( 64, 430, 247, ): list([ 129, ]), tuple( 72, 437, 255, ): list([ 144, ]), tuple( 72, 438, 255, ): list([ 145, ]), tuple( 80, 445, 263, ): list([ 160, ]), tuple( 80, 446, 263, ): list([ 161, ]), tuple( 88, 453, 271, ): list([ 176, ]), tuple( 88, 454, 271, ): list([ 177, ]), tuple( 96, 461, 279, ): list([ 192, ]), tuple( 96, 462, 279, ): list([ 193, ]), tuple( 104, 469, 287, ): list([ 208, ]), tuple( 104, 470, 
287, ): list([ 209, ]), tuple( 112, 477, 295, ): list([ 224, ]), tuple( 112, 478, 295, ): list([ 225, ]), tuple( 120, 485, 303, ): list([ 240, ]), tuple( 120, 486, 303, ): list([ 241, ]), tuple( 128, 493, 311, ): list([ 256, ]), tuple( 128, 494, 311, ): list([ 257, ]), tuple( 136, 501, 319, ): list([ 272, ]), tuple( 136, 502, 319, ): list([ 273, ]), tuple( 144, 509, 327, ): list([ 288, ]), tuple( 144, 510, 327, ): list([ 289, ]), tuple( 152, 517, 335, ): list([ 304, ]), tuple( 152, 518, 335, ): list([ 305, ]), tuple( 160, 525, 343, ): list([ 320, ]), tuple( 160, 526, 343, ): list([ 321, ]), tuple( 168, 533, 351, ): list([ 336, ]), tuple( 168, 534, 351, ): list([ 337, ]), tuple( 176, 541, 359, ): list([ 352, ]), tuple( 176, 542, 359, ): list([ 353, ]), tuple( 184, 1, 366, ): list([ 2, ]), tuple( 184, 1, 367, ): list([ 3, ]), tuple( 185, 2, 367, ): list([ 4, ]), tuple( 192, 9, 374, ): list([ 18, ]), tuple( 192, 9, 375, ): list([ 19, ]), tuple( 193, 10, 375, ): list([ 20, ]), tuple( 200, 17, 382, ): list([ 34, ]), tuple( 200, 17, 383, ): list([ 35, ]), tuple( 201, 18, 383, ): list([ 36, ]), tuple( 208, 25, 390, ): list([ 50, ]), tuple( 208, 25, 391, ): list([ 51, ]), tuple( 209, 26, 391, ): list([ 52, ]), tuple( 216, 33, 398, ): list([ 66, ]), tuple( 216, 33, 399, ): list([ 67, ]), tuple( 217, 34, 399, ): list([ 68, ]), tuple( 224, 41, 406, ): list([ 82, ]), tuple( 224, 41, 407, ): list([ 83, ]), tuple( 225, 42, 407, ): list([ 84, ]), tuple( 232, 49, 414, ): list([ 98, ]), tuple( 232, 49, 415, ): list([ 99, ]), tuple( 233, 50, 415, ): list([ 100, ]), tuple( 240, 57, 422, ): list([ 114, ]), tuple( 240, 57, 423, ): list([ 115, ]), tuple( 241, 58, 423, ): list([ 116, ]), tuple( 248, 65, 430, ): list([ 130, ]), tuple( 248, 65, 431, ): list([ 131, ]), tuple( 249, 66, 431, ): list([ 132, ]), tuple( 256, 73, 438, ): list([ 146, ]), tuple( 256, 73, 439, ): list([ 147, ]), tuple( 257, 74, 439, ): list([ 148, ]), tuple( 264, 81, 446, ): list([ 162, ]), tuple( 264, 81, 447, ): list([ 163, ]), tuple( 265, 82, 447, ): list([ 164, ]), tuple( 272, 89, 454, ): list([ 178, ]), tuple( 272, 89, 455, ): list([ 179, ]), tuple( 273, 90, 455, ): list([ 180, ]), tuple( 280, 97, 462, ): list([ 194, ]), tuple( 280, 97, 463, ): list([ 195, ]), tuple( 281, 98, 463, ): list([ 196, ]), tuple( 288, 105, 470, ): list([ 210, ]), tuple( 288, 105, 471, ): list([ 211, ]), tuple( 289, 106, 471, ): list([ 212, ]), tuple( 296, 113, 478, ): list([ 226, ]), tuple( 296, 113, 479, ): list([ 227, ]), tuple( 297, 114, 479, ): list([ 228, ]), tuple( 304, 121, 486, ): list([ 242, ]), tuple( 304, 121, 487, ): list([ 243, ]), tuple( 305, 122, 487, ): list([ 244, ]), tuple( 312, 129, 494, ): list([ 258, ]), tuple( 312, 129, 495, ): list([ 259, ]), tuple( 313, 130, 495, ): list([ 260, ]), tuple( 320, 137, 502, ): list([ 274, ]), tuple( 320, 137, 503, ): list([ 275, ]), tuple( 321, 138, 503, ): list([ 276, ]), tuple( 328, 145, 510, ): list([ 290, ]), tuple( 328, 145, 511, ): list([ 291, ]), tuple( 329, 146, 511, ): list([ 292, ]), tuple( 336, 153, 518, ): list([ 306, ]), tuple( 336, 153, 519, ): list([ 307, ]), tuple( 337, 154, 519, ): list([ 308, ]), tuple( 344, 161, 526, ): list([ 322, ]), tuple( 344, 161, 527, ): list([ 323, ]), tuple( 345, 162, 527, ): list([ 324, ]), tuple( 352, 169, 534, ): list([ 338, ]), tuple( 352, 169, 535, ): list([ 339, ]), tuple( 353, 170, 535, ): list([ 340, ]), tuple( 360, 177, 542, ): list([ 354, ]), tuple( 360, 177, 543, ): list([ 355, ]), tuple( 361, 178, 543, ): list([ 356, ]), tuple( 368, 185, 2, ): list([ 
5, ]), tuple( 368, 186, 3, ): list([ 6, ]), tuple( 369, 186, 3, ): list([ 7, ]), tuple( 369, 187, 4, ): list([ 8, ]), tuple( 370, 187, 4, ): list([ 9, ]), tuple( 370, 188, 5, ): list([ 10, ]), tuple( 371, 188, 5, ): list([ 11, ]), tuple( 371, 189, 6, ): list([ 12, ]), tuple( 372, 189, 6, ): list([ 13, ]), tuple( 372, 190, 7, ): list([ 14, ]), tuple( 373, 190, 7, ): list([ 15, ]), tuple( 376, 193, 10, ): list([ 21, ]), tuple( 376, 194, 11, ): list([ 22, ]), tuple( 377, 194, 11, ): list([ 23, ]), tuple( 377, 195, 12, ): list([ 24, ]), tuple( 378, 195, 12, ): list([ 25, ]), tuple( 378, 196, 13, ): list([ 26, ]), tuple( 379, 196, 13, ): list([ 27, ]), tuple( 379, 197, 14, ): list([ 28, ]), tuple( 380, 197, 14, ): list([ 29, ]), tuple( 380, 198, 15, ): list([ 30, ]), tuple( 381, 198, 15, ): list([ 31, ]), tuple( 384, 201, 18, ): list([ 37, ]), tuple( 384, 202, 19, ): list([ 38, ]), tuple( 385, 202, 19, ): list([ 39, ]), tuple( 385, 203, 20, ): list([ 40, ]), tuple( 386, 203, 20, ): list([ 41, ]), tuple( 386, 204, 21, ): list([ 42, ]), tuple( 387, 204, 21, ): list([ 43, ]), tuple( 387, 205, 22, ): list([ 44, ]), tuple( 388, 205, 22, ): list([ 45, ]), tuple( 388, 206, 23, ): list([ 46, ]), tuple( 389, 206, 23, ): list([ 47, ]), tuple( 392, 209, 26, ): list([ 53, ]), tuple( 392, 210, 27, ): list([ 54, ]), tuple( 393, 210, 27, ): list([ 55, ]), tuple( 393, 211, 28, ): list([ 56, ]), tuple( 394, 211, 28, ): list([ 57, ]), tuple( 394, 212, 29, ): list([ 58, ]), tuple( 395, 212, 29, ): list([ 59, ]), tuple( 395, 213, 30, ): list([ 60, ]), tuple( 396, 213, 30, ): list([ 61, ]), tuple( 396, 214, 31, ): list([ 62, ]), tuple( 397, 214, 31, ): list([ 63, ]), tuple( 400, 217, 34, ): list([ 69, ]), tuple( 400, 218, 35, ): list([ 70, ]), tuple( 401, 218, 35, ): list([ 71, ]), tuple( 401, 219, 36, ): list([ 72, ]), tuple( 402, 219, 36, ): list([ 73, ]), tuple( 402, 220, 37, ): list([ 74, ]), tuple( 403, 220, 37, ): list([ 75, ]), tuple( 403, 221, 38, ): list([ 76, ]), tuple( 404, 221, 38, ): list([ 77, ]), tuple( 404, 222, 39, ): list([ 78, ]), tuple( 405, 222, 39, ): list([ 79, ]), tuple( 408, 225, 42, ): list([ 85, ]), tuple( 408, 226, 43, ): list([ 86, ]), tuple( 409, 226, 43, ): list([ 87, ]), tuple( 409, 227, 44, ): list([ 88, ]), tuple( 410, 227, 44, ): list([ 89, ]), tuple( 410, 228, 45, ): list([ 90, ]), tuple( 411, 228, 45, ): list([ 91, ]), tuple( 411, 229, 46, ): list([ 92, ]), tuple( 412, 229, 46, ): list([ 93, ]), tuple( 412, 230, 47, ): list([ 94, ]), tuple( 413, 230, 47, ): list([ 95, ]), tuple( 416, 233, 50, ): list([ 101, ]), tuple( 416, 234, 51, ): list([ 102, ]), tuple( 417, 234, 51, ): list([ 103, ]), tuple( 417, 235, 52, ): list([ 104, ]), tuple( 418, 235, 52, ): list([ 105, ]), tuple( 418, 236, 53, ): list([ 106, ]), tuple( 419, 236, 53, ): list([ 107, ]), tuple( 419, 237, 54, ): list([ 108, ]), tuple( 420, 237, 54, ): list([ 109, ]), tuple( 420, 238, 55, ): list([ 110, ]), tuple( 421, 238, 55, ): list([ 111, ]), tuple( 424, 241, 58, ): list([ 117, ]), tuple( 424, 242, 59, ): list([ 118, ]), tuple( 425, 242, 59, ): list([ 119, ]), tuple( 425, 243, 60, ): list([ 120, ]), tuple( 426, 243, 60, ): list([ 121, ]), tuple( 426, 244, 61, ): list([ 122, ]), tuple( 427, 244, 61, ): list([ 123, ]), tuple( 427, 245, 62, ): list([ 124, ]), tuple( 428, 245, 62, ): list([ 125, ]), tuple( 428, 246, 63, ): list([ 126, ]), tuple( 429, 246, 63, ): list([ 127, ]), tuple( 432, 249, 66, ): list([ 133, ]), tuple( 432, 250, 67, ): list([ 134, ]), tuple( 433, 250, 67, ): list([ 135, ]), tuple( 433, 251, 68, ): 
list([ 136, ]), tuple( 434, 251, 68, ): list([ 137, ]), tuple( 434, 252, 69, ): list([ 138, ]), tuple( 435, 252, 69, ): list([ 139, ]), tuple( 435, 253, 70, ): list([ 140, ]), tuple( 436, 253, 70, ): list([ 141, ]), tuple( 436, 254, 71, ): list([ 142, ]), tuple( 437, 254, 71, ): list([ 143, ]), tuple( 440, 257, 74, ): list([ 149, ]), tuple( 440, 258, 75, ): list([ 150, ]), tuple( 441, 258, 75, ): list([ 151, ]), tuple( 441, 259, 76, ): list([ 152, ]), tuple( 442, 259, 76, ): list([ 153, ]), tuple( 442, 260, 77, ): list([ 154, ]), tuple( 443, 260, 77, ): list([ 155, ]), tuple( 443, 261, 78, ): list([ 156, ]), tuple( 444, 261, 78, ): list([ 157, ]), tuple( 444, 262, 79, ): list([ 158, ]), tuple( 445, 262, 79, ): list([ 159, ]), tuple( 448, 265, 82, ): list([ 165, ]), tuple( 448, 266, 83, ): list([ 166, ]), tuple( 449, 266, 83, ): list([ 167, ]), tuple( 449, 267, 84, ): list([ 168, ]), tuple( 450, 267, 84, ): list([ 169, ]), tuple( 450, 268, 85, ): list([ 170, ]), tuple( 451, 268, 85, ): list([ 171, ]), tuple( 451, 269, 86, ): list([ 172, ]), tuple( 452, 269, 86, ): list([ 173, ]), tuple( 452, 270, 87, ): list([ 174, ]), tuple( 453, 270, 87, ): list([ 175, ]), tuple( 456, 273, 90, ): list([ 181, ]), tuple( 456, 274, 91, ): list([ 182, ]), tuple( 457, 274, 91, ): list([ 183, ]), tuple( 457, 275, 92, ): list([ 184, ]), tuple( 458, 275, 92, ): list([ 185, ]), tuple( 458, 276, 93, ): list([ 186, ]), tuple( 459, 276, 93, ): list([ 187, ]), tuple( 459, 277, 94, ): list([ 188, ]), tuple( 460, 277, 94, ): list([ 189, ]), tuple( 460, 278, 95, ): list([ 190, ]), tuple( 461, 278, 95, ): list([ 191, ]), tuple( 464, 281, 98, ): list([ 197, ]), tuple( 464, 282, 99, ): list([ 198, ]), tuple( 465, 282, 99, ): list([ 199, ]), tuple( 465, 283, 100, ): list([ 200, ]), tuple( 466, 283, 100, ): list([ 201, ]), tuple( 466, 284, 101, ): list([ 202, ]), tuple( 467, 284, 101, ): list([ 203, ]), tuple( 467, 285, 102, ): list([ 204, ]), tuple( 468, 285, 102, ): list([ 205, ]), tuple( 468, 286, 103, ): list([ 206, ]), tuple( 469, 286, 103, ): list([ 207, ]), tuple( 472, 289, 106, ): list([ 213, ]), tuple( 472, 290, 107, ): list([ 214, ]), tuple( 473, 290, 107, ): list([ 215, ]), tuple( 473, 291, 108, ): list([ 216, ]), tuple( 474, 291, 108, ): list([ 217, ]), tuple( 474, 292, 109, ): list([ 218, ]), tuple( 475, 292, 109, ): list([ 219, ]), tuple( 475, 293, 110, ): list([ 220, ]), tuple( 476, 293, 110, ): list([ 221, ]), tuple( 476, 294, 111, ): list([ 222, ]), tuple( 477, 294, 111, ): list([ 223, ]), tuple( 480, 297, 114, ): list([ 229, ]), tuple( 480, 298, 115, ): list([ 230, ]), tuple( 481, 298, 115, ): list([ 231, ]), tuple( 481, 299, 116, ): list([ 232, ]), tuple( 482, 299, 116, ): list([ 233, ]), tuple( 482, 300, 117, ): list([ 234, ]), tuple( 483, 300, 117, ): list([ 235, ]), tuple( 483, 301, 118, ): list([ 236, ]), tuple( 484, 301, 118, ): list([ 237, ]), tuple( 484, 302, 119, ): list([ 238, ]), tuple( 485, 302, 119, ): list([ 239, ]), tuple( 488, 305, 122, ): list([ 245, ]), tuple( 488, 306, 123, ): list([ 246, ]), tuple( 489, 306, 123, ): list([ 247, ]), tuple( 489, 307, 124, ): list([ 248, ]), tuple( 490, 307, 124, ): list([ 249, ]), tuple( 490, 308, 125, ): list([ 250, ]), tuple( 491, 308, 125, ): list([ 251, ]), tuple( 491, 309, 126, ): list([ 252, ]), tuple( 492, 309, 126, ): list([ 253, ]), tuple( 492, 310, 127, ): list([ 254, ]), tuple( 493, 310, 127, ): list([ 255, ]), tuple( 496, 313, 130, ): list([ 261, ]), tuple( 496, 314, 131, ): list([ 262, ]), tuple( 497, 314, 131, ): list([ 263, ]), tuple( 497, 
315, 132, ): list([ 264, ]), tuple( 498, 315, 132, ): list([ 265, ]), tuple( 498, 316, 133, ): list([ 266, ]), tuple( 499, 316, 133, ): list([ 267, ]), tuple( 499, 317, 134, ): list([ 268, ]), tuple( 500, 317, 134, ): list([ 269, ]), tuple( 500, 318, 135, ): list([ 270, ]), tuple( 501, 318, 135, ): list([ 271, ]), tuple( 504, 321, 138, ): list([ 277, ]), tuple( 504, 322, 139, ): list([ 278, ]), tuple( 505, 322, 139, ): list([ 279, ]), tuple( 505, 323, 140, ): list([ 280, ]), tuple( 506, 323, 140, ): list([ 281, ]), tuple( 506, 324, 141, ): list([ 282, ]), tuple( 507, 324, 141, ): list([ 283, ]), tuple( 507, 325, 142, ): list([ 284, ]), tuple( 508, 325, 142, ): list([ 285, ]), tuple( 508, 326, 143, ): list([ 286, ]), tuple( 509, 326, 143, ): list([ 287, ]), tuple( 512, 329, 146, ): list([ 293, ]), tuple( 512, 330, 147, ): list([ 294, ]), tuple( 513, 330, 147, ): list([ 295, ]), tuple( 513, 331, 148, ): list([ 296, ]), tuple( 514, 331, 148, ): list([ 297, ]), tuple( 514, 332, 149, ): list([ 298, ]), tuple( 515, 332, 149, ): list([ 299, ]), tuple( 515, 333, 150, ): list([ 300, ]), tuple( 516, 333, 150, ): list([ 301, ]), tuple( 516, 334, 151, ): list([ 302, ]), tuple( 517, 334, 151, ): list([ 303, ]), tuple( 520, 337, 154, ): list([ 309, ]), tuple( 520, 338, 155, ): list([ 310, ]), tuple( 521, 338, 155, ): list([ 311, ]), tuple( 521, 339, 156, ): list([ 312, ]), tuple( 522, 339, 156, ): list([ 313, ]), tuple( 522, 340, 157, ): list([ 314, ]), tuple( 523, 340, 157, ): list([ 315, ]), tuple( 523, 341, 158, ): list([ 316, ]), tuple( 524, 341, 158, ): list([ 317, ]), tuple( 524, 342, 159, ): list([ 318, ]), tuple( 525, 342, 159, ): list([ 319, ]), tuple( 528, 345, 162, ): list([ 325, ]), tuple( 528, 346, 163, ): list([ 326, ]), tuple( 529, 346, 163, ): list([ 327, ]), tuple( 529, 347, 164, ): list([ 328, ]), tuple( 530, 347, 164, ): list([ 329, ]), tuple( 530, 348, 165, ): list([ 330, ]), tuple( 531, 348, 165, ): list([ 331, ]), tuple( 531, 349, 166, ): list([ 332, ]), tuple( 532, 349, 166, ): list([ 333, ]), tuple( 532, 350, 167, ): list([ 334, ]), tuple( 533, 350, 167, ): list([ 335, ]), tuple( 536, 353, 170, ): list([ 341, ]), tuple( 536, 354, 171, ): list([ 342, ]), tuple( 537, 354, 171, ): list([ 343, ]), tuple( 537, 355, 172, ): list([ 344, ]), tuple( 538, 355, 172, ): list([ 345, ]), tuple( 538, 356, 173, ): list([ 346, ]), tuple( 539, 356, 173, ): list([ 347, ]), tuple( 539, 357, 174, ): list([ 348, ]), tuple( 540, 357, 174, ): list([ 349, ]), tuple( 540, 358, 175, ): list([ 350, ]), tuple( 541, 358, 175, ): list([ 351, ]), tuple( 544, 361, 178, ): list([ 357, ]), tuple( 544, 362, 179, ): list([ 358, ]), tuple( 545, 362, 179, ): list([ 359, ]), tuple( 545, 363, 180, ): list([ 360, ]), tuple( 546, 363, 180, ): list([ 361, ]), tuple( 546, 364, 181, ): list([ 362, ]), tuple( 547, 364, 181, ): list([ 363, ]), tuple( 547, 365, 182, ): list([ 364, 365, ]), }) # --- # name: test_snapshot_cohorts[ERA5Google] dict({ tuple( 0, 1, 2, 3, 124, 125, 126, 127, 236, 237, 238, 239, 360, 361, 362, 363, 480, 481, 482, 483, 604, 605, 606, 607, 724, 725, 726, 727, 848, 849, 850, 851, ): list([ 0, ]), tuple( 4, 5, 6, 7, 128, 129, 130, 131, 240, 241, 242, 243, 364, 365, 366, 367, 484, 485, 486, 487, 608, 609, 610, 611, 728, 729, 730, 731, 852, 853, 854, 855, ): list([ 1, ]), tuple( 8, 9, 10, 11, 132, 133, 134, 135, 244, 245, 246, 247, 368, 369, 370, 371, 488, 489, 490, 491, 612, 613, 614, 615, 732, 733, 734, 735, 856, 857, 858, 859, ): list([ 2, ]), tuple( 12, 13, 14, 15, 136, 137, 138, 139, 248, 249, 250, 251, 
372, 373, 374, 375, 492, 493, 494, 495, 616, 617, 618, 619, 736, 737, 738, 739, 860, 861, 862, 863, ): list([ 3, ]), tuple( 16, 17, 18, 19, 140, 141, 142, 143, 252, 253, 254, 255, 376, 377, 378, 379, 496, 497, 498, 499, 620, 621, 622, 623, 740, 741, 742, 743, 864, 865, 866, 867, ): list([ 4, ]), tuple( 20, 21, 22, 23, 144, 145, 146, 147, 256, 257, 258, 259, 380, 381, 382, 383, 500, 501, 502, 503, 624, 625, 626, 627, 744, 745, 746, 747, 868, 869, 870, 871, ): list([ 5, ]), tuple( 24, 25, 26, 27, 148, 149, 150, 151, 260, 261, 262, 263, 384, 385, 386, 387, 504, 505, 506, 507, 628, 629, 630, 631, 748, 749, 750, 751, 872, 873, 874, 875, ): list([ 6, ]), tuple( 28, 29, 30, 31, 152, 153, 154, 155, 264, 265, 266, 267, 388, 389, 390, 391, 508, 509, 510, 511, 632, 633, 634, 635, 752, 753, 754, 755, 876, 877, 878, 879, ): list([ 7, ]), tuple( 32, 33, 34, 35, 156, 157, 158, 159, 268, 269, 270, 271, 392, 393, 394, 395, 512, 513, 514, 515, 636, 637, 638, 639, 756, 757, 758, 759, 880, 881, 882, 883, ): list([ 8, ]), tuple( 36, 37, 38, 39, 160, 161, 162, 163, 272, 273, 274, 275, 396, 397, 398, 399, 516, 517, 518, 519, 640, 641, 642, 643, 760, 761, 762, 763, 884, 885, 886, 887, ): list([ 9, ]), tuple( 40, 41, 42, 43, 164, 165, 166, 167, 276, 277, 278, 279, 400, 401, 402, 403, 520, 521, 522, 523, 644, 645, 646, 647, 764, 765, 766, 767, 888, 889, 890, 891, ): list([ 10, ]), tuple( 44, 45, 46, 47, 168, 169, 170, 171, 280, 281, 282, 283, 404, 405, 406, 407, 524, 525, 526, 527, 648, 649, 650, 651, 768, 769, 770, 771, 892, 893, 894, 895, ): list([ 11, ]), tuple( 48, 49, 50, 51, 172, 173, 174, 175, 284, 285, 286, 287, 408, 409, 410, 411, 528, 529, 530, 531, 652, 653, 654, 655, 772, 773, 774, 775, 896, 897, 898, 899, ): list([ 12, ]), tuple( 52, 53, 54, 55, 176, 177, 178, 179, 288, 289, 290, 291, 412, 413, 414, 415, 532, 533, 534, 535, 656, 657, 658, 659, 776, 777, 778, 779, ): list([ 13, ]), tuple( 56, 57, 58, 59, 180, 181, 182, 183, 292, 293, 294, 295, 416, 417, 418, 419, 536, 537, 538, 539, 660, 661, 662, 663, 780, 781, 782, 783, ): list([ 14, ]), tuple( 60, 61, 62, 63, 184, 185, 186, 187, 296, 297, 298, 299, 420, 421, 422, 423, 540, 541, 542, 543, 664, 665, 666, 667, 784, 785, 786, 787, ): list([ 15, ]), tuple( 64, 65, 66, 67, 188, 189, 190, 191, 300, 301, 302, 303, 424, 425, 426, 427, 544, 545, 546, 547, 668, 669, 670, 671, 788, 789, 790, 791, ): list([ 16, ]), tuple( 68, 69, 70, 71, 192, 193, 194, 195, 304, 305, 306, 307, 428, 429, 430, 431, 548, 549, 550, 551, 672, 673, 674, 675, 792, 793, 794, 795, ): list([ 17, ]), tuple( 72, 73, 74, 75, 196, 197, 198, 199, 308, 309, 310, 311, 432, 433, 434, 435, 552, 553, 554, 555, 676, 677, 678, 679, 796, 797, 798, 799, ): list([ 18, ]), tuple( 76, 77, 78, 79, 200, 201, 202, 203, 312, 313, 314, 315, 436, 437, 438, 439, 556, 557, 558, 559, 680, 681, 682, 683, 800, 801, 802, 803, ): list([ 19, ]), tuple( 80, 81, 82, 83, 204, 205, 206, 207, 316, 317, 318, 319, 440, 441, 442, 443, 560, 561, 562, 563, 684, 685, 686, 687, 804, 805, 806, 807, ): list([ 20, ]), tuple( 84, 85, 86, 87, 208, 209, 210, 211, 320, 321, 322, 323, 444, 445, 446, 447, 564, 565, 566, 567, 688, 689, 690, 691, 808, 809, 810, 811, ): list([ 21, ]), tuple( 88, 89, 90, 91, 212, 213, 214, 215, 324, 325, 326, 327, 448, 449, 450, 451, 568, 569, 570, 571, 692, 693, 694, 695, 812, 813, 814, 815, ): list([ 22, ]), tuple( 92, 93, 94, 95, 216, 217, 218, 219, 328, 329, 330, 331, 452, 453, 454, 455, 572, 573, 574, 575, 696, 697, 698, 699, 816, 817, 818, 819, ): list([ 23, ]), tuple( 96, 97, 98, 99, 220, 221, 222, 223, 
332, 333, 334, 335, 456, 457, 458, 459, 576, 577, 578, 579, 700, 701, 702, 703, 820, 821, 822, 823, ): list([ 24, ]), tuple( 100, 101, 102, 103, 224, 225, 226, 227, 336, 337, 338, 339, 460, 461, 462, 463, 580, 581, 582, 583, 704, 705, 706, 707, 824, 825, 826, 827, ): list([ 25, ]), tuple( 104, 105, 106, 107, 228, 229, 230, 231, 340, 341, 342, 343, 464, 465, 466, 467, 584, 585, 586, 587, 708, 709, 710, 711, 828, 829, 830, 831, ): list([ 26, ]), tuple( 108, 109, 110, 111, 232, 233, 234, 235, 344, 345, 346, 347, 468, 469, 470, 471, 588, 589, 590, 591, 712, 713, 714, 715, 832, 833, 834, 835, ): list([ 27, ]), tuple( 112, 113, 114, 115, 348, 349, 350, 351, 472, 473, 474, 475, 592, 593, 594, 595, 716, 717, 718, 719, 836, 837, 838, 839, ): list([ 28, ]), tuple( 116, 117, 118, 119, 352, 353, 354, 355, 476, 477, 478, 479, 596, 597, 598, 599, 720, 721, 722, 723, 840, 841, 842, 843, ): list([ 29, ]), tuple( 120, 121, 122, 123, 356, 357, 358, 359, 600, 601, 602, 603, 844, 845, 846, 847, ): list([ 30, ]), }) # --- # name: test_snapshot_cohorts[ERA5MonthHourRechunked] dict({ tuple( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, ): list([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ]), tuple( 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, ): list([ 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, ]), tuple( 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 122, 123, 124, 125, 126, 127, ): list([ 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, ]), tuple( 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, ): list([ 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, ]), tuple( 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 168, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, ): list([ 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, ]), tuple( 183, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, ): list([ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, ]), tuple( 256, 257, 258, 61, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 
440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, ): list([ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, ]), tuple( 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 76, ): list([ 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, ]), tuple( 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, ): list([ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, ]), tuple( 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, ): list([ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, ]), tuple( 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 380, 381, 382, 383, ): list([ 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, ]), tuple( 512, 513, 514, 515, 516, 505, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 506, 507, 503, 122, 504, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 501, 502, 508, 509, 510, 511, ): list([ 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, ]), }) # --- # name: test_snapshot_cohorts[ERA5MonthHour] dict({ tuple( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, ): list([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, ]), tuple( 128, 129, 130, 131, 132, 133, 134, 135, 136, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 122, 123, 124, 125, 126, 127, ): list([ 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, ]), tuple( 256, 257, 258, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, ): list([ 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, ]), tuple( 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, ): list([ 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, ]), tuple( 273, 274, 
275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, ): list([ 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, ]), tuple( 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, ): list([ 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, ]), tuple( 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 381, 382, 383, ): list([ 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, ]), tuple( 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, ): list([ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, ]), tuple( 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, ): list([ 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, ]), tuple( 512, 513, 514, 515, 516, 517, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, ): list([ 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, ]), tuple( 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, ): list([ 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, ]), tuple( 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, ): list([ 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, ]), }) # --- # name: test_snapshot_cohorts[NWMMidwest] dict({ tuple( 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 26, 27, 39, 52, 65, ): list([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 29, ]), tuple( 2, 3, 5, 6, 10, 12, 14, 15, 52, 26, 27, ): list([ 24, 28, ]), tuple( 2, 4, 6, 14, 15, 26, 27, ): list([ 30, ]), tuple( 3, 4, 5, 7, 39, 40, 8, 9, 14, 15, 16, 26, 27, 28, ): list([ 32, 33, 35, 36, 37, 38, 40, 46, 51, ]), tuple( 3, 4, 6, 15, 16, 17, 27, ): list([ 34, 41, ]), tuple( 3, 6, 7, 14, 15, 26, 27, ): list([ 31, ]), tuple( 4, 5, 6, 8, 40, 9, 15, 17, 18, 27, 28, ): list([ 49, 53, 56, 57, ]), tuple( 4, 10, 11, 15, 17, 52, 53, ): list([ 45, ]), 
tuple( 5, 6, 7, 10, 11, 12, 15, 16, 52, 53, 27, 28, ): list([ 43, 47, 50, ]), tuple( 6, 7, 40, 9, 41, 8, 16, 17, 19, 20, 21, 27, 28, ): list([ 44, 54, 55, 58, 60, 63, 66, 69, 72, 80, ]), tuple( 6, 10, 11, 19, 53, 28, ): list([ 62, ]), tuple( 6, 39, 40, 9, 15, 16, 27, ): list([ 39, ]), tuple( 7, 8, 40, 11, 15, 16, 17, 52, 53, ): list([ 48, ]), tuple( 7, 40, 9, 10, 8, 41, 11, 16, 17, 18, 53, 28, 29, ): list([ 59, 61, 64, 65, 67, ]), tuple( 8, 41, 10, 11, 17, 21, 53, 54, ): list([ 75, 79, ]), tuple( 9, 10, 41, 22, 23, 54, ): list([ 87, ]), tuple( 9, 41, 17, 18, 22, 30, ): list([ 84, ]), tuple( 10, 11, 54, 23, 24, ): list([ 93, ]), tuple( 10, 12, 18, 19, 20, 53, 54, 28, 29, ): list([ 76, 85, ]), tuple( 11, 19, 54, 24, 29, 30, ): list([ 99, ]), tuple( 16, 17, 18, 29, 30, 31, ): list([ 78, 92, ]), tuple( 24, 54, 31, ): list([ 111, ]), tuple( 25, 67, 54, ): list([ 120, ]), tuple( 32, 33, 34, 35, 44, 45, 55, 24, 25, 56, ): list([ 143, 145, 149, 154, 156, 160, 163, 164, 166, 169, 170, 175, 183, 190, ]), tuple( 32, 33, 34, 36, 37, 45, 46, 47, 57, ): list([ 165, 172, 173, 178, 179, 180, 185, ]), tuple( 32, 33, 34, 44, 23, 55, 24, 56, ): list([ 121, 140, 144, 151, 153, 155, ]), tuple( 32, 33, 41, 42, 23, 55, 54, 22, 24, 25, 31, ): list([ 101, 113, 116, 124, 125, 129, 130, 131, ]), tuple( 32, 33, 42, 19, 20, 22, 30, 31, ): list([ 110, 122, 159, ]), tuple( 32, 33, 45, ): list([ 150, 158, ]), tuple( 32, 33, 55, 24, 31, ): list([ 137, ]), tuple( 32, 41, 42, 43, 19, 20, 21, 22, 23, 54, 24, 29, 30, 31, ): list([ 77, 90, 94, 95, 98, 100, 104, 107, 108, 109, 112, 115, 117, ]), tuple( 32, 42, 43, 20, 21, 31, ): list([ 123, ]), tuple( 32, 43, 22, ): list([ 139, ]), tuple( 33, 34, 35, 42, 43, 44, 20, 21, 22, ): list([ 106, 119, 128, 132, 138, 141, 147, 152, ]), tuple( 33, 43, 44, 20, 30, 31, ): list([ 118, 126, 135, 142, ]), tuple( 34, 46, 47, ): list([ 187, 193, ]), tuple( 35, 36, 37, 38, 44, 45, 46, 57, 58, 59, ): list([ 168, 177, 186, 191, 195, 199, 203, 204, 213, ]), tuple( 35, 36, 43, 44, 22, 23, 56, 57, ): list([ 146, 157, 162, ]), tuple( 35, 36, 46, 48, 49, 59, ): list([ 200, 205, 212, 219, ]), tuple( 35, 37, 44, 24, 57, 56, ): list([ 171, 176, 192, 198, ]), tuple( 35, 38, 45, 56, 25, 57, ): list([ 181, ]), tuple( 36, 37, 23, 24, 57, 56, ): list([ 167, 182, 189, ]), tuple( 36, 37, 38, 46, 47, 48, 57, 58, ): list([ 188, 201, 208, 211, 217, 223, ]), tuple( 36, 37, 47, 58, 59, ): list([ 197, 206, 210, 216, 221, ]), tuple( 36, 37, 49, 50, 59, ): list([ 224, 230, 234, ]), tuple( 37, 49, 58, 59, 60, ): list([ 225, ]), tuple( 38, 71, 72, 58, 59, ): list([ 218, 226, 231, 237, ]), tuple( 38, 72, 73, 51, 59, 60, ): list([ 245, 252, 260, 267, ]), tuple( 39, 40, 9, 10, 52, 53, ): list([ 42, ]), tuple( 40, 9, 10, 41, 53, 54, ): list([ 74, ]), tuple( 40, 9, 41, 12, 18, 19, 53, 22, 28, 29, ): list([ 68, 82, ]), tuple( 40, 9, 41, 16, 17, 18, 29, ): list([ 71, ]), tuple( 41, 10, 18, 19, 21, 54, 23, 29, 30, ): list([ 73, 86, 89, 91, ]), tuple( 41, 11, 18, 20, 53, 54, 21, 24, 29, 30, ): list([ 81, 83, 96, ]), tuple( 41, 18, 22, 30, 31, ): list([ 97, 103, ]), tuple( 42, 43, 20, 21, 54, 24, ): list([ 114, ]), tuple( 42, 43, 23, 22, 55, 24, 56, ): list([ 127, 136, ]), tuple( 43, 23, 21, 55, 24, ): list([ 133, ]), tuple( 46, 47, 48, 49, 50, 59, 60, ): list([ 194, 202, 209, 214, 215, 220, 222, 227, 228, 229, 232, 233, 235, 236, 241, 243, ]), tuple( 48, 49, 50, 61, 62, 63, ): list([ 253, 254, 255, 258, 259, 261, 263, 264, 265, 266, 269, 270, 271, 274, 275, 276, 279, 280, 283, 284, 287, 290, ]), tuple( 48, 49, 61, ): list([ 242, 
249, ]), tuple( 49, 50, 60, ): list([ 239, 246, 247, 250, 251, 257, ]), tuple( 49, 51, 59, 60, 61, ): list([ 248, 256, 262, ]), tuple( 50, 59, 60, ): list([ 238, 244, ]), tuple( 59, 51, 38, ): list([ 240, ]), tuple( 64, 74, 75, 51, 61, 62, ): list([ 268, 272, 273, 277, 278, 281, ]), tuple( 64, 75, 76, 62, 63, ): list([ 282, 285, 286, 288, 289, 291, 292, 293, 294, ]), tuple( 64, 77, ): list([ 295, 296, ]), tuple( 65, 66, 12, 52, 53, ): list([ 52, ]), tuple( 66, 67, 12, 53, 54, ): list([ 70, 88, ]), tuple( 67, 42, 12, 19, 21, 54, 25, 30, ): list([ 102, 105, ]), tuple( 68, 69, 55, 56, 25, ): list([ 134, 148, 161, ]), tuple( 69, 38, 70, 56, 25, 57, ): list([ 174, 184, 196, 207, ]), }) # --- # name: test_snapshot_cohorts[OISST] dict({ tuple( 0, 255, 1168, 401, 146, 657, 913, 1424, 1022, 767, 1278, 1314, 547, 36, 292, 803, 1059, 1315, 693, 182, 438, 949, 1205, 694, 839, 72, 328, 584, 1095, 1351, 73, 840, 1241, 474, 219, 730, 986, 1387, 620, 109, 365, 876, 1132, 621, 1388, 766, 511, ): list([ 242, 243, 244, ]), tuple( 0, 256, 255, 1424, 401, 146, 657, 767, 913, 1169, 402, 1023, 1278, 803, 36, 292, 548, 1059, 1315, 949, 182, 438, 694, 1205, 183, 950, 1095, 328, 73, 584, 840, 1351, 329, 1096, 730, 475, 219, 986, 1242, 876, 109, 365, 621, 1132, 1388, 877, 1022, 511, ): list([ 245, 246, 247, ]), tuple( 0, 256, 512, 1, 768, 913, 146, 402, 658, 1169, 1425, 147, 914, 1023, 1279, 548, 37, 293, 804, 1060, 1315, 694, 439, 183, 950, 1206, 840, 73, 329, 585, 1096, 1352, 74, 841, 986, 475, 219, 731, 1242, 220, 987, 1388, 621, 110, 366, 877, 1133, 767, ): list([ 251, 252, 253, ]), tuple( 0, 256, 512, 1424, 657, 146, 402, 767, 913, 1169, 658, 1425, 1023, 1059, 292, 37, 548, 804, 1315, 1279, 1205, 438, 183, 694, 950, 439, 1206, 1351, 584, 73, 329, 840, 1096, 585, 1352, 986, 475, 219, 731, 1242, 1132, 365, 110, 621, 877, 1388, 1278, 511, ): list([ 248, 249, 250, ]), tuple( 128, 383, 1296, 529, 18, 274, 785, 1041, 1297, 895, 1406, 675, 164, 420, 931, 1187, 1442, 1443, 676, 821, 54, 310, 566, 1077, 1333, 55, 822, 1223, 456, 201, 712, 968, 1369, 602, 347, 91, 858, 1114, 603, 1370, 748, 237, 493, 1004, 1260, 749, 1150, 639, ): list([ 62, 63, 64, ]), tuple( 128, 384, 383, 785, 18, 274, 530, 1041, 1297, 895, 1151, 931, 164, 420, 676, 1187, 1443, 165, 932, 1077, 310, 55, 566, 822, 1333, 311, 1078, 712, 201, 457, 968, 1224, 858, 347, 603, 91, 1114, 1370, 859, 1004, 237, 493, 749, 1260, 1005, 238, 1406, 639, ): list([ 65, 66, 67, ]), tuple( 128, 384, 640, 129, 896, 1297, 530, 19, 275, 786, 1042, 1151, 1443, 676, 165, 421, 932, 1188, 1407, 1444, 822, 567, 55, 311, 1078, 1334, 56, 823, 968, 201, 457, 713, 1224, 202, 969, 1370, 603, 348, 92, 859, 1115, 749, 238, 494, 1005, 1261, 750, 895, ): list([ 71, 72, 73, ]), tuple( 128, 384, 640, 1041, 274, 19, 530, 786, 1297, 895, 1151, 1187, 420, 165, 676, 932, 1443, 1407, 421, 1188, 1333, 566, 55, 311, 822, 1078, 567, 1334, 968, 201, 457, 713, 1224, 1114, 603, 347, 92, 859, 1370, 1260, 493, 238, 749, 1005, 494, 1261, 1406, 639, ): list([ 68, 69, 70, ]), tuple( 256, 1, 512, 768, 1024, 1169, 402, 147, 658, 914, 1425, 1170, 403, 1279, 548, 37, 293, 804, 1060, 1316, 549, 950, 439, 183, 695, 1206, 1096, 329, 74, 585, 841, 1352, 330, 1097, 1242, 475, 220, 731, 987, 476, 1243, 877, 110, 366, 622, 1133, 1389, 1023, ): list([ 254, 255, 256, ]), tuple( 382, 893, 527, 16, 272, 783, 1039, 1295, 528, 1149, 1404, 929, 162, 418, 674, 1185, 1441, 1075, 308, 53, 564, 820, 1331, 309, 1076, 1221, 454, 199, 710, 966, 455, 1222, 856, 345, 601, 89, 1112, 1368, 1002, 235, 491, 747, 1258, 1003, 381, 1148, 
637, 126, ): list([ 44, 45, 46, ]), tuple( 382, 1295, 528, 17, 273, 784, 894, 1040, 1296, 529, 1150, 1405, 674, 163, 419, 930, 1186, 1442, 675, 1076, 309, 54, 565, 821, 1332, 1222, 455, 200, 711, 967, 456, 1223, 1368, 601, 346, 90, 857, 1113, 602, 1369, 1003, 236, 492, 748, 1259, 1149, 638, 127, ): list([ 53, 54, 55, ]), tuple( 384, 129, 640, 896, 1152, 385, 530, 19, 275, 786, 1042, 1298, 531, 932, 165, 421, 677, 1188, 1407, 1444, 1078, 567, 311, 56, 823, 1334, 312, 1079, 1224, 457, 202, 713, 969, 458, 1225, 859, 348, 92, 604, 1115, 1371, 1005, 238, 494, 750, 1261, 1006, 1151, ): list([ 74, 75, 76, ]), tuple( 509, 764, 1020, 1421, 654, 143, 399, 910, 1166, 655, 1422, 1276, 1056, 289, 34, 545, 801, 1312, 1202, 435, 180, 691, 947, 436, 1203, 1348, 581, 70, 326, 837, 1093, 582, 1349, 983, 472, 216, 728, 1239, 1129, 362, 107, 618, 874, 1385, 1275, 508, 253, ): list([ 218, 219, 220, ]), tuple( 511, 766, 912, 145, 401, 657, 1168, 1424, 1022, 1278, 1058, 291, 36, 547, 803, 1314, 292, 1059, 1204, 437, 182, 693, 949, 438, 1205, 839, 72, 328, 584, 1095, 1351, 985, 474, 218, 730, 1241, 1131, 364, 109, 620, 876, 1387, 365, 1132, 1277, 510, 255, ): list([ 239, 240, 241, ]), tuple( 512, 1, 257, 768, 1024, 1425, 658, 147, 403, 914, 1170, 659, 1426, 804, 37, 293, 549, 1060, 1316, 38, 805, 1206, 439, 184, 695, 951, 1352, 585, 74, 330, 841, 1097, 586, 1353, 731, 476, 220, 987, 1243, 732, 1133, 110, 366, 622, 878, 1389, 111, 1279, ): list([ 257, 258, 259, ]), tuple( 637, 892, 1038, 271, 16, 527, 783, 1148, 1294, 1404, 1184, 417, 162, 673, 929, 1440, 418, 1185, 1330, 563, 52, 308, 819, 1075, 564, 1331, 965, 198, 454, 710, 1221, 1111, 600, 344, 89, 856, 1367, 1257, 490, 235, 746, 1002, 491, 1258, 125, 1403, 636, 381, ): list([ 38, 39, 40, ]), tuple( 638, 893, 783, 16, 272, 528, 1039, 1295, 1149, 17, 784, 1405, 1185, 418, 163, 674, 930, 1441, 1331, 564, 53, 309, 820, 1076, 565, 1332, 710, 199, 455, 966, 1222, 711, 1112, 345, 601, 89, 857, 1368, 90, 1258, 491, 236, 747, 1003, 126, 1404, 637, 382, ): list([ 47, 48, 49, ]), tuple( 638, 1039, 272, 17, 528, 784, 1295, 1149, 273, 1040, 894, 1405, 1441, 674, 163, 419, 930, 1186, 820, 53, 309, 565, 1076, 1332, 966, 199, 455, 711, 1222, 200, 967, 1112, 601, 345, 90, 857, 1368, 346, 1113, 747, 236, 492, 1003, 1259, 126, 893, 382, 127, ): list([ 50, 51, 52, ]), tuple( 639, 1040, 273, 18, 529, 785, 1296, 274, 1041, 1150, 1186, 419, 164, 675, 931, 1406, 1442, 420, 1187, 821, 54, 310, 566, 1077, 1333, 967, 200, 456, 712, 1223, 1113, 602, 346, 91, 858, 1369, 347, 1114, 1259, 492, 237, 748, 1004, 493, 1260, 127, 894, 383, ): list([ 59, 60, 61, ]), tuple( 640, 129, 385, 896, 1152, 641, 1408, 786, 19, 275, 531, 1042, 1298, 20, 787, 1188, 421, 166, 677, 933, 1444, 1334, 567, 312, 56, 823, 1079, 568, 1335, 713, 202, 458, 969, 1225, 714, 1115, 348, 92, 604, 860, 1371, 93, 1261, 494, 239, 750, 1006, 1407, ): list([ 77, 78, 79, ]), tuple( 764, 1019, 1165, 398, 143, 654, 910, 1275, 1421, 1311, 544, 33, 289, 800, 1056, 1312, 690, 179, 435, 946, 1202, 691, 836, 69, 325, 581, 1092, 1348, 70, 837, 1238, 471, 216, 727, 983, 1384, 617, 106, 362, 873, 1129, 618, 1385, 252, 763, 508, ): list([ 212, 213, 214, ]), tuple( 764, 1020, 1421, 398, 143, 654, 910, 1166, 1275, 399, 800, 33, 289, 545, 1056, 1312, 946, 179, 435, 691, 1202, 180, 947, 1092, 325, 70, 581, 837, 1348, 326, 1093, 727, 472, 216, 983, 1239, 873, 106, 362, 618, 1129, 1385, 874, 252, 1019, 508, 253, ): list([ 215, 216, 217, ]), tuple( 765, 1020, 910, 143, 399, 655, 1166, 1422, 1276, 144, 911, 1312, 545, 34, 290, 801, 1057, 691, 
180, 436, 947, 1203, 837, 70, 326, 582, 1093, 1349, 71, 838, 983, 472, 216, 728, 1239, 217, 984, 1385, 618, 107, 363, 874, 1130, 253, 764, 509, ): list([ 221, 222, 223, ]), tuple( 765, 1166, 399, 144, 655, 911, 1422, 1276, 1167, 400, 1021, 545, 34, 290, 801, 1057, 1313, 546, 947, 180, 436, 692, 1203, 1093, 326, 71, 582, 838, 1349, 327, 1094, 1239, 472, 217, 728, 984, 473, 1240, 874, 107, 363, 619, 1130, 1386, 253, 1020, 509, ): list([ 224, 225, 226, ]), tuple( 765, 1422, 655, 144, 400, 911, 1021, 1167, 656, 1423, 801, 34, 290, 546, 1057, 1313, 35, 802, 1203, 436, 181, 692, 948, 1349, 582, 71, 327, 838, 1094, 583, 1350, 728, 473, 217, 984, 1240, 729, 1130, 107, 363, 619, 875, 1386, 108, 1276, 509, 254, ): list([ 227, 228, 229, ]), tuple( 766, 1167, 400, 145, 656, 912, 1423, 1168, 1277, 1313, 546, 35, 291, 802, 1058, 1314, 547, 692, 181, 437, 948, 1204, 693, 1094, 327, 72, 583, 839, 1350, 1240, 473, 218, 729, 985, 474, 1241, 1386, 619, 108, 364, 875, 1131, 620, 1387, 254, 1021, 510, ): list([ 233, 234, 235, ]), tuple( 766, 1423, 656, 145, 401, 912, 1168, 1022, 802, 35, 291, 547, 1058, 1314, 36, 803, 948, 181, 437, 693, 1204, 182, 949, 1350, 583, 72, 328, 839, 1095, 729, 474, 218, 985, 1241, 730, 875, 108, 364, 620, 1131, 1387, 876, 109, 254, 1277, 510, 255, ): list([ 236, 237, 238, ]), tuple( 768, 1, 257, 513, 1024, 1280, 914, 147, 403, 659, 1170, 1426, 148, 915, 1060, 293, 38, 549, 805, 1316, 294, 1061, 695, 440, 184, 951, 1207, 841, 74, 330, 586, 1097, 1353, 987, 476, 220, 732, 1243, 221, 988, 1133, 366, 111, 622, 878, 1389, 367, 1134, ): list([ 260, 261, 262, ]), tuple( 769, 2, 258, 514, 1025, 1281, 3, 770, 1171, 404, 149, 660, 916, 1427, 1317, 550, 39, 295, 806, 1062, 1318, 696, 441, 185, 952, 1208, 697, 842, 75, 331, 587, 1098, 1354, 76, 843, 1244, 477, 222, 733, 989, 1390, 623, 112, 368, 879, 1135, 624, 1391, ): list([ 272, 273, 274, ]), tuple( 770, 3, 259, 515, 1026, 1282, 4, 771, 916, 149, 405, 661, 1172, 1428, 150, 917, 1318, 551, 40, 296, 807, 1063, 697, 442, 186, 953, 1209, 843, 76, 332, 588, 1099, 1355, 77, 844, 989, 478, 222, 734, 1245, 223, 990, 1391, 624, 113, 369, 880, 1136, ): list([ 281, 282, 283, ]), tuple( 771, 4, 260, 516, 1027, 1283, 917, 150, 406, 662, 1173, 1429, 151, 918, 1063, 296, 41, 552, 808, 1319, 297, 1064, 698, 443, 187, 954, 1210, 844, 77, 333, 589, 1100, 1356, 990, 479, 223, 735, 1246, 224, 991, 1136, 369, 114, 625, 881, 1392, 370, 1137, ): list([ 290, 291, 292, ]), tuple( 772, 5, 261, 517, 1028, 1284, 6, 773, 1174, 663, 407, 152, 919, 1430, 1320, 553, 42, 298, 809, 1065, 1321, 699, 444, 188, 955, 1211, 700, 845, 78, 334, 590, 1101, 1357, 79, 846, 1247, 480, 225, 736, 992, 1393, 626, 115, 371, 882, 1138, 627, 1394, ): list([ 302, 303, 304, ]), tuple( 773, 6, 262, 518, 1029, 1285, 7, 774, 919, 408, 664, 152, 1175, 1431, 153, 920, 1321, 554, 43, 299, 810, 1066, 700, 189, 445, 956, 1212, 846, 79, 335, 591, 1102, 1358, 80, 847, 992, 225, 481, 737, 1248, 226, 993, 1394, 627, 116, 372, 883, 1139, ): list([ 311, 312, 313, ]), tuple( 774, 7, 263, 519, 1030, 1286, 920, 409, 665, 153, 1176, 1432, 154, 921, 1066, 299, 44, 555, 811, 1322, 300, 1067, 701, 446, 190, 957, 1213, 847, 80, 336, 592, 1103, 1359, 993, 226, 482, 738, 1249, 227, 994, 1139, 372, 117, 628, 884, 1395, 373, 1140, ): list([ 320, 321, 322, ]), tuple( 775, 8, 264, 520, 1031, 1287, 9, 776, 1177, 666, 410, 155, 922, 1433, 1323, 556, 45, 301, 812, 1068, 1324, 702, 447, 191, 958, 1214, 703, 848, 81, 337, 593, 1104, 1360, 82, 849, 1250, 483, 228, 739, 995, 1396, 629, 118, 374, 885, 1141, 630, 1397, ): list([ 
332, 333, 334, ]), tuple( 776, 9, 265, 521, 1032, 1288, 10, 777, 922, 411, 155, 667, 1178, 1434, 156, 923, 1324, 557, 46, 302, 813, 1069, 703, 192, 448, 959, 1215, 849, 82, 338, 594, 1105, 1361, 83, 850, 375, 995, 228, 484, 740, 1251, 229, 996, 1397, 630, 119, 886, 1142, ): list([ 341, 342, 343, ]), tuple( 777, 10, 266, 522, 1033, 1289, 923, 668, 412, 156, 1179, 1435, 157, 924, 1069, 302, 47, 558, 814, 1325, 303, 1070, 704, 193, 449, 960, 1216, 850, 83, 339, 595, 1106, 1362, 1143, 375, 996, 229, 485, 741, 1252, 376, 230, 997, 887, 1142, 631, 120, 1398, ): list([ 350, 351, 352, ]), tuple( 784, 17, 273, 529, 894, 1040, 1296, 18, 785, 1150, 930, 163, 419, 675, 1186, 1442, 164, 931, 1332, 565, 54, 310, 821, 1077, 711, 200, 456, 967, 1223, 712, 857, 346, 602, 90, 1113, 1369, 858, 91, 1259, 236, 492, 748, 1004, 237, 127, 1405, 638, 383, ): list([ 56, 57, 58, ]), tuple( 891, 1146, 1036, 269, 14, 525, 781, 1292, 1402, 270, 1037, 1438, 671, 160, 416, 927, 1183, 817, 50, 306, 562, 1073, 1329, 963, 196, 452, 708, 1219, 197, 964, 1109, 342, 87, 598, 854, 1365, 343, 1110, 744, 233, 489, 123, 1000, 1256, 379, 890, 635, 124, ): list([ 20, 21, 22, ]), tuple( 891, 1147, 781, 14, 270, 526, 1037, 1293, 15, 782, 927, 160, 416, 672, 1183, 1439, 161, 928, 1329, 562, 51, 307, 818, 1074, 708, 197, 453, 964, 1220, 709, 854, 599, 87, 343, 1110, 1366, 855, 88, 1256, 233, 489, 745, 1001, 234, 124, 1402, 635, 380, ): list([ 26, 27, 28, ]), tuple( 891, 1147, 1292, 525, 14, 270, 781, 1037, 1402, 1293, 526, 671, 160, 416, 927, 1183, 1439, 672, 1073, 306, 51, 562, 818, 1329, 1219, 452, 197, 708, 964, 453, 1220, 1365, 598, 87, 343, 854, 1110, 599, 1366, 1000, 233, 489, 745, 1256, 379, 1146, 635, 124, ): list([ 23, 24, 25, ]), tuple( 892, 782, 15, 271, 527, 1038, 1148, 1294, 928, 161, 417, 673, 1184, 1440, 162, 929, 1074, 307, 52, 563, 819, 1330, 308, 1075, 709, 198, 454, 965, 1221, 855, 344, 600, 88, 1111, 1367, 856, 1001, 234, 490, 746, 1257, 1002, 235, 381, 380, 1403, 636, 125, ): list([ 35, 36, 37, ]), tuple( 892, 1293, 526, 15, 271, 782, 1038, 1403, 1294, 1439, 672, 161, 417, 928, 1184, 1440, 673, 818, 51, 307, 563, 1074, 1330, 52, 819, 1220, 453, 198, 709, 965, 1366, 599, 344, 88, 855, 1111, 600, 1367, 745, 234, 490, 1001, 1257, 746, 380, 1147, 636, 125, ): list([ 32, 33, 34, ]), tuple( 893, 1294, 527, 16, 272, 783, 1039, 1148, 1404, 1440, 673, 162, 418, 929, 1185, 1441, 819, 52, 308, 564, 1075, 1331, 53, 820, 965, 198, 454, 710, 1221, 199, 966, 1367, 600, 345, 89, 856, 1112, 746, 235, 491, 1002, 1258, 747, 381, 125, 892, 637, 126, ): list([ 41, 42, 43, ]), tuple( 896, 129, 385, 641, 1152, 1408, 130, 897, 1042, 275, 20, 531, 787, 1298, 276, 1043, 1444, 677, 166, 422, 933, 1189, 823, 312, 568, 56, 1079, 1335, 969, 202, 458, 714, 1225, 203, 970, 1115, 348, 93, 604, 860, 1371, 349, 1116, 750, 239, 495, 1006, 1262, ): list([ 80, 81, 82, ]), tuple( 897, 130, 386, 642, 1153, 1409, 1043, 276, 21, 532, 788, 1299, 277, 1044, 1189, 422, 167, 678, 934, 1445, 423, 1190, 824, 313, 569, 57, 1080, 1336, 970, 203, 459, 715, 1226, 1116, 605, 349, 94, 861, 1372, 350, 1117, 1262, 495, 240, 751, 1007, 496, 1263, ): list([ 89, 90, 91, ]), tuple( 898, 131, 387, 643, 1154, 1410, 132, 899, 1300, 533, 22, 278, 789, 1045, 1446, 679, 168, 424, 935, 1191, 1447, 825, 314, 570, 58, 1081, 1337, 59, 826, 971, 204, 460, 716, 1227, 205, 972, 1373, 606, 351, 95, 862, 1118, 752, 241, 497, 1008, 1264, 753, ): list([ 101, 102, 103, ]), tuple( 899, 132, 388, 644, 1155, 1411, 133, 900, 1045, 278, 23, 534, 790, 1301, 279, 1046, 1447, 680, 169, 425, 936, 
1192, 826, 315, 571, 59, 1082, 1338, 972, 205, 461, 717, 1228, 206, 973, 1118, 607, 96, 351, 863, 1374, 352, 1119, 753, 242, 498, 1009, 1265, ): list([ 110, 111, 112, ]), tuple( 900, 133, 389, 645, 1156, 1412, 1046, 535, 279, 24, 791, 1302, 280, 1047, 1192, 425, 170, 681, 937, 1448, 426, 1193, 827, 316, 60, 572, 1083, 1339, 973, 206, 462, 718, 1229, 1119, 352, 97, 608, 864, 1375, 353, 1120, 1265, 498, 243, 754, 1010, 499, 1266, ): list([ 119, 120, 121, ]), tuple( 901, 134, 390, 646, 1157, 1413, 135, 902, 1303, 536, 281, 25, 792, 1048, 1449, 682, 171, 427, 938, 1194, 1450, 828, 573, 317, 61, 1084, 1340, 62, 829, 974, 207, 463, 719, 1230, 208, 975, 1376, 609, 98, 354, 865, 1121, 755, 244, 500, 1011, 1267, 756, ): list([ 131, 132, 133, ]), tuple( 902, 135, 391, 647, 1158, 1414, 136, 903, 1048, 537, 281, 26, 793, 1304, 282, 1049, 1450, 683, 172, 428, 939, 1195, 829, 318, 574, 62, 1085, 1341, 975, 208, 464, 720, 1231, 209, 976, 1121, 354, 99, 610, 866, 1377, 355, 1122, 756, 245, 501, 1012, 1268, ): list([ 140, 141, 142, ]), tuple( 903, 136, 392, 648, 1159, 1415, 1049, 538, 282, 27, 794, 1305, 283, 1050, 1195, 428, 173, 684, 940, 1451, 429, 1196, 830, 319, 575, 63, 1086, 1342, 976, 209, 465, 721, 1232, 1122, 355, 100, 611, 867, 1378, 356, 1123, 1268, 501, 246, 757, 1013, 502, 1269, ): list([ 149, 150, 151, ]), tuple( 904, 137, 393, 649, 1160, 1416, 138, 905, 1306, 539, 284, 28, 795, 1051, 1452, 685, 174, 430, 941, 1197, 1453, 831, 64, 320, 576, 1087, 1343, 65, 832, 977, 210, 466, 722, 1233, 211, 978, 247, 1379, 612, 101, 357, 868, 1124, 759, 758, 503, 1014, 1270, ): list([ 161, 162, 163, ]), tuple( 905, 138, 394, 650, 1161, 1417, 139, 906, 1051, 540, 284, 29, 796, 1307, 285, 1052, 686, 175, 431, 942, 1198, 832, 65, 321, 577, 1088, 1344, 978, 211, 467, 723, 1234, 212, 979, 248, 1124, 357, 102, 613, 869, 1380, 358, 1125, 1015, 759, 504, 1271, ): list([ 170, 171, 172, ]), tuple( 911, 144, 400, 656, 1021, 1167, 1423, 145, 912, 1277, 1057, 290, 35, 546, 802, 1313, 291, 1058, 692, 181, 437, 948, 1204, 838, 71, 327, 583, 1094, 1350, 984, 473, 217, 729, 1240, 218, 985, 1130, 363, 108, 619, 875, 1386, 364, 1131, 254, 765, 510, ): list([ 230, 231, 232, ]), tuple( 1018, 1273, 1163, 396, 141, 652, 908, 1419, 1164, 397, 542, 287, 31, 798, 1054, 1310, 543, 944, 177, 433, 689, 1200, 1090, 323, 68, 579, 835, 1346, 324, 1091, 1236, 469, 214, 725, 981, 470, 1237, 762, 871, 104, 360, 616, 250, 1127, 1383, 1017, 506, ): list([ 194, 195, 196, ]), tuple( 1018, 1274, 908, 141, 397, 653, 1164, 1420, 142, 909, 1054, 543, 32, 287, 799, 1310, 288, 1055, 689, 178, 434, 945, 1201, 835, 68, 324, 580, 1091, 1347, 981, 214, 470, 726, 1237, 215, 982, 1127, 360, 105, 616, 872, 1383, 361, 1128, 251, 762, 507, ): list([ 200, 201, 202, ]), tuple( 1018, 1419, 652, 141, 397, 908, 1164, 653, 1420, 798, 287, 543, 31, 1054, 1310, 32, 799, 1200, 433, 178, 689, 945, 1346, 579, 68, 324, 835, 1091, 580, 1347, 725, 214, 470, 981, 1237, 726, 762, 1127, 104, 360, 616, 872, 1383, 105, 1273, 506, 251, ): list([ 197, 198, 199, ]), tuple( 1019, 909, 142, 398, 654, 1165, 1421, 1275, 1055, 288, 33, 544, 800, 1311, 289, 1056, 1201, 434, 179, 690, 946, 435, 1202, 836, 69, 325, 581, 1092, 1348, 982, 471, 215, 727, 1238, 1128, 361, 106, 617, 873, 1384, 508, 362, 1129, 1274, 507, 252, 763, ): list([ 209, 210, 211, ]), tuple( 1019, 1420, 653, 142, 398, 909, 1165, 799, 32, 288, 544, 1055, 1311, 33, 800, 945, 178, 434, 690, 1201, 179, 946, 1347, 580, 69, 325, 836, 1092, 726, 471, 215, 982, 1238, 727, 872, 105, 361, 617, 1128, 1384, 873, 251, 106, 1274, 507, 
252, 763, ): list([ 206, 207, 208, ]), tuple( 1024, 257, 2, 513, 769, 1280, 1170, 403, 148, 659, 915, 1426, 1171, 1316, 549, 38, 294, 805, 1061, 1317, 550, 695, 440, 184, 951, 1207, 696, 1097, 330, 75, 586, 842, 1353, 1243, 476, 221, 732, 988, 477, 1244, 1389, 622, 111, 367, 878, 1134, 623, 1390, ): list([ 263, 264, 265, ]), tuple( 1025, 258, 3, 514, 770, 1281, 1026, 259, 1427, 404, 149, 660, 916, 1172, 405, 806, 39, 295, 551, 1062, 1318, 952, 441, 185, 697, 1208, 186, 953, 1098, 331, 76, 587, 843, 1354, 332, 1099, 733, 478, 222, 989, 1245, 879, 112, 368, 624, 1135, 1391, 880, ): list([ 275, 276, 277, ]), tuple( 1026, 259, 4, 515, 771, 1282, 1027, 1172, 405, 150, 661, 917, 1428, 1173, 406, 551, 40, 296, 807, 1063, 1319, 552, 953, 442, 186, 698, 1209, 1099, 332, 77, 588, 844, 1355, 333, 1100, 1245, 478, 223, 734, 990, 479, 1246, 880, 113, 369, 625, 1136, 1392, ): list([ 284, 285, 286, ]), tuple( 1027, 260, 5, 516, 772, 1283, 1173, 406, 151, 662, 918, 1429, 1174, 1319, 552, 41, 297, 808, 1064, 1320, 553, 698, 443, 187, 954, 1210, 699, 1100, 333, 78, 589, 845, 1356, 1246, 479, 224, 735, 991, 480, 1247, 1392, 625, 114, 370, 881, 1137, 626, 1393, ): list([ 293, 294, 295, ]), tuple( 1028, 261, 6, 517, 773, 1284, 1029, 262, 1430, 663, 407, 152, 919, 1175, 408, 809, 42, 298, 554, 1065, 1321, 955, 444, 188, 700, 1211, 189, 956, 1101, 334, 79, 590, 846, 1357, 335, 1102, 736, 225, 481, 992, 1248, 882, 115, 371, 627, 1138, 1394, 883, ): list([ 305, 306, 307, ]), tuple( 1029, 262, 7, 518, 774, 1285, 1030, 1175, 664, 408, 153, 920, 1431, 1176, 409, 554, 43, 299, 810, 1066, 1322, 555, 956, 189, 445, 701, 1212, 1102, 335, 80, 591, 847, 1358, 336, 1103, 1248, 481, 226, 737, 993, 482, 1249, 883, 116, 372, 628, 1139, 1395, ): list([ 314, 315, 316, ]), tuple( 1030, 263, 8, 519, 775, 1286, 1176, 665, 409, 154, 921, 1432, 1177, 1322, 555, 44, 300, 811, 1067, 1323, 556, 701, 446, 190, 957, 1213, 702, 1103, 336, 81, 592, 848, 1359, 1249, 482, 227, 738, 994, 483, 1250, 1395, 628, 117, 373, 884, 1140, 629, 1396, ): list([ 323, 324, 325, ]), tuple( 1031, 264, 9, 520, 776, 1287, 1032, 265, 1433, 666, 410, 155, 922, 1178, 411, 812, 45, 301, 557, 1068, 1324, 958, 447, 191, 703, 1214, 192, 959, 1104, 337, 82, 593, 849, 1360, 338, 1105, 739, 228, 484, 995, 1251, 885, 118, 374, 630, 1141, 1397, 886, ): list([ 335, 336, 337, ]), tuple( 1032, 265, 10, 521, 777, 1288, 1033, 1178, 667, 411, 156, 923, 1434, 1179, 412, 557, 46, 302, 813, 1069, 1325, 558, 959, 192, 448, 704, 1215, 1105, 338, 83, 594, 850, 1361, 119, 339, 1106, 375, 1251, 484, 229, 740, 996, 485, 1252, 886, 631, 1142, 1398, ): list([ 344, 345, 346, ]), tuple( 1033, 266, 11, 522, 778, 1289, 1179, 668, 412, 157, 924, 1435, 1180, 1325, 558, 47, 303, 814, 1070, 1326, 559, 704, 193, 449, 960, 1216, 705, 1106, 339, 84, 595, 851, 1143, 1362, 120, 1252, 485, 230, 741, 997, 486, 1253, 632, 887, 1398, 631, 376, 1399, ): list([ 353, 354, 355, ]), tuple( 1145, 779, 12, 268, 524, 1035, 1291, 925, 414, 670, 158, 1181, 1437, 159, 926, 1071, 304, 49, 560, 816, 1327, 305, 1072, 706, 195, 451, 962, 1218, 852, 85, 341, 597, 1108, 1364, 853, 889, 998, 231, 487, 377, 743, 1254, 999, 232, 378, 1400, 633, 122, ): list([ 5, 6, 7, ]), tuple( 1145, 1401, 1035, 268, 13, 524, 780, 1291, 1181, 670, 414, 159, 926, 1437, 415, 1182, 1327, 560, 49, 305, 816, 1072, 561, 1328, 962, 195, 451, 707, 1218, 1108, 341, 86, 597, 853, 1364, 889, 122, 487, 232, 743, 999, 1254, 488, 1255, 634, 1400, 633, 378, ): list([ 8, 9, 10, ]), tuple( 1145, 1401, 1291, 524, 13, 269, 780, 1036, 1437, 670, 415, 159, 926, 
1182, 1438, 816, 49, 305, 561, 1072, 1328, 50, 817, 962, 195, 451, 707, 1218, 196, 963, 1364, 597, 86, 342, 853, 1109, 122, 743, 232, 488, 999, 1255, 744, 634, 889, 378, 123, 890, ): list([ 11, 12, 13, ]), tuple( 1146, 780, 13, 269, 525, 1036, 1292, 14, 781, 1402, 1182, 671, 160, 415, 927, 1438, 1328, 561, 50, 306, 817, 1073, 562, 1329, 707, 196, 452, 963, 1219, 708, 1109, 86, 342, 598, 854, 1365, 87, 1255, 488, 233, 744, 123, 1000, 635, 1401, 634, 379, 890, ): list([ 17, 18, 19, ]), tuple( 1146, 1401, 524, 13, 269, 780, 1036, 1292, 525, 926, 415, 671, 159, 1182, 1438, 1072, 305, 50, 561, 817, 1328, 306, 1073, 1218, 451, 196, 707, 963, 452, 1219, 853, 86, 342, 598, 1109, 1365, 999, 232, 488, 744, 1255, 1000, 378, 379, 1145, 634, 123, 890, ): list([ 14, 15, 16, ]), tuple( 1147, 1037, 270, 15, 526, 782, 1293, 1403, 271, 1038, 1183, 416, 161, 672, 928, 1439, 417, 1184, 818, 51, 307, 563, 1074, 1330, 964, 197, 453, 709, 1220, 1110, 599, 343, 88, 855, 1366, 344, 1111, 1256, 489, 234, 745, 1001, 490, 1257, 124, 380, 891, 636, ): list([ 29, 30, 31, ]), tuple( 1152, 385, 130, 641, 897, 1408, 1153, 1298, 531, 20, 276, 787, 1043, 1299, 532, 677, 166, 422, 933, 1189, 1445, 678, 1079, 568, 312, 57, 824, 1335, 1225, 458, 203, 714, 970, 459, 1226, 1371, 604, 349, 93, 860, 1116, 605, 1372, 1006, 239, 495, 751, 1262, ): list([ 83, 84, 85, ]), tuple( 1153, 386, 131, 642, 898, 1409, 1299, 532, 21, 277, 788, 1044, 1300, 1445, 678, 167, 423, 934, 1190, 1446, 679, 824, 313, 569, 57, 1080, 1336, 58, 825, 1226, 459, 204, 715, 971, 1372, 605, 350, 94, 861, 1117, 606, 1373, 751, 240, 496, 1007, 1263, 752, ): list([ 92, 93, 94, ]), tuple( 1154, 387, 132, 643, 899, 1410, 1155, 388, 533, 22, 278, 789, 1045, 1301, 534, 935, 168, 424, 680, 1191, 1447, 1081, 570, 314, 59, 826, 1337, 315, 1082, 1227, 460, 205, 716, 972, 461, 1228, 862, 351, 607, 95, 1118, 1374, 1008, 241, 497, 753, 1264, 1009, ): list([ 104, 105, 106, ]), tuple( 1155, 388, 133, 644, 900, 1411, 1156, 1301, 534, 279, 23, 790, 1046, 1302, 535, 680, 169, 425, 936, 1192, 1448, 681, 1082, 571, 315, 60, 827, 1338, 1228, 461, 206, 717, 973, 462, 1229, 1374, 607, 96, 352, 863, 1119, 608, 1375, 1009, 242, 498, 754, 1265, ): list([ 113, 114, 115, ]), tuple( 1156, 389, 134, 645, 901, 1412, 1302, 535, 280, 24, 791, 1047, 1303, 1448, 681, 170, 426, 937, 1193, 1449, 682, 827, 316, 60, 572, 1083, 1339, 61, 828, 1229, 462, 207, 718, 974, 1375, 608, 97, 353, 864, 1120, 609, 1376, 754, 243, 499, 1010, 1266, 755, ): list([ 122, 123, 124, ]), tuple( 1157, 390, 135, 646, 902, 1413, 1158, 391, 536, 281, 25, 792, 1048, 1304, 537, 938, 171, 427, 683, 1194, 1450, 1084, 573, 317, 62, 829, 1340, 318, 1085, 1230, 463, 208, 719, 975, 464, 1231, 865, 98, 354, 610, 1121, 1377, 1011, 244, 500, 756, 1267, 1012, ): list([ 134, 135, 136, ]), tuple( 1158, 391, 136, 647, 903, 1414, 1159, 1304, 537, 282, 26, 793, 1049, 1305, 538, 683, 172, 428, 939, 1195, 1451, 684, 1085, 574, 318, 63, 830, 1341, 1231, 464, 209, 720, 976, 465, 1232, 1377, 610, 99, 355, 866, 1122, 611, 1378, 1012, 245, 501, 757, 1268, ): list([ 143, 144, 145, ]), tuple( 1159, 392, 137, 648, 904, 1415, 1305, 538, 283, 27, 794, 1050, 1306, 1451, 684, 173, 429, 940, 1196, 1452, 685, 830, 319, 575, 63, 1086, 1342, 64, 831, 1232, 465, 210, 721, 977, 1378, 611, 100, 356, 867, 1123, 612, 1379, 757, 246, 502, 1013, 1269, 758, ): list([ 152, 153, 154, ]), tuple( 1160, 393, 138, 649, 905, 1416, 1161, 394, 539, 284, 28, 795, 1051, 1307, 540, 941, 174, 430, 686, 1197, 1453, 1087, 320, 65, 576, 832, 1343, 321, 1088, 1233, 466, 211, 722, 
978, 467, 1234, 247, 868, 101, 357, 613, 1124, 1380, 759, 1015, 1014, 503, 1270, ): list([ 164, 165, 166, ]), tuple( 1161, 394, 139, 650, 906, 1417, 1162, 1307, 540, 285, 29, 796, 1052, 1308, 541, 686, 175, 431, 942, 1198, 687, 1088, 321, 66, 577, 833, 1344, 1234, 467, 212, 723, 979, 468, 1235, 248, 1380, 613, 102, 358, 869, 1125, 614, 1381, 760, 1015, 504, 1271, ): list([ 173, 174, 175, ]), tuple( 1272, 906, 139, 395, 651, 1162, 1418, 1052, 541, 285, 30, 797, 1308, 286, 1053, 1198, 431, 176, 687, 943, 432, 1199, 833, 66, 322, 578, 1089, 1345, 979, 212, 468, 724, 1235, 1016, 1125, 358, 103, 614, 870, 1381, 359, 1126, 505, 760, 1271, 504, 249, ): list([ 179, 180, 181, ]), tuple( 1272, 1162, 395, 140, 651, 907, 1418, 1308, 541, 30, 286, 797, 1053, 1309, 687, 176, 432, 943, 1199, 688, 833, 66, 322, 578, 1089, 1345, 67, 834, 1235, 468, 213, 724, 980, 1016, 249, 614, 103, 359, 870, 1126, 1381, 615, 1382, 761, 760, 505, ): list([ 182, 183, 184, ]), tuple( 1272, 1418, 395, 140, 651, 907, 1163, 396, 797, 542, 30, 286, 1053, 1309, 943, 176, 432, 688, 1199, 177, 944, 1089, 322, 67, 578, 834, 1345, 323, 1090, 724, 213, 469, 980, 1236, 249, 870, 103, 359, 615, 1126, 1382, 871, 761, 1016, 505, 250, 1017, ): list([ 185, 186, 187, ]), tuple( 1273, 907, 140, 396, 652, 1163, 1419, 141, 908, 1309, 542, 287, 31, 798, 1054, 688, 177, 433, 944, 1200, 834, 67, 323, 579, 1090, 1346, 68, 835, 980, 213, 469, 725, 1236, 214, 981, 762, 1382, 615, 104, 360, 250, 871, 1127, 761, 506, 1017, ): list([ 191, 192, 193, ]), tuple( 1273, 1418, 651, 140, 396, 907, 1163, 652, 1419, 1053, 542, 286, 31, 798, 1309, 1199, 432, 177, 688, 944, 433, 1200, 1345, 578, 67, 323, 834, 1090, 579, 1346, 980, 213, 469, 725, 1236, 1126, 359, 104, 615, 871, 1382, 506, 761, 1272, 505, 250, 1017, ): list([ 188, 189, 190, ]), tuple( 1274, 1164, 397, 142, 653, 909, 1420, 1165, 1310, 543, 32, 288, 799, 1055, 1311, 544, 689, 178, 434, 945, 1201, 690, 1091, 324, 69, 580, 836, 1347, 1237, 470, 215, 726, 982, 471, 1238, 1383, 616, 105, 361, 872, 1128, 617, 1384, 251, 1018, 507, 763, ): list([ 203, 204, 205, ]), tuple( 1280, 257, 2, 513, 769, 1025, 258, 1426, 659, 148, 404, 915, 1171, 805, 38, 294, 550, 1061, 1317, 39, 806, 951, 440, 184, 696, 1207, 185, 952, 1353, 586, 75, 331, 842, 1098, 732, 477, 221, 988, 1244, 733, 878, 111, 367, 623, 1134, 1390, 879, 112, ): list([ 266, 267, 268, ]), tuple( 1280, 513, 2, 258, 769, 1025, 514, 1281, 915, 148, 404, 660, 1171, 1427, 1061, 294, 39, 550, 806, 1317, 295, 1062, 1207, 440, 185, 696, 952, 441, 1208, 842, 75, 331, 587, 1098, 1354, 988, 477, 221, 733, 1244, 1134, 367, 112, 623, 879, 1390, 368, 1135, ): list([ 269, 270, 271, ]), tuple( 1281, 514, 3, 259, 770, 1026, 515, 1282, 1427, 660, 149, 405, 916, 1172, 661, 1428, 1062, 295, 40, 551, 807, 1318, 1208, 441, 186, 697, 953, 442, 1209, 1354, 587, 76, 332, 843, 1099, 588, 1355, 989, 478, 222, 734, 1245, 1135, 368, 113, 624, 880, 1391, ): list([ 278, 279, 280, ]), tuple( 1282, 515, 4, 260, 771, 1027, 1428, 661, 150, 406, 917, 1173, 662, 1429, 807, 40, 296, 552, 1063, 1319, 41, 808, 1209, 442, 187, 698, 954, 1355, 588, 77, 333, 844, 1100, 589, 1356, 734, 479, 223, 990, 1246, 735, 1136, 113, 369, 625, 881, 1392, 114, ): list([ 287, 288, 289, ]), tuple( 1283, 260, 5, 516, 772, 1028, 261, 1429, 662, 407, 151, 918, 1174, 808, 41, 297, 553, 1064, 1320, 42, 809, 954, 443, 187, 699, 1210, 188, 955, 1356, 589, 78, 334, 845, 1101, 735, 224, 480, 991, 1247, 736, 881, 114, 370, 626, 1137, 1393, 882, 115, ): list([ 296, 297, 298, ]), tuple( 1283, 516, 5, 261, 772, 1028, 517, 
1284, 918, 407, 663, 151, 1174, 1430, 1064, 297, 42, 553, 809, 1320, 298, 1065, 1210, 443, 188, 699, 955, 444, 1211, 845, 78, 334, 590, 1101, 1357, 991, 224, 480, 736, 1247, 1137, 370, 115, 626, 882, 1393, 371, 1138, ): list([ 299, 300, 301, ]), tuple( 1284, 517, 6, 262, 773, 1029, 518, 1285, 1430, 663, 408, 152, 919, 1175, 664, 1431, 1065, 298, 43, 554, 810, 1321, 1211, 444, 189, 700, 956, 445, 1212, 1357, 590, 79, 335, 846, 1102, 591, 1358, 992, 225, 481, 737, 1248, 1138, 371, 116, 627, 883, 1394, ): list([ 308, 309, 310, ]), tuple( 1285, 518, 7, 263, 774, 1030, 1431, 664, 409, 153, 920, 1176, 665, 1432, 810, 43, 299, 555, 1066, 1322, 44, 811, 1212, 445, 190, 701, 957, 1358, 591, 80, 336, 847, 1103, 592, 1359, 737, 226, 482, 993, 1249, 738, 1139, 116, 372, 628, 884, 1395, 117, ): list([ 317, 318, 319, ]), tuple( 1286, 263, 8, 519, 775, 1031, 264, 1432, 665, 410, 154, 921, 1177, 811, 44, 300, 556, 1067, 1323, 45, 812, 957, 446, 190, 702, 1213, 191, 958, 1359, 592, 81, 337, 848, 1104, 738, 227, 483, 994, 1250, 739, 884, 117, 373, 629, 1140, 1396, 885, 118, ): list([ 326, 327, 328, ]), tuple( 1286, 519, 8, 264, 775, 1031, 520, 1287, 921, 410, 666, 154, 1177, 1433, 1067, 300, 45, 556, 812, 1323, 301, 1068, 1213, 446, 191, 702, 958, 447, 1214, 848, 81, 337, 593, 1104, 1360, 994, 227, 483, 739, 1250, 1140, 373, 118, 629, 885, 1396, 374, 1141, ): list([ 329, 330, 331, ]), tuple( 1287, 520, 9, 265, 776, 1032, 521, 1288, 1433, 666, 411, 155, 922, 1178, 667, 1434, 1068, 301, 46, 557, 813, 1324, 1214, 447, 192, 703, 959, 448, 1215, 1360, 593, 82, 338, 849, 1105, 594, 1361, 995, 228, 484, 740, 1251, 1141, 374, 119, 630, 886, 1397, ): list([ 338, 339, 340, ]), tuple( 1288, 521, 10, 266, 777, 1033, 1434, 667, 412, 156, 923, 1179, 668, 1435, 813, 46, 302, 558, 1069, 1325, 47, 814, 1215, 448, 193, 704, 960, 1361, 594, 83, 339, 850, 1106, 119, 595, 1362, 120, 375, 740, 229, 485, 996, 1252, 741, 887, 1142, 631, 1398, ): list([ 347, 348, 349, ]), tuple( 1289, 266, 11, 522, 778, 1034, 267, 1435, 668, 157, 413, 924, 1180, 814, 47, 303, 559, 1070, 1326, 48, 815, 960, 193, 449, 705, 1216, 194, 961, 1362, 595, 84, 340, 851, 1107, 1143, 120, 741, 230, 486, 997, 1253, 742, 632, 888, 887, 376, 121, 1399, ): list([ 356, 357, 358, ]), tuple( 1289, 522, 11, 267, 778, 1034, 523, 1290, 924, 669, 157, 413, 1180, 1436, 1070, 303, 48, 559, 815, 1326, 304, 1071, 1216, 449, 194, 705, 961, 450, 1217, 851, 84, 340, 596, 1107, 1363, 376, 997, 230, 486, 742, 1253, 377, 888, 1143, 632, 121, 1144, 1399, ): list([ 359, 360, 361, ]), tuple( 1400, 1034, 267, 12, 523, 779, 1290, 268, 1035, 11, 778, 1436, 669, 414, 158, 925, 1181, 413, 1180, 1326, 815, 48, 304, 560, 1071, 1327, 559, 961, 194, 450, 706, 1217, 705, 1107, 340, 85, 596, 852, 1363, 341, 1108, 84, 851, 121, 1253, 742, 231, 487, 998, 1254, 486, 632, 633, 1399, 888, 377, 1144, ): list([ 0, 1, 362, 363, 364, 365, ]), tuple( 1400, 1290, 523, 12, 268, 779, 1035, 1291, 1436, 669, 414, 158, 925, 1181, 1437, 670, 815, 48, 304, 560, 1071, 1327, 49, 816, 1217, 450, 195, 706, 962, 1363, 596, 85, 341, 852, 1108, 597, 1364, 889, 742, 231, 487, 377, 998, 1254, 743, 1144, 633, 122, ): list([ 2, 3, 4, ]), tuple( 1408, 641, 130, 386, 897, 1153, 787, 20, 276, 532, 1043, 1299, 21, 788, 933, 166, 422, 678, 1189, 1445, 167, 934, 1335, 568, 313, 57, 824, 1080, 714, 203, 459, 970, 1226, 715, 860, 605, 349, 93, 1116, 1372, 861, 94, 1262, 239, 495, 751, 1007, 240, ): list([ 86, 87, 88, ]), tuple( 1409, 386, 131, 642, 898, 1154, 387, 788, 21, 277, 533, 1044, 1300, 934, 167, 423, 679, 1190, 1446, 
168, 935, 1080, 569, 313, 58, 825, 1336, 314, 1081, 715, 204, 460, 971, 1227, 861, 350, 606, 94, 1117, 1373, 862, 1007, 240, 496, 752, 1263, 1008, 241, ): list([ 95, 96, 97, ]), tuple( 1409, 642, 131, 387, 898, 1154, 643, 1410, 1044, 277, 22, 533, 789, 1300, 1190, 423, 168, 679, 935, 1446, 424, 1191, 1336, 569, 314, 58, 825, 1081, 570, 1337, 971, 204, 460, 716, 1227, 1117, 606, 350, 95, 862, 1373, 1263, 496, 241, 752, 1008, 497, 1264, ): list([ 98, 99, 100, ]), tuple( 1410, 643, 132, 388, 899, 1155, 644, 1411, 789, 22, 278, 534, 1045, 1301, 23, 790, 1191, 424, 169, 680, 936, 1447, 1337, 570, 315, 59, 826, 1082, 571, 1338, 716, 205, 461, 972, 1228, 717, 1118, 351, 607, 95, 863, 1374, 96, 1264, 497, 242, 753, 1009, ): list([ 107, 108, 109, ]), tuple( 1411, 644, 133, 389, 900, 1156, 790, 535, 279, 23, 1046, 1302, 24, 791, 936, 169, 425, 681, 1192, 1448, 170, 937, 1338, 571, 316, 60, 827, 1083, 717, 206, 462, 973, 1229, 718, 863, 96, 352, 608, 1119, 1375, 864, 97, 1265, 242, 498, 754, 1010, 243, ): list([ 116, 117, 118, ]), tuple( 1412, 389, 134, 645, 901, 1157, 390, 791, 280, 536, 24, 1047, 1303, 937, 170, 426, 682, 1193, 1449, 171, 938, 1083, 316, 61, 572, 828, 1339, 317, 1084, 718, 207, 463, 974, 1230, 864, 97, 353, 609, 1120, 1376, 865, 1010, 243, 499, 755, 1266, 1011, 244, ): list([ 125, 126, 127, ]), tuple( 1412, 645, 134, 390, 901, 1157, 646, 1413, 1047, 536, 280, 25, 792, 1303, 1193, 426, 171, 682, 938, 1449, 427, 1194, 1339, 572, 317, 61, 828, 1084, 573, 1340, 974, 207, 463, 719, 1230, 1120, 353, 98, 609, 865, 1376, 1266, 499, 244, 755, 1011, 500, 1267, ): list([ 128, 129, 130, ]), tuple( 1413, 646, 135, 391, 902, 1158, 647, 1414, 792, 281, 537, 25, 1048, 1304, 26, 793, 1194, 427, 172, 683, 939, 1450, 1340, 573, 318, 62, 829, 1085, 574, 1341, 719, 208, 464, 975, 1231, 720, 1121, 98, 354, 610, 866, 1377, 99, 1267, 500, 245, 756, 1012, ): list([ 137, 138, 139, ]), tuple( 1414, 647, 136, 392, 903, 1159, 793, 282, 538, 26, 1049, 1305, 27, 794, 939, 172, 428, 684, 1195, 1451, 173, 940, 1341, 574, 319, 63, 830, 1086, 720, 209, 465, 976, 1232, 721, 866, 99, 355, 611, 1122, 1378, 867, 100, 1268, 245, 501, 757, 1013, 246, ): list([ 146, 147, 148, ]), tuple( 1415, 392, 137, 648, 904, 1160, 393, 794, 283, 539, 27, 1050, 1306, 940, 173, 429, 685, 1196, 1452, 174, 941, 1086, 575, 64, 319, 831, 1342, 320, 1087, 721, 210, 466, 977, 1233, 247, 867, 100, 356, 612, 1123, 1379, 868, 1013, 246, 502, 758, 1269, 1014, ): list([ 155, 156, 157, ]), tuple( 1415, 648, 137, 393, 904, 1160, 649, 1416, 1050, 539, 283, 28, 795, 1306, 1196, 429, 174, 685, 941, 1452, 430, 1197, 1342, 575, 64, 320, 831, 1087, 576, 1343, 977, 210, 466, 722, 1233, 1123, 356, 101, 612, 868, 1379, 503, 1269, 502, 247, 758, 1014, 1270, ): list([ 158, 159, 160, ]), tuple( 1416, 649, 138, 394, 905, 1161, 650, 1417, 795, 284, 28, 540, 1051, 1307, 29, 796, 1197, 430, 175, 686, 942, 1343, 576, 65, 321, 832, 1088, 577, 1344, 722, 211, 467, 978, 1234, 723, 1124, 101, 357, 613, 869, 1380, 102, 759, 1015, 1270, 503, 248, ): list([ 167, 168, 169, ]), tuple( 1417, 650, 139, 395, 906, 1162, 796, 541, 285, 29, 1052, 1308, 30, 797, 942, 175, 431, 687, 1198, 176, 943, 1344, 577, 66, 322, 833, 1089, 723, 212, 468, 979, 1235, 724, 1016, 248, 869, 102, 358, 614, 1125, 1381, 870, 103, 760, 1271, 504, 249, ): list([ 176, 177, 178, ]), }) # --- # name: test_snapshot_cohorts[PerfectBlockwiseResampling] dict({ tuple( 0, ): list([ 0, 1, ]), tuple( 1, ): list([ 2, 3, ]), tuple( 2, ): list([ 4, 5, ]), tuple( 3, ): list([ 6, 7, ]), tuple( 4, ): list([ 8, 9, ]), 
tuple( 5, ): list([ 10, 11, ]), tuple( 6, ): list([ 12, 13, ]), tuple( 7, ): list([ 14, 15, ]), tuple( 8, ): list([ 16, 17, ]), tuple( 9, ): list([ 18, 19, ]), tuple( 10, ): list([ 20, 21, ]), tuple( 11, ): list([ 22, 23, ]), tuple( 12, ): list([ 24, 25, ]), tuple( 13, ): list([ 26, 27, ]), tuple( 14, ): list([ 28, 29, ]), tuple( 15, ): list([ 30, 31, ]), tuple( 16, ): list([ 32, 33, ]), tuple( 17, ): list([ 34, 35, ]), tuple( 18, ): list([ 36, 37, ]), tuple( 19, ): list([ 38, 39, ]), tuple( 20, ): list([ 40, 41, ]), tuple( 21, ): list([ 42, 43, ]), tuple( 22, ): list([ 44, 45, ]), tuple( 23, ): list([ 46, 47, ]), tuple( 24, ): list([ 48, 49, ]), tuple( 25, ): list([ 50, 51, ]), tuple( 26, ): list([ 52, 53, ]), tuple( 27, ): list([ 54, 55, ]), tuple( 28, ): list([ 56, 57, ]), tuple( 29, ): list([ 58, 59, ]), tuple( 30, ): list([ 60, 61, ]), tuple( 31, ): list([ 62, 63, ]), tuple( 32, ): list([ 64, 65, ]), tuple( 33, ): list([ 66, 67, ]), tuple( 34, ): list([ 68, 69, ]), tuple( 35, ): list([ 70, 71, ]), tuple( 36, ): list([ 72, 73, ]), tuple( 37, ): list([ 74, 75, ]), tuple( 38, ): list([ 76, 77, ]), tuple( 39, ): list([ 78, 79, ]), tuple( 40, ): list([ 80, 81, ]), tuple( 41, ): list([ 82, 83, ]), tuple( 42, ): list([ 84, 85, ]), tuple( 43, ): list([ 86, 87, ]), tuple( 44, ): list([ 88, 89, ]), tuple( 45, ): list([ 90, 91, ]), tuple( 46, ): list([ 92, 93, ]), tuple( 47, ): list([ 94, 95, ]), tuple( 48, ): list([ 96, 97, ]), tuple( 49, ): list([ 98, 99, ]), tuple( 50, ): list([ 100, 101, ]), tuple( 51, ): list([ 102, 103, ]), tuple( 52, ): list([ 104, 105, ]), tuple( 53, ): list([ 106, 107, ]), tuple( 54, ): list([ 108, 109, ]), tuple( 55, ): list([ 110, 111, ]), tuple( 56, ): list([ 112, 113, ]), tuple( 57, ): list([ 114, 115, ]), tuple( 58, ): list([ 116, 117, ]), tuple( 59, ): list([ 118, 119, ]), tuple( 60, ): list([ 120, 121, ]), tuple( 61, ): list([ 122, 123, ]), tuple( 62, ): list([ 124, 125, ]), tuple( 63, ): list([ 126, 127, ]), tuple( 64, ): list([ 128, 129, ]), tuple( 65, ): list([ 130, 131, ]), tuple( 66, ): list([ 132, 133, ]), tuple( 67, ): list([ 134, 135, ]), tuple( 68, ): list([ 136, 137, ]), tuple( 69, ): list([ 138, 139, ]), tuple( 70, ): list([ 140, 141, ]), tuple( 71, ): list([ 142, 143, ]), tuple( 72, ): list([ 144, 145, ]), tuple( 73, ): list([ 146, 147, ]), tuple( 74, ): list([ 148, 149, ]), tuple( 75, ): list([ 150, 151, ]), tuple( 76, ): list([ 152, 153, ]), tuple( 77, ): list([ 154, 155, ]), tuple( 78, ): list([ 156, 157, ]), tuple( 79, ): list([ 158, 159, ]), tuple( 80, ): list([ 160, 161, ]), tuple( 81, ): list([ 162, 163, ]), tuple( 82, ): list([ 164, 165, ]), tuple( 83, ): list([ 166, 167, ]), tuple( 84, ): list([ 168, 169, ]), tuple( 85, ): list([ 170, 171, ]), tuple( 86, ): list([ 172, 173, ]), tuple( 87, ): list([ 174, 175, ]), tuple( 88, ): list([ 176, 177, ]), tuple( 89, ): list([ 178, 179, ]), tuple( 90, ): list([ 180, 181, ]), tuple( 91, ): list([ 182, 183, ]), tuple( 92, ): list([ 184, 185, ]), tuple( 93, ): list([ 186, 187, ]), tuple( 94, ): list([ 188, 189, ]), tuple( 95, ): list([ 190, 191, ]), tuple( 96, ): list([ 192, 193, ]), tuple( 97, ): list([ 194, 195, ]), tuple( 98, ): list([ 196, 197, ]), tuple( 99, ): list([ 198, 199, ]), tuple( 100, ): list([ 200, 201, ]), tuple( 101, ): list([ 202, 203, ]), tuple( 102, ): list([ 204, 205, ]), tuple( 103, ): list([ 206, 207, ]), tuple( 104, ): list([ 208, 209, ]), tuple( 105, ): list([ 210, 211, ]), tuple( 106, ): list([ 212, 213, ]), tuple( 107, ): list([ 214, 215, ]), tuple( 108, ): list([ 216, 217, ]), 
tuple( 109, ): list([ 218, 219, ]), tuple( 110, ): list([ 220, 221, ]), tuple( 111, ): list([ 222, 223, ]), tuple( 112, ): list([ 224, 225, ]), tuple( 113, ): list([ 226, 227, ]), tuple( 114, ): list([ 228, 229, ]), tuple( 115, ): list([ 230, 231, ]), tuple( 116, ): list([ 232, 233, ]), tuple( 117, ): list([ 234, 235, ]), tuple( 118, ): list([ 236, 237, ]), tuple( 119, ): list([ 238, 239, ]), tuple( 120, ): list([ 240, 241, ]), tuple( 121, ): list([ 242, 243, ]), tuple( 122, ): list([ 244, 245, ]), tuple( 123, ): list([ 246, 247, ]), tuple( 124, ): list([ 248, 249, ]), tuple( 125, ): list([ 250, 251, ]), tuple( 126, ): list([ 252, 253, ]), tuple( 127, ): list([ 254, 255, ]), tuple( 128, ): list([ 256, 257, ]), tuple( 129, ): list([ 258, 259, ]), tuple( 130, ): list([ 260, 261, ]), tuple( 131, ): list([ 262, 263, ]), tuple( 132, ): list([ 264, 265, ]), tuple( 133, ): list([ 266, 267, ]), tuple( 134, ): list([ 268, 269, ]), tuple( 135, ): list([ 270, 271, ]), tuple( 136, ): list([ 272, 273, ]), tuple( 137, ): list([ 274, 275, ]), tuple( 138, ): list([ 276, 277, ]), tuple( 139, ): list([ 278, 279, ]), tuple( 140, ): list([ 280, 281, ]), tuple( 141, ): list([ 282, 283, ]), tuple( 142, ): list([ 284, 285, ]), tuple( 143, ): list([ 286, 287, ]), tuple( 144, ): list([ 288, 289, ]), tuple( 145, ): list([ 290, 291, ]), tuple( 146, ): list([ 292, ]), }) # --- # name: test_snapshot_cohorts[PerfectMonthly] dict({ tuple( 0, 3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57, 60, 63, 66, 69, 72, 75, 78, 81, 84, 87, 90, 93, 96, 99, 102, 105, 108, 111, 114, 117, 120, 123, 126, 129, 132, 135, 138, 141, 144, 147, 150, 153, 156, 159, 162, 165, 168, 171, ): list([ 0, 1, 2, 3, ]), tuple( 1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49, 52, 55, 58, 61, 64, 67, 70, 73, 76, 79, 82, 85, 88, 91, 94, 97, 100, 103, 106, 109, 112, 115, 118, 121, 124, 127, 130, 133, 136, 139, 142, 145, 148, 151, 154, 157, 160, 163, 166, 169, 172, ): list([ 4, 5, 6, 7, ]), tuple( 2, 5, 8, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, 56, 59, 62, 65, 68, 71, 74, 77, 80, 83, 86, 89, 92, 95, 98, 101, 104, 107, 110, 113, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152, 155, 158, 161, 164, 167, 170, 173, ): list([ 8, 9, 10, 11, ]), }) # --- # name: test_snapshot_cohorts[RandomBigArray] dict({ }) # --- # name: test_snapshot_cohorts[SingleChunk] dict({ tuple( 0, ): list([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 
247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, ]), }) # --- flox-0.10.3/tests/conftest.py000066400000000000000000000013501477552625700161270ustar00rootroot00000000000000import pytest from hypothesis import HealthCheck, Verbosity, settings from . import requires_numbagg settings.register_profile( "ci", max_examples=1000, deadline=None, suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow], ) settings.register_profile( "default", max_examples=300, deadline=500, suppress_health_check=[HealthCheck.filter_too_much, HealthCheck.too_slow], verbosity=Verbosity.verbose, ) settings.load_profile("default") @pytest.fixture( scope="module", params=[ "flox", "numpy", # pytest.param("numba", marks=requires_numba), pytest.param("numbagg", marks=requires_numbagg), ], ) def engine(request): return request.param flox-0.10.3/tests/strategies.py000066400000000000000000000112071477552625700164560ustar00rootroot00000000000000from __future__ import annotations from collections.abc import Callable from typing import Any import cftime import dask import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np from . import ALL_FUNCS, SCIPY_STATS_FUNCS Chunks = tuple[tuple[int, ...], ...] calendars = st.sampled_from( [ "standard", "gregorian", "proleptic_gregorian", "noleap", "365_day", "360_day", "julian", "all_leap", "366_day", ] ) @st.composite def units(draw, *, calendar: str) -> str: choices = ["days", "hours", "minutes", "seconds", "milliseconds", "microseconds"] if calendar == "360_day": choices += ["months"] elif calendar == "noleap": choices += ["common_years"] time_units = draw(st.sampled_from(choices)) dt = draw(st.datetimes()) year, month, day = dt.year, dt.month, dt.day if calendar == "360_day": day = min(day, 30) if calendar in ["360_day", "365_day", "noleap"] and month == 2 and day == 29: day = 28 return f"{time_units} since {year}-{month}-{day}" @st.composite def cftime_arrays( draw: st.DrawFn, *, shape: st.SearchStrategy[tuple[int, ...]] = npst.array_shapes(), calendars: st.SearchStrategy[str] = calendars, elements: dict[str, Any] | None = None, ) -> np.ndarray[Any, Any]: if elements is None: elements = {} elements.setdefault("min_value", -10_000) elements.setdefault("max_value", 10_000) cal = draw(calendars) values = draw(npst.arrays(dtype=np.int64, shape=shape, elements=elements)) unit = draw(units(calendar=cal)) return cftime.num2date(values, units=unit, calendar=cal) numeric_dtypes = ( npst.integer_dtypes(endianness="=") | npst.unsigned_integer_dtypes(endianness="=") | npst.floating_dtypes(endianness="=", sizes=(32, 64)) # TODO: add complex here not in supported_dtypes ) numeric_like_dtypes = ( npst.boolean_dtypes() | numeric_dtypes | npst.datetime64_dtypes(endianness="=") | npst.timedelta64_dtypes(endianness="=") ) supported_dtypes = ( numeric_like_dtypes | npst.unicode_string_dtypes(endianness="=") | npst.complex_number_dtypes(endianness="=") ) by_dtype_st = supported_dtypes NON_NUMPY_FUNCS = [ "first", "last", "nanfirst", "nanlast", "count", "any", "all", ] + list(SCIPY_STATS_FUNCS) SKIPPED_FUNCS = ["var", "std", "nanvar", "nanstd"] func_st = st.sampled_from([f for f in ALL_FUNCS if f not in NON_NUMPY_FUNCS and f not in SKIPPED_FUNCS]) numeric_arrays = npst.arrays( elements={"allow_subnormal": False}, shape=npst.array_shapes(), dtype=numeric_dtypes ) 
numeric_like_arrays = npst.arrays( elements={"allow_subnormal": False}, shape=npst.array_shapes(), dtype=numeric_like_dtypes ) all_arrays = ( npst.arrays( elements={"allow_subnormal": False}, shape=npst.array_shapes(), dtype=numeric_like_dtypes, ) | cftime_arrays() ) def by_arrays( shape: st.SearchStrategy[tuple[int, ...]], *, elements: dict[str, Any] | None = None ) -> st.SearchStrategy[np.ndarray[Any, Any]]: if elements is None: elements = {} elements.setdefault("alphabet", st.characters(exclude_categories=["C"])) return st.one_of( npst.arrays( dtype=npst.integer_dtypes(endianness="=") | npst.unicode_string_dtypes(endianness="="), shape=shape, elements=elements, ), cftime_arrays(shape=shape, elements=elements), ) @st.composite def chunks(draw: st.DrawFn, *, shape: tuple[int, ...]) -> Chunks: chunks = [] for size in shape: if size > 1: nchunks = draw(st.integers(min_value=1, max_value=size - 1)) dividers = sorted( set(draw(st.integers(min_value=1, max_value=size - 1)) for _ in range(nchunks - 1)) ) chunks.append(tuple(a - b for a, b in zip(dividers + [size], [0] + dividers))) else: chunks.append((1,)) return tuple(chunks) @st.composite def chunked_arrays( draw: st.DrawFn, *, chunks: Callable[..., st.SearchStrategy[Chunks]] = chunks, arrays=all_arrays, from_array: Callable = dask.array.from_array, ) -> dask.array.Array: array = draw(arrays) chunks = draw(chunks(shape=array.shape)) if array.dtype.kind in "cf": nan_idx = draw( st.lists( st.integers(min_value=0, max_value=array.shape[-1] - 1), max_size=array.shape[-1] - 1, unique=True, ) ) if nan_idx: array[..., nan_idx] = np.nan return from_array(array, chunks=chunks) flox-0.10.3/tests/test_asv.py000066400000000000000000000011111477552625700161250ustar00rootroot00000000000000# Run asv benchmarks as tests import pytest pytest.importorskip("dask") from asv_bench.benchmarks import reduce @pytest.mark.parametrize("problem", [reduce.ChunkReduce1D, reduce.ChunkReduce2D, reduce.ChunkReduce2DAllAxes]) def test_reduce(problem) -> None: testcase = problem() testcase.setup() for args in zip(*testcase.time_reduce.params): testcase.time_reduce(*args) def test_reduce_bare() -> None: testcase = reduce.ChunkReduce1D() testcase.setup() for args in zip(*testcase.time_reduce_bare.params): testcase.time_reduce_bare(*args) flox-0.10.3/tests/test_cohorts.py000066400000000000000000000012461477552625700170260ustar00rootroot00000000000000# Snapshot tests for cohorts detection import pytest pytest.importorskip("dask") from asv_bench.benchmarks import cohorts @pytest.mark.parametrize( "testcase", [ cohorts.ERA5DayOfYear, cohorts.ERA5Google, cohorts.ERA5MonthHour, cohorts.ERA5MonthHourRechunked, cohorts.OISST, cohorts.PerfectBlockwiseResampling, cohorts.PerfectMonthly, cohorts.RandomBigArray, cohorts.SingleChunk, cohorts.NWMMidwest, ], ) def test_snapshot_cohorts(testcase, snapshot): problem = testcase() problem.setup() chunks_cohorts = problem.chunks_cohorts() assert chunks_cohorts == snapshot flox-0.10.3/tests/test_core.py000066400000000000000000002173751477552625700163110ustar00rootroot00000000000000from __future__ import annotations import itertools import logging import warnings from collections.abc import Callable from functools import partial, reduce from typing import TYPE_CHECKING, Any from unittest.mock import MagicMock, patch import numpy as np import pandas as pd import pytest from numpy_groupies.aggregate_numpy import aggregate import flox from flox import xrdtypes as dtypes from flox import xrutils from flox.aggregations import Aggregation, 
_initialize_aggregation from flox.core import ( HAS_NUMBAGG, ReindexArrayType, ReindexStrategy, _choose_engine, _convert_expected_groups_to_index, _get_optimal_chunks_for_groups, _is_sparse_supported_reduction, _normalize_indexes, _validate_reindex, factorize_, find_group_cohorts, groupby_reduce, groupby_scan, rechunk_for_cohorts, reindex_, subset_to_blocks, ) from . import ( ALL_FUNCS, BLOCKWISE_FUNCS, SCIPY_STATS_FUNCS, assert_equal, assert_equal_tuple, has_cubed, has_dask, has_sparse, raise_if_dask_computes, requires_cubed, requires_dask, requires_sparse, ) logger = logging.getLogger("flox") logger.setLevel(logging.DEBUG) labels = np.array([0, 0, 2, 2, 2, 1, 1, 2, 2, 1, 1, 0]) nan_labels = labels.astype(float) # copy nan_labels[:5] = np.nan labels2d = np.array([labels[:5], np.flip(labels[:5])]) if has_dask: import dask import dask.array as da from dask.array import from_array dask.config.set(scheduler="sync") else: def dask_array_ones(*args): return None if has_cubed: import cubed DEFAULT_QUANTILE = 0.9 REINDEX_SPARSE_STRAT = ReindexStrategy(blockwise=False, array_type=ReindexArrayType.SPARSE_COO) REINDEX_SPARSE_PARAM = pytest.param( REINDEX_SPARSE_STRAT, marks=(requires_dask, pytest.mark.skipif(not has_sparse, reason="no sparse")) ) if TYPE_CHECKING: from flox.core import T_Agg, T_Engine, T_ExpectedGroupsOpt, T_Method def _get_array_func(func: str) -> Callable: if func == "count": def npfunc(x, **kwargs): x = np.asarray(x) return (~xrutils.isnull(x)).sum(**kwargs) elif func in ["nanfirst", "nanlast"]: npfunc = getattr(xrutils, func) elif func in SCIPY_STATS_FUNCS: import scipy.stats if "nan" in func: func = func[3:] nan_policy = "omit" else: nan_policy = "propagate" def npfunc(x, **kwargs): spfunc = partial(getattr(scipy.stats, func), nan_policy=nan_policy) with warnings.catch_warnings(): warnings.filterwarnings("ignore", r"After omitting NaNs, one or more axis-slices") result = getattr(spfunc(x, **kwargs), func) return result else: npfunc = getattr(np, func) return npfunc def test_alignment_error(): da = np.ones((12,)) labels = np.ones((5,)) with pytest.raises(ValueError): groupby_reduce(da, labels, func="mean") @pytest.mark.parametrize("dtype", (float, int)) @pytest.mark.parametrize("chunk", [False, pytest.param(True, marks=requires_dask)]) # TODO: make this intp when python 3.8 is dropped @pytest.mark.parametrize("expected_groups", [None, [0, 1, 2], np.array([0, 1, 2], dtype=np.int64)]) @pytest.mark.parametrize( "func, array, by, expected", [ ("sum", np.ones((12,)), labels, [3, 4, 5]), # form 1 ("sum", np.ones((12,)), nan_labels, [1, 4, 2]), # form 1 ("sum", np.ones((2, 12)), labels, [[3, 4, 5], [3, 4, 5]]), # form 3 ("sum", np.ones((2, 12)), nan_labels, [[1, 4, 2], [1, 4, 2]]), # form 3 ( "sum", np.ones((2, 12)), np.array([labels, labels]), [6, 8, 10], ), # form 1 after reshape ("sum", np.ones((2, 12)), np.array([nan_labels, nan_labels]), [2, 8, 4]), # (np.ones((12,)), np.array([labels, labels])), # form 4 ("count", np.ones((12,)), labels, [3, 4, 5]), # form 1 ("count", np.ones((12,)), nan_labels, [1, 4, 2]), # form 1 ("count", np.ones((2, 12)), labels, [[3, 4, 5], [3, 4, 5]]), # form 3 ("count", np.ones((2, 12)), nan_labels, [[1, 4, 2], [1, 4, 2]]), # form 3 ( "count", np.ones((2, 12)), np.array([labels, labels]), [6, 8, 10], ), # form 1 after reshape ("count", np.ones((2, 12)), np.array([nan_labels, nan_labels]), [2, 8, 4]), ("nanmean", np.ones((12,)), labels, [1, 1, 1]), # form 1 ("nanmean", np.ones((12,)), nan_labels, [1, 1, 1]), # form 1 ("nanmean", np.ones((2, 12)), labels, 
[[1, 1, 1], [1, 1, 1]]), # form 3 ("nanmean", np.ones((2, 12)), nan_labels, [[1, 1, 1], [1, 1, 1]]), # form 3 ("nanmean", np.ones((2, 12)), np.array([labels, labels]), [1, 1, 1]), ("nanmean", np.ones((2, 12)), np.array([nan_labels, nan_labels]), [1, 1, 1]), # (np.ones((12,)), np.array([labels, labels])), # form 4 ], ) def test_groupby_reduce( engine: T_Engine, func: T_Agg, array: np.ndarray, by: np.ndarray, expected: list[float], expected_groups: T_ExpectedGroupsOpt, chunk: bool, dtype: np.typing.DTypeLike, ) -> None: array = array.astype(dtype) if chunk: array = da.from_array(array, chunks=(3,) if array.ndim == 1 else (1, 3)) by = da.from_array(by, chunks=(3,) if by.ndim == 1 else (1, 3)) if func == "mean" or func == "nanmean": expected_result = np.array(expected, dtype=np.float64) elif func == "sum": expected_result = np.array(expected, dtype=dtypes._maybe_promote_int(array.dtype)) elif func == "count": expected_result = np.array(expected, dtype=np.intp) (result, *groups) = groupby_reduce( array, by, func=func, expected_groups=expected_groups, fill_value=123, engine=engine, ) (groups_array,) = groups # we use pd.Index(expected_groups).to_numpy() which is always int64 # for the values in this test if expected_groups is None: g_dtype = by.dtype elif isinstance(expected_groups, np.ndarray): g_dtype = expected_groups.dtype else: g_dtype = np.int64 assert_equal(groups_array, np.array([0, 1, 2], g_dtype)) assert_equal(expected_result, result) def gen_array_by(size, func): by = np.ones(size[-1]) rng = np.random.default_rng(12345) array = rng.random(tuple(6 if s == 1 else s for s in size)) if ("nan" in func or "fill" in func) and "nanarg" not in func: array[[1, 4, 5], ...] = np.nan elif "nanarg" in func and len(size) > 1: array[[1, 4, 5], 1] = np.nan if func in ["any", "all"]: array = array > 0.5 return array, by @pytest.mark.parametrize( "chunks", [ None, pytest.param(-1, marks=requires_dask), pytest.param(3, marks=requires_dask), pytest.param(4, marks=requires_dask), ], ) @pytest.mark.parametrize("size", ((1, 12), (12,), (12, 9))) @pytest.mark.parametrize("nby", [1, 2, 3]) @pytest.mark.parametrize("add_nan_by", [True, False]) @pytest.mark.parametrize("func", ALL_FUNCS) def test_groupby_reduce_all(nby, size, chunks, func, add_nan_by, engine): if ("arg" in func and engine in ["flox", "numbagg"]) or (func in BLOCKWISE_FUNCS and chunks != -1): pytest.skip() array, by = gen_array_by(size, func) if chunks: array = dask.array.from_array(array, chunks=chunks) by = (by,) * nby by = [b + idx for idx, b in enumerate(by)] if add_nan_by: for idx in range(nby): by[idx][2 * idx : 2 * idx + 3] = np.nan by = tuple(by) nanmask = reduce(np.logical_or, (np.isnan(b) for b in by)) finalize_kwargs = [{}] if "var" in func or "std" in func: finalize_kwargs = finalize_kwargs + [{"ddof": 1}, {"ddof": 0}] fill_value = np.nan tolerance = {"rtol": 1e-13, "atol": 1e-15} elif "quantile" in func: finalize_kwargs = [ {"q": DEFAULT_QUANTILE}, {"q": [DEFAULT_QUANTILE / 2, DEFAULT_QUANTILE]}, ] fill_value = None tolerance = None else: fill_value = None tolerance = None # for constructing expected array_func = _get_array_func(func) for kwargs in finalize_kwargs: if "quantile" in func and isinstance(kwargs["q"], list) and engine != "flox": continue flox_kwargs = dict(func=func, engine=engine, finalize_kwargs=kwargs, fill_value=fill_value) with np.errstate(invalid="ignore", divide="ignore"): with warnings.catch_warnings(): warnings.filterwarnings("ignore", r"All-NaN (slice|axis) encountered") warnings.filterwarnings("ignore", 
r"Degrees of freedom <= 0 for slice") warnings.filterwarnings("ignore", r"Mean of empty slice") # computing silences a bunch of dask warnings array_ = array.compute() if chunks is not None else array if "arg" in func and add_nan_by: # NaNs are in by, but we can't call np.argmax([..., NaN, .. ]) # That would return index of the NaN # This way, we insert NaNs where there are NaNs in by, and # call np.nanargmax func_ = f"nan{func}" if "nan" not in func else func array_[..., nanmask] = np.nan expected = getattr(np, func_)(array_, axis=-1, **kwargs) else: expected = array_func(array_[..., ~nanmask], axis=-1, **kwargs) for _ in range(nby): expected = np.expand_dims(expected, -1) if func in BLOCKWISE_FUNCS: assert chunks == -1 actual, *groups = groupby_reduce(array, *by, **flox_kwargs) if "quantile" in func and isinstance(kwargs["q"], list): assert actual.ndim == expected.ndim == (array.ndim + nby) else: assert actual.ndim == expected.ndim == (array.ndim + nby - 1) expected_groups = tuple(np.array([idx + 1.0]) for idx in range(nby)) for actual_group, expect in zip(groups, expected_groups): assert_equal(actual_group, expect) if "arg" in func: assert actual.dtype.kind == "i" assert_equal(expected, actual, tolerance) if "nan" not in func and "arg" not in func: # test non-NaN skipping behaviour when NaNs are present nanned = array_.copy() # remove nans in by to reduce complexity # We are checking for consistent behaviour with NaNs in array by_ = tuple(np.nan_to_num(b, nan=np.nanmin(b)) for b in by) nanned[[1, 4, 5], ...] = np.nan nanned.reshape(-1)[0] = np.nan actual, *_ = groupby_reduce(nanned, *by_, **flox_kwargs) expected_0 = array_func(nanned, axis=-1, **kwargs) for _ in range(nby): expected_0 = np.expand_dims(expected_0, -1) assert_equal(expected_0, actual, tolerance) if not has_dask or chunks is None or func in BLOCKWISE_FUNCS: continue params = list( itertools.product( ["map-reduce"], [True, False, None, REINDEX_SPARSE_STRAT], ) ) params.extend(itertools.product(["cohorts"], [False, None])) if chunks == -1: params.extend([("blockwise", None)]) combine_error = RuntimeError("This combine should not have been called.") for method, reindex in params: if isinstance(reindex, ReindexStrategy) and not _is_sparse_supported_reduction(func): continue call = partial( groupby_reduce, array, *by, method=method, reindex=reindex, **flox_kwargs, ) if ("arg" in func or func in ["first", "last"]) and reindex is True: # simple_combine with argreductions not supported right now with pytest.raises(NotImplementedError): call() continue if method == "blockwise": # no combine necessary mocks = { "_simple_combine": MagicMock(side_effect=combine_error), "_grouped_combine": MagicMock(side_effect=combine_error), } else: if "arg" not in func: # make sure we use simple combine mocks = {"_grouped_combine": MagicMock(side_effect=combine_error)} else: mocks = {"_simple_combine": MagicMock(side_effect=combine_error)} with patch.multiple(flox.core, **mocks): actual, *groups = call() for actual_group, expect in zip(groups, expected_groups): assert_equal(actual_group, expect, tolerance) if "arg" in func: assert actual.dtype.kind == "i" if isinstance(reindex, ReindexStrategy): import sparse expected = sparse.COO.from_numpy(expected) assert_equal(actual, expected, tolerance) @requires_dask @pytest.mark.parametrize("size", ((12,), (12, 5))) @pytest.mark.parametrize("func", ("argmax", "nanargmax", "argmin", "nanargmin")) def test_arg_reduction_dtype_is_int(size, func): """avoid bugs being hidden by the xfail in the above test.""" rng 
= np.random.default_rng(12345) array = rng.random(size) by = np.ones(size[-1]) if "nanarg" in func and len(size) > 1: array[[1, 4, 5], 1] = np.nan expected = getattr(np, func)(array, axis=-1) expected = np.expand_dims(expected, -1) actual, _ = groupby_reduce(array, by, func=func, engine="numpy") assert actual.dtype.kind == "i" actual, _ = groupby_reduce(da.from_array(array, chunks=3), by, func=func, engine="numpy") assert actual.dtype.kind == "i" def test_groupby_reduce_count(): array = np.array([0, 0, np.nan, np.nan, np.nan, 1, 1]) labels = np.array(["a", "b", "b", "b", "c", "c", "c"]) result, _ = groupby_reduce(array, labels, func="count") assert_equal(result, np.array([1, 1, 2], dtype=np.intp)) def test_func_is_aggregation(): from flox.aggregations import mean array = np.array([0, 0, np.nan, np.nan, np.nan, 1, 1]) labels = np.array(["a", "b", "b", "b", "c", "c", "c"]) expected, _ = groupby_reduce(array, labels, func="mean") actual, _ = groupby_reduce(array, labels, func=mean) assert_equal(actual, expected) @requires_dask @pytest.mark.parametrize("func", ("sum", "prod")) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32, np.int64]) def test_groupby_reduce_preserves_dtype(dtype, func): array = np.ones((2, 12), dtype=dtype) by = np.array([labels] * 2) result, _ = groupby_reduce(from_array(array, chunks=(-1, 4)), by, func=func) expect_dtype = dtypes._maybe_promote_int(array.dtype) assert result.dtype == expect_dtype def test_numpy_reduce_nd_md(): array = np.ones((2, 12)) by = np.array([labels] * 2) expected = aggregate(by.ravel(), array.ravel(), func="sum") result, groups = groupby_reduce(array, by, func="sum", fill_value=123) actual = reindex_(result, groups, pd.Index(np.unique(by)), axis=0, fill_value=0) np.testing.assert_equal(expected, actual) array = np.ones((4, 2, 12)) by = np.array([labels] * 2) expected = aggregate(by.ravel(), array.reshape(4, 24), func="sum", axis=-1, fill_value=0) result, groups = groupby_reduce(array, by, func="sum") actual = reindex_(result, groups, pd.Index(np.unique(by)), axis=-1, fill_value=0) assert_equal(expected, actual) array = np.ones((4, 2, 12)) by = np.broadcast_to(np.array([labels] * 2), array.shape) expected = aggregate(by.ravel(), array.ravel(), func="sum", axis=-1) result, groups = groupby_reduce(array, by, func="sum") actual = reindex_(result, groups, pd.Index(np.unique(by)), axis=-1, fill_value=0) assert_equal(expected, actual) array = np.ones((2, 3, 4)) by = np.ones((2, 3, 4)) actual, _ = groupby_reduce(array, by, axis=(1, 2), func="sum") expected = np.sum(array, axis=(1, 2), keepdims=True).squeeze(2) assert_equal(actual, expected) @requires_dask @pytest.mark.parametrize("reindex", [None, False, True, REINDEX_SPARSE_PARAM]) @pytest.mark.parametrize("func", ALL_FUNCS) @pytest.mark.parametrize("add_nan", [False, True]) @pytest.mark.parametrize("dtype", (float,)) @pytest.mark.parametrize( "shape, array_chunks, group_chunks", [ ((12,), (3,), 3), # form 1 ((12,), (3,), (4,)), # form 1, chunks not aligned ((12,), ((3, 5, 4),), (2,)), # form 1 ((10, 12), (3, 3), -1), # form 3 ((10, 12), (3, 3), 3), # form 3 ], ) def test_groupby_agg_dask(func, shape, array_chunks, group_chunks, add_nan, dtype, engine, reindex): """Tests groupby_reduce with dask arrays against groupby_reduce with numpy arrays""" if func in ["first", "last"] or func in BLOCKWISE_FUNCS: pytest.skip() if "arg" in func and (engine in ["flox", "numbagg"] or reindex): pytest.skip() if isinstance(reindex, ReindexStrategy) and not _is_sparse_supported_reduction(func): 
pytest.skip() rng = np.random.default_rng(12345) array = dask.array.from_array(rng.random(shape), chunks=array_chunks).astype(dtype) array = dask.array.ones(shape, chunks=array_chunks) labels = np.array([0, 0, 2, 2, 2, 1, 1, 2, 2, 1, 1, 0]) if add_nan: labels = labels.astype(float) labels[:3] = np.nan # entire block is NaN when group_chunks=3 labels[-2:] = np.nan kwargs = dict( func=func, expected_groups=[0, 1, 2], fill_value=False if func in ["all", "any"] else 123, ) expected, _ = groupby_reduce(array.compute(), labels, engine="numpy", **kwargs) actual, _ = groupby_reduce(array.compute(), labels, engine=engine, **kwargs) assert_equal(actual, expected) with raise_if_dask_computes(): actual, _ = groupby_reduce(array, labels, engine=engine, **kwargs) assert_equal(actual, expected) by = from_array(labels, group_chunks) with raise_if_dask_computes(): actual, _ = groupby_reduce(array, by, engine=engine, **kwargs) assert_equal(expected, actual) kwargs["expected_groups"] = [0, 2, 1] with raise_if_dask_computes(): actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=False) assert_equal(groups, np.array([0, 2, 1], dtype=np.int64)) assert_equal(expected, actual[..., [0, 2, 1]]) with raise_if_dask_computes(): actual, groups = groupby_reduce(array, by, engine=engine, **kwargs, sort=True) assert_equal(groups, np.array([0, 1, 2], np.int64)) assert_equal(expected, actual) @requires_cubed @pytest.mark.parametrize("reindex", [True]) @pytest.mark.parametrize("func", ALL_FUNCS) @pytest.mark.parametrize("add_nan", [False, True]) @pytest.mark.parametrize( "shape, array_chunks, group_chunks", [ ((12,), (3,), 3), # form 1 ], ) def test_groupby_agg_cubed(func, shape, array_chunks, group_chunks, add_nan, engine, reindex): """Tests groupby_reduce with cubed arrays against groupby_reduce with numpy arrays""" if func in ["first", "last"] or func in BLOCKWISE_FUNCS: pytest.skip() if "arg" in func and (engine in ["flox", "numbagg"] or reindex): pytest.skip() array = cubed.array_api.ones(shape, chunks=array_chunks) labels = np.array([0, 0, 2, 2, 2, 1, 1, 2, 2, 1, 1, 0]) if add_nan: labels = labels.astype(float) labels[:3] = np.nan # entire block is NaN when group_chunks=3 labels[-2:] = np.nan kwargs = dict( func=func, expected_groups=[0, 1, 2], fill_value=False if func in ["all", "any"] else 123, reindex=reindex, ) expected, _ = groupby_reduce(array.compute(), labels, engine="numpy", **kwargs) actual, _ = groupby_reduce(array.compute(), labels, engine=engine, **kwargs) assert_equal(actual, expected) # TODO: raise_if_cubed_computes actual, _ = groupby_reduce(array, labels, engine=engine, **kwargs) assert_equal(expected, actual) def test_numpy_reduce_axis_subset(engine): # TODO: add NaNs by = labels2d array = np.ones_like(by, dtype=np.int64) kwargs = dict(func="count", engine=engine, fill_value=0) result, _ = groupby_reduce(array, by, **kwargs, axis=1) assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.intp)) by = np.broadcast_to(labels2d, (3, *labels2d.shape)) array = np.ones_like(by) result, _ = groupby_reduce(array, by, **kwargs, axis=1) subarr = np.array([[1, 1], [1, 1], [0, 2], [1, 1], [1, 1]], dtype=np.intp) expected = np.tile(subarr, (3, 1, 1)) assert_equal(result, expected) result, _ = groupby_reduce(array, by, **kwargs, axis=2) subarr = np.array([[2, 3], [2, 3]], dtype=np.intp) expected = np.tile(subarr, (3, 1, 1)) assert_equal(result, expected) result, _ = groupby_reduce(array, by, **kwargs, axis=(1, 2)) expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.intp) assert_equal(result, 
expected) result, _ = groupby_reduce(array, by, **kwargs, axis=(2, 1)) assert_equal(result, expected) result, _ = groupby_reduce(array, by[0, ...], **kwargs, axis=(1, 2)) expected = np.array([[4, 6], [4, 6], [4, 6]], dtype=np.intp) assert_equal(result, expected) @requires_dask def test_dask_reduce_axis_subset(): by = labels2d array = np.ones_like(by, dtype=np.int64) with raise_if_dask_computes(): result, _ = groupby_reduce( da.from_array(array, chunks=(2, 3)), da.from_array(by, chunks=(2, 2)), func="count", axis=1, expected_groups=[0, 2], ) assert_equal(result, np.array([[2, 3], [2, 3]], dtype=np.intp)) by = np.broadcast_to(labels2d, (3, *labels2d.shape)) array = np.ones_like(by) subarr = np.array([[1, 1], [1, 1], [123, 2], [1, 1], [1, 1]], dtype=np.intp) expected = np.tile(subarr, (3, 1, 1)) with raise_if_dask_computes(): result, _ = groupby_reduce( da.from_array(array, chunks=(1, 2, 3)), da.from_array(by, chunks=(2, 2, 2)), func="count", axis=1, expected_groups=[0, 2], fill_value=123, ) assert_equal(result, expected) subarr = np.array([[2, 3], [2, 3]], dtype=np.intp) expected = np.tile(subarr, (3, 1, 1)) with raise_if_dask_computes(): result, _ = groupby_reduce( da.from_array(array, chunks=(1, 2, 3)), da.from_array(by, chunks=(2, 2, 2)), func="count", axis=2, expected_groups=[0, 2], ) assert_equal(result, expected) with pytest.raises(NotImplementedError): groupby_reduce( da.from_array(array, chunks=(1, 3, 2)), da.from_array(by, chunks=(2, 2, 2)), func="count", axis=2, ) @pytest.mark.parametrize("group_idx", [[0, 1, 0], [0, 0, 1], [1, 0, 0], [1, 1, 0]]) @pytest.mark.parametrize( "func", [ # "first", "last", "nanfirst", "nanlast", ], ) @pytest.mark.parametrize( "chunks", [ None, pytest.param(1, marks=pytest.mark.skipif(not has_dask, reason="no dask")), pytest.param(2, marks=pytest.mark.skipif(not has_dask, reason="no dask")), pytest.param(3, marks=pytest.mark.skipif(not has_dask, reason="no dask")), ], ) def test_first_last_useless(func, chunks, group_idx): array = np.array([[0, 0, 0], [0, 0, 0]], dtype=np.int8) if chunks is not None: array = dask.array.from_array(array, chunks=chunks) actual, _ = groupby_reduce(array, np.array(group_idx), func=func, engine="numpy") expected = np.array([[0, 0], [0, 0]], dtype=np.int8) assert_equal(actual, expected) @pytest.mark.parametrize("func", ["first", "last", "nanfirst", "nanlast"]) @pytest.mark.parametrize("axis", [(0, 1)]) def test_first_last_disallowed(axis, func): with pytest.raises(ValueError): groupby_reduce(np.empty((2, 3, 2)), np.ones((2, 3, 2)), func=func, axis=axis) @requires_dask @pytest.mark.parametrize("func", ["nanfirst", "nanlast"]) @pytest.mark.parametrize("axis", [None, (0, 1, 2)]) def test_nanfirst_nanlast_disallowed_dask(axis, func): with pytest.raises(ValueError): groupby_reduce(dask.array.empty((2, 3, 2)), np.ones((2, 3, 2)), func=func, axis=axis) @requires_dask @pytest.mark.xfail @pytest.mark.parametrize("func", ["first", "last"]) def test_first_last_allowed_dask(func): # blockwise should be fine... but doesn't work now. groupby_reduce(dask.array.empty((2, 3, 2)), np.ones((2, 3, 2)), func=func, axis=-1) @requires_dask @pytest.mark.xfail @pytest.mark.parametrize("func", ["first", "last"]) def test_first_last_disallowed_dask(func): # blockwise is fine groupby_reduce(dask.array.empty((2, 3, 2)), np.ones((2, 3, 2)), func=func, axis=-1) # anything else is not. 
with pytest.raises(ValueError): groupby_reduce( dask.array.empty((2, 3, 2), chunks=(-1, -1, 1)), np.ones((2,)), func=func, axis=-1, ) @requires_dask @pytest.mark.parametrize("func", ALL_FUNCS) @pytest.mark.parametrize("axis", [None, (0, 1, 2), (0, 1), (0, 2), (1, 2), 0, 1, 2, (0,), (1,), (2,)]) def test_groupby_reduce_axis_subset_against_numpy(func, axis, engine): if ("arg" in func and engine in ["flox", "numbagg"]) or func in BLOCKWISE_FUNCS: pytest.skip() if not isinstance(axis, int): if "arg" in func and (axis is None or len(axis) > 1): pytest.skip() if ("first" in func or "last" in func) and (axis is not None and len(axis) not in [1, 3]): pytest.skip() if func in ["all", "any"]: fill_value = False else: fill_value = 123 if "var" in func or "std" in func: tolerance = {"rtol": 1e-14, "atol": 1e-16} else: tolerance = None # tests against the numpy output to make sure dask compute matches by = np.broadcast_to(labels2d, (3, *labels2d.shape)) rng = np.random.default_rng(12345) array = rng.random(by.shape) kwargs = dict(func=func, axis=axis, expected_groups=[0, 2], fill_value=fill_value) expected, _ = groupby_reduce(array, by, engine=engine, **kwargs) if engine == "flox": expected_npg, _ = groupby_reduce(array, by, **kwargs, engine="numpy") assert_equal(expected_npg, expected) if func in ["all", "any"]: fill_value = False else: fill_value = 123 if "var" in func or "std" in func: tolerance = {"rtol": 1e-14, "atol": 1e-16} else: tolerance = None # tests against the numpy output to make sure dask compute matches by = np.broadcast_to(labels2d, (3, *labels2d.shape)) rng = np.random.default_rng(12345) array = rng.random(by.shape) kwargs = dict(func=func, axis=axis, expected_groups=[0, 2], fill_value=fill_value) expected, _ = groupby_reduce(array, by, engine=engine, **kwargs) if engine == "flox": expected_npg, _ = groupby_reduce(array, by, **kwargs, engine="numpy") assert_equal(expected_npg, expected) if ("first" in func or "last" in func) and ( axis is None or (not isinstance(axis, int) and len(axis) != 1) ): return with raise_if_dask_computes(): actual, _ = groupby_reduce( da.from_array(array, chunks=(-1, 2, 3)), da.from_array(by, chunks=(-1, 2, 2)), engine=engine, **kwargs, ) assert_equal(actual, expected, tolerance) @pytest.mark.parametrize( "reindex, chunks", [ (None, None), pytest.param(False, (2, 2, 3), marks=requires_dask), pytest.param(True, (2, 2, 3), marks=requires_dask), pytest.param(REINDEX_SPARSE_PARAM, (2, 2, 3), marks=requires_dask), ], ) @pytest.mark.parametrize( "axis, groups, expected_shape", [ (2, [0, 1, 2], (3, 5, 3)), (None, [0, 1, 2], (3,)), # global reduction; 0 shaped group axis (None, [0], (1,)), # global reduction; 0 shaped group axis; 1 group ], ) def test_groupby_reduce_nans(reindex, chunks, axis, groups, expected_shape, engine): def _maybe_chunk(arr): if chunks: return da.from_array(arr, chunks=chunks) else: return arr # test when entire by are NaNs by = np.full((3, 5, 2), fill_value=np.nan) array = np.ones_like(by) # along an axis; requires expected_group # TODO: this should check for fill_value result, _ = groupby_reduce( _maybe_chunk(array), _maybe_chunk(by), func="count", expected_groups=groups, axis=axis, fill_value=0, engine=engine, reindex=reindex, ) assert_equal(result, np.zeros(expected_shape, dtype=np.intp)) # now when subsets are NaN # labels = np.array([0, 0, 1, 1, 1], dtype=float) # labels2d = np.array([labels[:5], np.flip(labels[:5])]) # labels2d[0, :5] = np.nan # labels2d[1, 5:] = np.nan # by = np.broadcast_to(labels2d, (3, *labels2d.shape)) 
@requires_dask @pytest.mark.parametrize( "expected_groups, reindex", [ (None, None), (None, False), ([0, 1, 2], True), ([0, 1, 2], False), pytest.param([0, 1, 2], REINDEX_SPARSE_PARAM), ], ) def test_groupby_all_nan_blocks_dask(expected_groups, reindex, engine): labels = np.array([0, 0, 2, 2, 2, 1, 1, 2, 2, 1, 1, 0]) nan_labels = labels.astype(float) # copy nan_labels[:5] = np.nan array, by, expected = ( np.ones((2, 12), dtype=np.int64), np.array([nan_labels, nan_labels[::-1]]), np.array([2, 8, 4], dtype=np.int64), ) actual, _ = groupby_reduce( da.from_array(array, chunks=(1, 3)), da.from_array(by, chunks=(1, 3)), func="sum", expected_groups=expected_groups, engine=engine, reindex=reindex, method="map-reduce", ) assert_equal(actual, expected) @pytest.mark.parametrize("axis", (0, 1, 2, -1)) def test_reindex(axis): shape = [2, 2, 2] fill_value = 0 array = np.broadcast_to(np.array([1, 2]), shape) groups = np.array(["a", "b"]) expected_groups = pd.Index(["a", "b", "c"]) actual = reindex_(array, groups, expected_groups, fill_value=fill_value, axis=axis) if axis < 0: axis = array.ndim + axis result_shape = tuple(len(expected_groups) if ax == axis else s for ax, s in enumerate(shape)) slicer = tuple(slice(None, s) for s in shape) expected = np.full(result_shape, fill_value) expected[slicer] = array assert_equal(actual, expected) @pytest.mark.xfail def test_bad_npg_behaviour(): labels = np.array([0, 0, 2, 2, 2, 1, 1, 2, 2, 1, 1, 0], dtype=int) # fmt: off array = np.array([[1] * 12, [1] * 12]) # fmt: on assert_equal( aggregate(labels, array, axis=-1, func="argmax"), np.array([[0, 5, 2], [0, 5, 2]]), ) assert ( aggregate( np.array([0, 1, 2, 0, 1, 2]), np.array([-np.inf, 0, 0, -np.inf, 0, 0]), func="max", )[0] == -np.inf ) @pytest.mark.xfail @pytest.mark.parametrize("func", ("nanargmax", "nanargmin")) def test_npg_nanarg_bug(func): array = np.array([1, 1, 2, 1, 1, np.nan, 6, 1]) labels = np.array([1, 1, 1, 1, 1, 1, 1, 1]) - 1 actual = aggregate(labels, array, func=func).astype(int) expected = getattr(np, func)(array) assert_equal(actual, expected) @pytest.mark.parametrize( "kwargs", ( dict(expected_groups=np.array([1, 2, 4, 5]), isbin=True), dict(expected_groups=pd.IntervalIndex.from_breaks([1, 2, 4, 5])), ), ) @pytest.mark.parametrize("method", ["cohorts", "map-reduce"]) @pytest.mark.parametrize("chunk_labels", [False, True]) @pytest.mark.parametrize( "chunks", ( (), pytest.param((1,), marks=requires_dask), pytest.param((2,), marks=requires_dask), ), ) def test_groupby_bins(chunk_labels, kwargs, chunks, engine, method) -> None: array = [1, 1, 1, 1, 1, 1] labels = [0.2, 1.5, 1.9, 2, 3, 20] if method == "cohorts" and chunk_labels: pytest.xfail() if chunks: array = dask.array.from_array(array, chunks=chunks) if chunk_labels: labels = dask.array.from_array(labels, chunks=chunks) with raise_if_dask_computes(): actual, *groups = groupby_reduce( array, labels, func="count", fill_value=0, engine=engine, method=method, **kwargs, ) (groups_array,) = groups expected = np.array([3, 1, 0], dtype=np.intp) for left, right in zip(groups_array, pd.IntervalIndex.from_arrays([1, 2, 4], [2, 4, 5]).to_numpy()): assert left == right assert_equal(actual, expected) @pytest.mark.parametrize( "inchunks, expected", [ [(1,) * 10, (3, 2, 2, 3)], [(2,) * 5, (3, 2, 2, 3)], [(3, 3, 3, 1), (3, 2, 5)], [(3, 1, 1, 2, 1, 1, 1), (3, 2, 2, 3)], [(3, 2, 2, 3), (3, 2, 2, 3)], [(4, 4, 2), (3, 4, 3)], [(5, 5), (5, 5)], [(6, 4), (5, 5)], [(7, 3), (7, 3)], [(8, 2), (7, 3)], [(9, 1), (10,)], [(10,), (10,)], ], ) def 
test_rechunk_for_blockwise(inchunks, expected): labels = np.array([1, 1, 1, 2, 2, 3, 3, 5, 5, 5]) assert _get_optimal_chunks_for_groups(inchunks, labels) == expected @requires_dask @pytest.mark.parametrize( "expected, labels, chunks", [ [[[0, 1, 2, 3]], [0, 1, 2, 0, 1, 2, 3], (3, 4)], [[[0], [1], [2], [3]], [0, 1, 2, 0, 1, 2, 3], (2, 2, 2, 1)], [[[0, 1, 2], [3]], [0, 1, 2, 0, 1, 2, 3], (3, 3, 1)], [ [[0], [1, 2, 3, 4], [5]], np.repeat(np.arange(6), [4, 4, 12, 2, 3, 4]), (4, 8, 4, 9, 4), ], ], ) def test_find_group_cohorts(expected, labels, chunks: tuple[int]) -> None: # force merging of cohorts for the test _, chunks_cohorts = find_group_cohorts(labels, (chunks,), merge=True) actual = list(chunks_cohorts.values()) assert actual == expected, (actual, expected) @requires_dask def test_find_cohorts_missing_groups(): by = np.array([np.nan, np.nan, np.nan, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0, 1.0, np.nan, np.nan]) kwargs = {"func": "sum", "expected_groups": [0, 1, 2], "fill_value": 123} array = dask.array.ones_like(by, chunks=(3,)) actual, _ = groupby_reduce(array, by, method="cohorts", **kwargs) expected, _ = groupby_reduce(array.compute(), by, **kwargs) assert_equal(expected, actual) @pytest.mark.parametrize("chunksize", [12, 13, 14, 24, 36, 48, 72, 71]) def test_verify_complex_cohorts(chunksize: int) -> None: time = pd.Series(pd.date_range("2016-01-01", "2018-12-31 23:59", freq="h")) chunks = (chunksize,) * (len(time) // chunksize) by = np.array(time.dt.dayofyear.values) if len(by) != sum(chunks): chunks += (len(by) - sum(chunks),) _, chunk_cohorts = find_group_cohorts(by - 1, (chunks,)) chunks_ = np.sort(np.concatenate(tuple(chunk_cohorts.keys()))) groups = np.sort(np.concatenate(tuple(chunk_cohorts.values()))) assert_equal(np.unique(chunks_).astype(np.int64), np.arange(len(chunks), dtype=np.int64)) assert_equal(groups.astype(np.int64), np.arange(366, dtype=np.int64)) @requires_dask @pytest.mark.parametrize("chunksize", (12,) + tuple(range(1, 13)) + (-1,)) def test_method_guessing(chunksize): # just a regression test labels = np.tile(np.arange(0, 12), 30) by = dask.array.from_array(labels, chunks=chunksize) - 1 preferred_method, chunks_cohorts = find_group_cohorts(labels, by.chunks[slice(-1, None)]) if chunksize == -1: assert preferred_method == "blockwise" assert chunks_cohorts == {(0,): list(range(12))} elif chunksize in (1, 2, 3, 4, 6): assert preferred_method == "cohorts" assert len(chunks_cohorts) == 12 // chunksize else: assert preferred_method == "map-reduce" assert chunks_cohorts == {} @requires_dask @pytest.mark.parametrize("ndim", [1, 2, 3]) def test_single_chunk_method_is_blockwise(ndim): for by_ndim in range(1, ndim + 1): chunks = (5,) * (ndim - by_ndim) + (-1,) * by_ndim assert len(chunks) == ndim array = dask.array.ones(shape=(10,) * ndim, chunks=chunks) by = np.zeros(shape=(10,) * by_ndim, dtype=int) method, chunks_cohorts = find_group_cohorts( by, chunks=[array.chunks[ax] for ax in range(-by.ndim, 0)] ) assert method == "blockwise" assert chunks_cohorts == {(0,): [0]} @requires_dask @pytest.mark.parametrize( "chunk_at,expected", [ [1, ((1, 6, 1, 6, 1, 6, 1, 6, 1, 1),)], [0, ((7, 7, 7, 7, 2),)], [3, ((3, 4, 3, 4, 3, 4, 3, 4, 2),)], ], ) def test_rechunk_for_cohorts(chunk_at, expected): array = dask.array.ones((30,), chunks=7) labels = np.arange(0, 30) % 7 rechunked = rechunk_for_cohorts(array, axis=-1, force_new_chunk_at=chunk_at, labels=labels) assert rechunked.chunks == expected @pytest.mark.parametrize("chunks", [None, pytest.param(3, marks=requires_dask)]) 
@pytest.mark.parametrize("fill_value", [123, np.nan]) @pytest.mark.parametrize("func", ALL_FUNCS) def test_fill_value_behaviour(func, chunks, fill_value, engine): # fill_value = np.nan tests promotion of int counts to float # This is used by xarray if (func in ["all", "any"] or "arg" in func) or func in BLOCKWISE_FUNCS: pytest.skip() npfunc = _get_array_func(func) by = np.array([1, 2, 3, 1, 2, 3]) array = np.array([np.nan, 1, 1, np.nan, 1, 1]) if chunks: array = dask.array.from_array(array, chunks) actual, _ = groupby_reduce( array, by, func=func, engine=engine, fill_value=fill_value, expected_groups=[0, 1, 2, 3], ) expected = np.array([fill_value, fill_value, npfunc([1.0, 1.0], axis=0), npfunc([1.0, 1.0], axis=0)]) assert_equal(actual, expected) @requires_dask @pytest.mark.parametrize("func", ["mean", "sum"]) @pytest.mark.parametrize("dtype", ["float32", "float64", "int32", "int64"]) def test_dtype_preservation(dtype, func, engine): if engine == "numbagg": # https://github.com/numbagg/numbagg/issues/121 pytest.skip() if func == "sum": expected = dtypes._maybe_promote_int(dtype) elif func == "mean" and "int" in dtype: expected = np.float64 else: expected = np.dtype(dtype) array = np.ones((20,), dtype=dtype) by = np.ones(array.shape, dtype=int) actual, _ = groupby_reduce(array, by, func=func, engine=engine) assert actual.dtype == expected array = dask.array.from_array(array, chunks=(4,)) actual, _ = groupby_reduce(array, by, func=func, engine=engine) assert actual.dtype == expected @requires_dask @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32, np.int64]) @pytest.mark.parametrize("labels_dtype", [np.float32, np.float64, np.int32, np.int64]) @pytest.mark.parametrize("method", ["map-reduce", "cohorts"]) def test_cohorts_map_reduce_consistent_dtypes(method, dtype, labels_dtype): repeats = np.array([4, 4, 12, 2, 3, 4], dtype=np.int32) labels = np.repeat(np.arange(6, dtype=labels_dtype), repeats) array = dask.array.from_array(labels.astype(dtype), chunks=(4, 8, 4, 9, 4)) actual, actual_groups = groupby_reduce(array, labels, func="count", method=method) assert_equal(actual_groups, np.arange(6, dtype=labels.dtype)) assert_equal(actual, repeats.astype(np.intp)) actual, actual_groups = groupby_reduce(array, labels, func="sum", method=method) assert_equal(actual_groups, np.arange(6, dtype=labels.dtype)) expect_dtype = dtypes._maybe_promote_int(dtype) assert_equal(actual, np.array([0, 4, 24, 6, 12, 20], dtype=expect_dtype)) @requires_dask @pytest.mark.parametrize("func", ALL_FUNCS) @pytest.mark.parametrize("axis", (-1, None)) @pytest.mark.parametrize("method", ["blockwise", "cohorts", "map-reduce"]) def test_cohorts_nd_by(func, method, axis, engine): if ( ("arg" in func and (axis is None or engine in ["flox", "numbagg"])) or (method != "blockwise" and func in BLOCKWISE_FUNCS) or (axis is None and ("first" in func or "last" in func)) ): pytest.skip() if axis is not None and method != "map-reduce": pytest.xfail() o = dask.array.ones((3,), chunks=-1) o2 = dask.array.ones((2, 3), chunks=-1) array = dask.array.block([[o, 2 * o], [3 * o2, 4 * o2]]) by = array.compute().astype(np.int64) by[0, 1] = 30 by[2, 1] = 40 by[0, 4] = 31 array = np.broadcast_to(array, (2, 3) + array.shape) if func in ["any", "all"]: fill_value = False else: fill_value = -123 kwargs = dict(func=func, engine=engine, method=method, axis=axis, fill_value=fill_value) if "quantile" in func: kwargs["finalize_kwargs"] = {"q": DEFAULT_QUANTILE} actual, groups = groupby_reduce(array, by, **kwargs) expected, sorted_groups = 
groupby_reduce(array.compute(), by, **kwargs) assert_equal(groups, sorted_groups) assert_equal(actual, expected) actual, groups = groupby_reduce(array, by, sort=False, **kwargs) assert_equal(groups, np.array([1, 30, 2, 31, 3, 4, 40], dtype=np.int64)) reindexed = reindex_(actual, groups, pd.Index(sorted_groups)) assert_equal(reindexed, expected) @pytest.mark.parametrize("func", ["sum", "count"]) @pytest.mark.parametrize("fill_value, expected", ((0, np.integer), (np.nan, np.floating))) def test_dtype_promotion(func, fill_value, expected, engine): array = np.array([1, 1]) by = [0, 1] actual, _ = groupby_reduce( array, by, func=func, expected_groups=[1, 2], fill_value=fill_value, engine=engine, ) assert np.issubdtype(actual.dtype, expected) @pytest.mark.parametrize("func", ["mean", "nanmean"]) def test_empty_bins(func, engine): array = np.ones((2, 3, 2)) by = np.broadcast_to([0, 1], array.shape) actual, _ = groupby_reduce( array, by, func=func, expected_groups=[-1, 0, 1, 2], isbin=True, engine=engine, axis=(0, 1, 2), ) expected = np.array([1.0, 1.0, np.nan]) assert_equal(actual, expected) def test_datetime_binning(): time_bins = pd.date_range(start="2010-08-01", end="2010-08-15", freq="24h") by = pd.date_range("2010-08-01", "2010-08-15", freq="15min") (actual,) = _convert_expected_groups_to_index((time_bins,), isbin=(True,), sort=False) expected = pd.IntervalIndex.from_arrays(time_bins[:-1], time_bins[1:]) assert_equal(actual, expected) ret = factorize_((by.to_numpy(),), axes=(0,), expected_groups=(actual,)) group_idx = ret[0] # Ignore pd.cut's dtype as it won't match np.digitize: expected = pd.cut(by, time_bins).codes.copy().astype(group_idx.dtype) expected[0] = 14 # factorize doesn't return -1 for nans assert_equal(group_idx, expected) @pytest.mark.parametrize("func", ALL_FUNCS) def test_bool_reductions(func, engine): if "arg" in func and engine == "flox": pytest.skip() if "quantile" in func or "mode" in func: pytest.skip() groups = np.array([1, 1, 1]) data = np.array([True, True, False]) npfunc = _get_array_func(func) expected = np.expand_dims(npfunc(data, axis=0), -1) actual, _ = groupby_reduce(data, groups, func=func, engine=engine) assert_equal(expected, actual) @requires_dask def test_map_reduce_blockwise_mixed() -> None: t = pd.date_range("2000-01-01", "2000-12-31", freq="D").to_series() data = t.dt.dayofyear actual, *_ = groupby_reduce( dask.array.from_array(data.values, chunks=365), t.dt.month, func="mean", method="map-reduce", ) expected, *_ = groupby_reduce(data, t.dt.month, func="mean") assert_equal(expected, actual) @requires_dask @pytest.mark.parametrize("method", ["blockwise", "map-reduce", "cohorts"]) def test_group_by_datetime(engine, method): kwargs = dict( func="mean", method=method, engine=engine, ) t = pd.date_range("2000-01-01", "2000-12-31", freq="D").to_series() data = t.dt.dayofyear daskarray = dask.array.from_array(data.values, chunks=30) actual, _ = groupby_reduce(daskarray, t, **kwargs) expected = data.to_numpy().astype(float) assert_equal(expected, actual) if method == "blockwise": return None edges = pd.date_range("1999-12-31", "2000-12-31", freq="ME").to_series().to_numpy() actual, _ = groupby_reduce(daskarray, t.to_numpy(), isbin=True, expected_groups=edges, **kwargs) expected = data.resample("ME").mean().to_numpy() assert_equal(expected, actual) actual, _ = groupby_reduce( np.broadcast_to(daskarray, (2, 3, daskarray.shape[-1])), t.to_numpy(), isbin=True, expected_groups=edges, **kwargs, ) expected = np.broadcast_to(expected, (2, 3, expected.shape[-1])) 
assert_equal(expected, actual) @requires_cubed @pytest.mark.parametrize("method", ["blockwise", "map-reduce"]) def test_group_by_datetime_cubed(engine, method): kwargs = dict( func="mean", method=method, engine=engine, ) t = pd.date_range("2000-01-01", "2000-12-31", freq="D").to_series() data = t.dt.dayofyear cubedarray = cubed.from_array(data.values, chunks=30) actual, _ = groupby_reduce(cubedarray, t, **kwargs) expected = data.to_numpy().astype(float) assert_equal(expected, actual) edges = pd.date_range("1999-12-31", "2000-12-31", freq="ME").to_series().to_numpy() actual, _ = groupby_reduce(cubedarray, t.to_numpy(), isbin=True, expected_groups=edges, **kwargs) expected = data.resample("ME").mean().to_numpy() assert_equal(expected, actual) actual, _ = groupby_reduce( cubed.array_api.broadcast_to(cubedarray, (2, 3, cubedarray.shape[-1])), t.to_numpy(), isbin=True, expected_groups=edges, **kwargs, ) expected = np.broadcast_to(expected, (2, 3, expected.shape[-1])) assert_equal(expected, actual) def test_factorize_values_outside_bins(): # pd.factorize returns intp vals = factorize_( (np.arange(10).reshape(5, 2), np.arange(10).reshape(5, 2)), axes=(0, 1), expected_groups=( pd.IntervalIndex.from_breaks(np.arange(2, 8, 1)), pd.IntervalIndex.from_breaks(np.arange(2, 8, 1)), ), reindex=True, fastpath=True, ) actual = vals[0] expected = np.array([[-1, -1], [-1, 0], [6, 12], [18, 24], [-1, -1]], np.intp) assert_equal(expected, actual) @pytest.mark.parametrize("chunk", [pytest.param(True, marks=requires_dask), False]) def test_multiple_groupers_bins(chunk) -> None: xp = dask.array if chunk else np array_kwargs = {"chunks": 2} if chunk else {} array = xp.ones((5, 2), **array_kwargs, dtype=np.int64) actual, *_ = groupby_reduce( array, np.arange(10).reshape(5, 2), xp.arange(10).reshape(5, 2), axis=(0, 1), expected_groups=( pd.IntervalIndex.from_breaks(np.arange(2, 8, 1)), pd.IntervalIndex.from_breaks(np.arange(2, 8, 1)), ), func="count", ) # output from `count` is intp expected = np.eye(5, 5, dtype=np.intp) assert_equal(expected, actual) @pytest.mark.parametrize("expected_groups", [None, (np.arange(5), [2, 3]), (None, [2, 3])]) @pytest.mark.parametrize("by1", [np.arange(5)[:, None], np.broadcast_to(np.arange(5)[:, None], (5, 2))]) @pytest.mark.parametrize( "by2", [ np.arange(2, 4).reshape(1, 2), np.broadcast_to(np.arange(2, 4).reshape(1, 2), (5, 2)), np.arange(2, 4).reshape(1, 2), ], ) @pytest.mark.parametrize("chunk", [pytest.param(True, marks=requires_dask), False]) def test_multiple_groupers(chunk, by1, by2, expected_groups) -> None: if chunk and expected_groups is None: pytest.skip() xp = dask.array if chunk else np array_kwargs = {"chunks": 2} if chunk else {} array = xp.ones((5, 2), **array_kwargs, dtype=np.int64) if chunk: by2 = dask.array.from_array(by2) # output from `count` is intp expected = np.ones((5, 2), dtype=np.intp) actual, *_ = groupby_reduce(array, by1, by2, axis=(0, 1), func="count", expected_groups=expected_groups) assert_equal(expected, actual) @pytest.mark.parametrize( "expected_groups", ( [None, None, None], (None,), ), ) def test_validate_expected_groups(expected_groups): with pytest.raises(ValueError): groupby_reduce( np.ones((10,)), np.ones((10,)), np.ones((10,)), expected_groups=expected_groups, func="mean", ) @requires_dask def test_validate_expected_groups_not_none_dask() -> None: with pytest.raises(ValueError): groupby_reduce( dask.array.ones((5, 2)), np.arange(10).reshape(5, 2), dask.array.arange(10).reshape(5, 2), axis=(0, 1), expected_groups=None, func="count", ) def 
test_factorize_reindex_sorting_strings(): # pd.factorize seems to return intp so int32 on 32bit arch kwargs = dict( by=(np.array(["El-Nino", "La-Nina", "boo", "Neutral"]),), axes=(-1,), expected_groups=(np.array(["El-Nino", "Neutral", "foo", "La-Nina"]),), ) expected = factorize_(**kwargs, reindex=True, sort=True)[0] assert_equal(expected, np.array([0, 1, 4, 2], dtype=np.intp)) expected = factorize_(**kwargs, reindex=True, sort=False)[0] assert_equal(expected, np.array([0, 3, 4, 1], dtype=np.intp)) expected = factorize_(**kwargs, reindex=False, sort=False)[0] assert_equal(expected, np.array([0, 1, 2, 3], dtype=np.intp)) expected = factorize_(**kwargs, reindex=False, sort=True)[0] assert_equal(expected, np.array([0, 1, 3, 2], dtype=np.intp)) def test_factorize_reindex_sorting_ints(): # pd.factorize seems to return intp so int32 on 32bit arch kwargs = dict( by=(np.array([-10, 1, 10, 2, 3, 5]),), axes=(-1,), expected_groups=(np.array([0, 1, 2, 3, 4, 5], np.int64),), ) expected = factorize_(**kwargs, reindex=True, sort=True)[0] assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.intp)) expected = factorize_(**kwargs, reindex=True, sort=False)[0] assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.intp)) kwargs["expected_groups"] = (np.arange(5, -1, -1),) expected = factorize_(**kwargs, reindex=True, sort=True)[0] assert_equal(expected, np.array([6, 1, 6, 2, 3, 5], dtype=np.intp)) expected = factorize_(**kwargs, reindex=True, sort=False)[0] assert_equal(expected, np.array([6, 4, 6, 3, 2, 0], dtype=np.intp)) @requires_dask def test_custom_aggregation_blockwise(): def grouped_median(group_idx, array, *, axis=-1, size=None, fill_value=None, dtype=None): return aggregate( group_idx, array, func=np.median, axis=axis, size=size, fill_value=fill_value, dtype=dtype, ) agg_median = Aggregation(name="median", numpy=grouped_median, fill_value=-1, chunk=None, combine=None) array = np.arange(100, dtype=np.float32).reshape(5, 20) by = np.ones((20,)) actual, _ = groupby_reduce(array, by, func=agg_median, axis=-1) expected = np.median(array, axis=-1, keepdims=True) assert_equal(expected, actual) for method in ["map-reduce", "cohorts"]: with pytest.raises(NotImplementedError): groupby_reduce( dask.array.from_array(array, chunks=(1, -1)), by, func=agg_median, axis=-1, method=method, ) actual, _ = groupby_reduce( dask.array.from_array(array, chunks=(1, -1)), by, func=agg_median, axis=-1, method="blockwise", ) assert_equal(expected, actual) @pytest.mark.parametrize("func", ALL_FUNCS) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) def test_dtype(func, dtype, engine): if engine == "numbagg": # https://github.com/numbagg/numbagg/issues/121 pytest.skip() if "arg" in func or func in ["any", "all"]: pytest.skip() finalize_kwargs = {"q": DEFAULT_QUANTILE} if "quantile" in func else {} arr = np.ones((4, 12), dtype=dtype) labels = np.array(["a", "a", "c", "c", "c", "b", "b", "c", "c", "b", "b", "f"]) actual, _ = groupby_reduce( arr, labels, func=func, dtype=np.float64, engine=engine, finalize_kwargs=finalize_kwargs, ) assert actual.dtype == np.dtype("float64") @requires_dask @pytest.mark.parametrize( "flatblocks, expected", ( ((0, 1, 2, 3, 4), (slice(None),)), ((1, 2, 3), (slice(1, 4),)), ((1, 3), ([1, 3],)), ((0, 1, 3), ([0, 1, 3],)), ), ) def test_normalize_block_indexing_1d(flatblocks, expected): nblocks = 5 array = dask.array.ones((nblocks,), chunks=(1,)) expected = tuple(np.array(i) if isinstance(i, list) else i for i in expected) actual = _normalize_indexes(array.ndim, flatblocks, 
array.blocks.shape) assert_equal_tuple(expected, actual) @requires_dask @pytest.mark.parametrize( "flatblocks, expected", ( ((0, 1, 2, 3, 4), (0, slice(None))), ((1, 2, 3), (0, slice(1, 4))), ((1, 3), (0, [1, 3])), ((0, 1, 3), (0, [0, 1, 3])), (tuple(range(10)), (slice(None, 2), slice(None))), ((0, 1, 3, 5, 6, 8), (slice(None, 2), [0, 1, 3])), ((0, 3, 4, 5, 6, 8, 24), np.ix_([0, 1, 4], [0, 1, 3, 4])), ), ) def test_normalize_block_indexing_2d(flatblocks: tuple[int, ...], expected: tuple[Any, ...]) -> None: nblocks = 5 ndim = 2 array = dask.array.ones((nblocks,) * ndim, chunks=(1,) * ndim) expected = tuple(np.array(i) if isinstance(i, list) else i for i in expected) actual = _normalize_indexes(array.ndim, flatblocks, array.blocks.shape) assert_equal_tuple(expected, actual) @requires_dask def test_subset_blocks(): array = dask.array.random.random((120,), chunks=(4,)) blockid = (0, 3, 6, 9, 12, 15, 18, 21, 24, 27) subset = subset_to_blocks(array, blockid).to_array(array) assert subset.blocks.shape == (len(blockid),) @pytest.mark.skip("temporarily removed this optimization") @requires_dask def test_subset_block_passthrough(): from flox.core import identity # full slice pass through array = dask.array.ones((5,), chunks=(1,)) expected = dask.array.map_blocks(identity, array) subset = subset_to_blocks(array, np.arange(5)).to_array(array) assert subset.name == expected.name array = dask.array.ones((5, 5), chunks=1) expected = dask.array.map_blocks(identity, array) subset = subset_to_blocks(array, np.arange(25)).to_array(array) assert subset.name == expected.name @requires_dask @pytest.mark.parametrize( "flatblocks, expectidx", [ (np.arange(10), (slice(2), slice(None))), (np.arange(8), (slice(2), slice(None))), ([0, 10], ([0, 2], slice(1))), ([0, 7], (slice(2), [0, 2])), ([0, 7, 9], (slice(2), [0, 2, 4])), ([0, 6, 12, 14], (slice(3), [0, 1, 2, 4])), ([0, 12, 14, 19], np.ix_([0, 2, 3], [0, 2, 4])), ], ) def test_subset_block_2d(flatblocks, expectidx): array = dask.array.from_array(np.arange(25).reshape((5, 5)), chunks=1) subset = subset_to_blocks(array, flatblocks).to_array(array) assert len(subset.dask.layers) == 2 assert_equal(subset, array.compute()[expectidx]) @pytest.mark.parametrize( "dask_expected, reindex, func, expected_groups, any_by_dask", [ # argmax only False [False, None, "argmax", None, False], # True when by is numpy but expected is None [True, None, "sum", None, False], # False when by is dask but expected is None [False, None, "sum", None, True], # if expected_groups then always True [True, None, "sum", [1, 2, 3], False], [True, None, "sum", ([1], [2]), False], [True, None, "sum", ([1], [2]), True], [True, None, "sum", ([1], None), False], [True, None, "sum", ([1], None), True], ], ) def test_validate_reindex_map_reduce(dask_expected, reindex, func, expected_groups, any_by_dask) -> None: actual = _validate_reindex( reindex, func, "map-reduce", expected_groups, any_by_dask, is_dask_array=True, array_dtype=np.dtype("int32"), ) assert actual == ReindexStrategy(blockwise=dask_expected) # always reindex with all numpy inputs actual = _validate_reindex( reindex, func, "map-reduce", expected_groups, any_by_dask=False, is_dask_array=False, array_dtype=np.dtype("int32"), ) assert actual.blockwise actual = _validate_reindex( True, func, "map-reduce", expected_groups, any_by_dask=False, is_dask_array=False, array_dtype=np.dtype("int32"), ) assert actual.blockwise def test_validate_reindex() -> None: methods: list[T_Method] = ["map-reduce", "cohorts"] for method in methods: with 
pytest.raises(NotImplementedError): _validate_reindex( True, "argmax", method, expected_groups=None, any_by_dask=False, is_dask_array=True, array_dtype=np.dtype("int32"), ) methods: list[T_Method] = ["blockwise", "cohorts"] for method in methods: with pytest.raises(ValueError): _validate_reindex( True, "sum", method, expected_groups=None, any_by_dask=False, is_dask_array=True, array_dtype=np.dtype("int32"), ) for func in ["sum", "argmax"]: actual = _validate_reindex( None, func, method, expected_groups=None, any_by_dask=False, is_dask_array=True, array_dtype=np.dtype("int32"), ).blockwise assert actual is False with pytest.raises(ValueError): _validate_reindex( True, "sum", method="blockwise", expected_groups=np.array([1, 2, 3]), any_by_dask=False, is_dask_array=True, array_dtype=np.dtype("int32"), ) assert _validate_reindex( True, "sum", method="blockwise", expected_groups=np.array([1, 2, 3]), any_by_dask=True, is_dask_array=True, array_dtype=np.dtype("int32"), ).blockwise assert _validate_reindex( None, "sum", method="blockwise", expected_groups=np.array([1, 2, 3]), any_by_dask=True, is_dask_array=True, array_dtype=np.dtype("int32"), ).blockwise kwargs = dict( method="blockwise", expected_groups=np.array([1, 2, 3]), any_by_dask=True, is_dask_array=True, ) for func in ["nanfirst", "nanlast"]: assert not _validate_reindex(None, func, array_dtype=np.dtype("int32"), **kwargs).blockwise # type: ignore[arg-type] assert _validate_reindex(None, func, array_dtype=np.dtype("float32"), **kwargs).blockwise # type: ignore[arg-type] for func in ["first", "last"]: assert not _validate_reindex(None, func, array_dtype=np.dtype("int32"), **kwargs).blockwise # type: ignore[arg-type] assert not _validate_reindex(None, func, array_dtype=np.dtype("float32"), **kwargs).blockwise # type: ignore[arg-type] @requires_dask def test_1d_blockwise_sort_optimization() -> None: # Make sure for resampling problems sorting isn't done. 
time = pd.Series(pd.date_range("2020-09-01", "2020-12-31 23:59", freq="3h")) array = dask.array.ones((len(time),), chunks=(224,)) actual, *_ = groupby_reduce(array, time.dt.dayofyear.values, method="blockwise", func="count") assert all("getitem" not in k for k in actual.dask) actual, *_ = groupby_reduce( array, time.dt.dayofyear.values[::-1], sort=True, method="blockwise", func="count", ) assert any("getitem" in k for k in actual.dask.layers) actual, *_ = groupby_reduce( array, time.dt.dayofyear.values[::-1], sort=False, method="blockwise", func="count", ) assert all("getitem" not in k for k in actual.dask.layers) @requires_dask def test_negative_index_factorize_race_condition() -> None: # shape = (10, 2000) # chunks = ((shape[0]-1,1), 10) shape = (101, 174000) chunks = ((101,), 8760) eps = dask.array.random.random_sample(shape, chunks=chunks) N2 = dask.array.random.random_sample(shape, chunks=chunks) S2 = dask.array.random.random_sample(shape, chunks=chunks) bins = np.arange(-5, -2.05, 0.1) func = ["mean", "count", "sum"] out = [ groupby_reduce( eps, N2, S2, func=f, expected_groups=(bins, bins), isbin=(True, True), ) for f in func ] [dask.compute(out, scheduler="threads") for _ in range(5)] @pytest.mark.parametrize("sort", [True, False]) def test_expected_index_conversion_passthrough_range_index(sort) -> None: index = pd.RangeIndex(100) actual = _convert_expected_groups_to_index(expected_groups=(index,), isbin=(False,), sort=(sort,)) # type: ignore[call-overload] assert actual[0] is index def test_method_check_numpy() -> None: bins = [-2, -1, 0, 1, 2] field = np.ones((5, 3)) by = np.array([[-1.5, -1.5, 0.5, 1.5, 1.5] * 3]).reshape(5, 3) actual, *_ = groupby_reduce( field, by, expected_groups=pd.IntervalIndex.from_breaks(bins), func="count", method="cohorts", fill_value=np.nan, ) expected = np.array([6, np.nan, 3, 6]) assert_equal(actual, expected) actual, *_ = groupby_reduce( field, by, expected_groups=pd.IntervalIndex.from_breaks(bins), func="count", fill_value=np.nan, method="cohorts", axis=0, ) expected = np.array( [ [2.0, np.nan, 1.0, 2.0], [2.0, np.nan, 1.0, 2.0], [2.0, np.nan, 1.0, 2.0], ] ) assert_equal(actual, expected) @pytest.mark.parametrize("dtype", [None, np.float64]) def test_choose_engine(dtype) -> None: numbagg_possible = HAS_NUMBAGG and dtype is None default = "numbagg" if numbagg_possible else "numpy" mean = _initialize_aggregation( "mean", dtype=dtype, array_dtype=np.dtype("int64"), fill_value=0, min_count=0, finalize_kwargs=None, ) argmax = _initialize_aggregation( "argmax", dtype=dtype, array_dtype=np.dtype("int64"), fill_value=0, min_count=0, finalize_kwargs=None, ) # count_engine for method in ["all", "any", "count"]: agg = _initialize_aggregation( method, dtype=None, array_dtype=dtype, fill_value=0, min_count=0, finalize_kwargs=None, ) engine = _choose_engine(np.array([1, 1, 2, 2]), agg=agg) assert engine == ("numbagg" if HAS_NUMBAGG else "flox") # sorted by -> flox sorted_engine = _choose_engine(np.array([1, 1, 2, 2]), agg=mean) assert sorted_engine == ("numbagg" if numbagg_possible else "flox") # unsorted by -> numpy assert _choose_engine(np.array([3, 1, 1]), agg=mean) == default # argmax does not give engine="flox" assert _choose_engine(np.array([1, 1, 2, 2]), agg=argmax) == "numpy" def test_xarray_fill_value_behaviour() -> None: bar = np.array([1, 2, 3, np.nan, np.nan, np.nan, 4, 5, np.nan, np.nan]) times = np.arange(0, 20, 2) actual, *_ = groupby_reduce(bar, times, func="nansum", expected_groups=(np.arange(19),)) nan = np.nan # fmt: off expected = np.array( [ 
1., nan, 2., nan, 3., nan, 0., nan, 0., nan, 0., nan, 4., nan, 5., nan, 0., nan, 0.] ) # fmt: on assert_equal(expected, actual) @pytest.mark.parametrize("q", (0.5, (0.5,), (0.5, 0.67, 0.85))) @pytest.mark.parametrize("func", ["nanquantile", "quantile"]) @pytest.mark.parametrize("chunk", [pytest.param(True, marks=requires_dask), False]) @pytest.mark.parametrize("by_ndim", [1, 2]) def test_multiple_quantiles(q, chunk, func, by_ndim) -> None: array = np.array([[1, -1, np.nan, 3, 4, 10, 5], [1, np.nan, np.nan, 3, 4, np.nan, np.nan]]) labels = np.array([0, 0, 0, 1, 0, 1, 1]) if by_ndim == 2: labels = np.broadcast_to(labels, (5, *labels.shape)) array = np.broadcast_to(np.expand_dims(array, -2), (2, 5, array.shape[-1])) axis = tuple(range(-by_ndim, 0)) if chunk: array = dask.array.from_array(array, chunks=(1,) + (-1,) * by_ndim) actual, *_ = groupby_reduce(array, labels, func=func, finalize_kwargs=dict(q=q), axis=axis) sorted_array = array[..., [0, 1, 2, 4, 3, 5, 6]] f = partial(getattr(np, func), q=q, axis=axis, keepdims=True) if chunk: sorted_array = sorted_array.compute() # type: ignore[attr-defined] expected = np.concatenate((f(sorted_array[..., :4]), f(sorted_array[..., 4:])), axis=-1) if by_ndim == 2: expected = expected.squeeze(axis=-2) assert_equal(expected, actual, tolerance={"atol": 1e-14}) @pytest.mark.parametrize("dtype", ["U3", "S3"]) def test_nanlen_string(dtype, engine) -> None: array = np.array(["ABC", "DEF", "GHI", "JKL", "MNO", "PQR"], dtype=dtype) by = np.array([0, 0, 1, 2, 1, 0]) expected = np.array([3, 2, 1], dtype=np.intp) actual, *_ = groupby_reduce(array, by, func="count", engine=engine) assert_equal(expected, actual) def test_cumsum() -> None: array = np.array([1, 1, 1], dtype=np.uint64) by = np.array([0] * array.shape[-1]) expected = np.nancumsum(array, axis=-1) actual = groupby_scan(array, by, func="nancumsum", axis=-1) assert_equal(expected, actual) if has_dask: da = dask.array.from_array(array, chunks=2) actual = groupby_scan(da, by, func="nancumsum", axis=-1) assert_equal(expected, actual) @pytest.mark.parametrize( "chunks", [ pytest.param(-1, marks=requires_dask), pytest.param(3, marks=requires_dask), pytest.param(4, marks=requires_dask), ], ) @pytest.mark.parametrize("size", ((1, 12), (12,), (12, 9))) @pytest.mark.parametrize("add_nan_by", [True, False]) @pytest.mark.parametrize("func", ["ffill", "bfill"]) def test_ffill_bfill(chunks, size, add_nan_by, func) -> None: array, by = gen_array_by(size, func) if chunks: array = dask.array.from_array(array, chunks=chunks) if add_nan_by: by[0:3] = np.nan by = tuple(by) expected = flox.groupby_scan(array.compute(), by, func=func) actual = flox.groupby_scan(array, by, func=func) assert_equal(expected, actual) @requires_dask def test_blockwise_nans() -> None: array = dask.array.ones((1, 10), chunks=2) by = np.array([-1, 0, -1, 1, -1, 2, -1, 3, 4, 4]) actual, *actual_groups = flox.groupby_reduce(array, by, func="sum", expected_groups=pd.RangeIndex(0, 5)) expected, *expected_groups = flox.groupby_reduce( array.compute(), by, func="sum", expected_groups=pd.RangeIndex(0, 5) ) assert_equal(expected_groups, actual_groups) assert_equal(expected, actual) @pytest.mark.parametrize("func", ["sum", "prod", "count", "nansum"]) @pytest.mark.parametrize("engine", ["flox", "numpy"]) def test_agg_dtypes(func, engine) -> None: # regression test for GH388 counts = np.array([0, 2, 1, 0, 1]) group = np.array([1, 1, 1, 2, 2]) actual, *_ = groupby_reduce( counts, group, expected_groups=(np.array([1, 2]),), func=func, dtype="uint8", engine=engine )
expected = _get_array_func(func)(counts, dtype="uint8") assert actual.dtype == np.uint8 == expected.dtype @requires_dask def test_blockwise_avoid_rechunk() -> None: array = dask.array.zeros((6,), chunks=(2, 4), dtype=np.int64) by = np.array(["1", "1", "0", "", "0", ""], dtype="<U1") actual, groups = groupby_reduce(array, by, func="first") assert_equal(groups, np.array(["", "0", "1"])) assert_equal(actual, np.array([0, 0, 0], dtype=np.int64)) def test_datetime_minmax(engine) -> None: # GH403 array = np.array([np.datetime64("2000-01-01"), np.datetime64("2000-01-02"), np.datetime64("2000-01-03")]) by = np.array([0, 0, 1]) actual, *_ = flox.groupby_reduce(array, by, func="nanmin", engine=engine) expected = array[[0, 2]] assert_equal(expected, actual) expected = array[[1, 2]] actual, *_ = flox.groupby_reduce(array, by, func="nanmax", engine=engine) assert_equal(expected, actual) @pytest.mark.parametrize("func", ["first", "last", "nanfirst", "nanlast"]) def test_datetime_timedelta_first_last(engine, func) -> None: idx = 0 if "first" in func else -1 idx1 = 2 if "first" in func else -1 ## datetime dt = pd.date_range("2001-01-01", freq="d", periods=5).values by = np.ones(dt.shape, dtype=int) actual, *_ = groupby_reduce(dt, by, func=func, engine=engine) assert_equal(actual, dt[[idx]]) # missing group by = np.array([0, 2, 3, 3, 3]) actual, *_ = groupby_reduce( dt, by, expected_groups=([0, 1, 2, 3],), func=func, engine=engine, fill_value=dtypes.NA ) assert_equal(actual, [dt[0], np.datetime64("NaT"), dt[1], dt[idx1]]) ## timedelta dt = dt - dt[0] by = np.ones(dt.shape, dtype=int) actual, *_ = groupby_reduce(dt, by, func=func, engine=engine) assert_equal(actual, dt[[idx]]) # missing group by = np.array([0, 2, 3, 3, 3]) actual, *_ = groupby_reduce( dt, by, expected_groups=([0, 1, 2, 3],), func=func, engine=engine, fill_value=dtypes.NA ) assert_equal(actual, [dt[0], np.timedelta64("NaT"), dt[1], dt[idx1]]) @requires_dask @requires_sparse @pytest.mark.xdist_group(name="sparse-group") @pytest.mark.parametrize("size", [2**62 - 1, 11]) def test_reindex_sparse(size): import sparse array = dask.array.ones((2, 12), chunks=(-1, 3)) func = "sum" expected_groups = pd.RangeIndex(size) by = dask.array.from_array(np.repeat(np.arange(6) * 2, 2), chunks=(3,)) dense = np.zeros((2, 11)) dense[..., np.arange(6) * 2] = 2 expected = sparse.COO.from_numpy(dense) with pytest.raises(ValueError): ReindexStrategy(blockwise=True, array_type=ReindexArrayType.SPARSE_COO) reindex = ReindexStrategy(blockwise=False, array_type=ReindexArrayType.SPARSE_COO) original_reindex = flox.core.reindex_ def mocked_reindex(*args, **kwargs): res = original_reindex(*args, **kwargs) if isinstance(res, dask.array.Array): assert isinstance(res._meta, sparse.COO) else: assert isinstance(res, sparse.COO) return res # Define the error-raising property def raise_error(self): raise AttributeError("Access to '_data' is not allowed.") with patch("flox.core.reindex_") as mocked_reindex_func: with patch.object(pd.RangeIndex, "_data", property(raise_error)): mocked_reindex_func.side_effect = mocked_reindex actual, *_ = groupby_reduce( array, by, func=func, reindex=reindex, expected_groups=expected_groups, fill_value=0 ) if size == 11: assert_equal(actual, expected) else: actual.compute() # just compute # once during graph construction, 10 times afterward assert mocked_reindex_func.call_count > 1 def test_sparse_errors(): call = partial( groupby_reduce, [1, 2, 3], [0, 1, 1], reindex=REINDEX_SPARSE_STRAT, fill_value=0, expected_groups=[0, 1, 2], ) if not has_sparse: with pytest.raises(ImportError): call(func="sum") else: with pytest.raises(ValueError): call(func="first")
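# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the upstream test suite: the sparse tests
# above exercise ReindexStrategy with sparse.COO intermediates. The hedged
# example below shows the same calling pattern, assuming dask and sparse are
# installed; the helper name and the array values are made up, and
# ReindexStrategy/ReindexArrayType are imported from flox.core as in the tests.
def _example_sparse_reindex():  # hypothetical helper, not a test
    # Local imports keep the sketch self-contained.
    import dask.array
    import numpy as np
    import pandas as pd

    from flox.core import ReindexArrayType, ReindexStrategy, groupby_reduce

    array = dask.array.ones((2, 12), chunks=(-1, 3))
    by = dask.array.from_array(np.repeat(np.arange(6) * 2, 2), chunks=(3,))
    # Reindex each block's intermediate to the full group set as sparse.COO,
    # which keeps intermediates small when most groups are absent per block.
    strategy = ReindexStrategy(blockwise=False, array_type=ReindexArrayType.SPARSE_COO)
    result, groups = groupby_reduce(
        array, by, func="sum", reindex=strategy, expected_groups=pd.RangeIndex(11), fill_value=0
    )
    return result, groups
# ---------------------------------------------------------------------------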
flox-0.10.3/tests/test_properties.py000066400000000000000000000260101477552625700175350ustar00rootroot00000000000000import warnings from collections.abc import Callable from typing import Any import pandas as pd import pytest pytest.importorskip("hypothesis") pytest.importorskip("dask") pytest.importorskip("cftime") import dask import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np from hypothesis import assume, given, note, settings import flox from flox.core import groupby_reduce, groupby_scan from flox.xrutils import _contains_cftime_datetimes, _to_pytimedelta, datetime_to_numeric, isnull, notnull from . import BLOCKWISE_FUNCS, assert_equal from .strategies import all_arrays, by_arrays, chunked_arrays, func_st, numeric_dtypes, numeric_like_arrays from .strategies import chunks as chunks_strategy dask.config.set(scheduler="sync") def ffill(array, axis, dtype=None): return flox.aggregate_flox.ffill(np.zeros(array.shape[-1], dtype=int), array, axis=axis) def bfill(array, axis, dtype=None): return flox.aggregate_flox.ffill( np.zeros(array.shape[-1], dtype=int), array[::-1], axis=axis, )[::-1] NUMPY_SCAN_FUNCS: dict[str, Callable] = { "nancumsum": np.nancumsum, "ffill": ffill, "bfill": bfill, } # "cumsum": np.cumsum, def not_overflowing_array(array: np.ndarray[Any, Any]) -> bool: if array.dtype.kind in "Mm": array = array.view(np.int64) if array.dtype.kind == "f": info = np.finfo(array.dtype) elif array.dtype.kind in ["i", "u"]: info = np.iinfo(array.dtype) # type: ignore[assignment] else: return True array = array.ravel() array = array[notnull(array)] with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) result = bool(np.all((array < info.max / array.size) & (array > info.min / array.size))) # note(f"returning {result}, {array.min()} vs {info.min}, {array.max()} vs {info.max}") return result @given( data=st.data(), array=st.one_of(all_arrays, chunked_arrays()), func=func_st, ) @settings(deadline=None) def test_groupby_reduce(data, array, func: str) -> None: # overflow behaviour differs between bincount and sum (for example) assume(not_overflowing_array(array)) # TODO: fix var for complex numbers upstream assume(not (("quantile" in func or "var" in func or "std" in func) and array.dtype.kind == "c")) assume(not ("quantile" in func and array.dtype.kind == "b")) # arg* with nans in array are weird assume("arg" not in func and not np.any(isnull(array).ravel())) # TODO: funny bugs with overflows here is_cftime = _contains_cftime_datetimes(array) assume(not (is_cftime and func in ["prod", "nanprod"])) axis = -1 by = data.draw( by_arrays( elements={ "alphabet": st.just("a"), "min_value": 1, "max_value": 1, "min_size": 1, "max_size": 1, }, shape=st.just((array.shape[-1],)), ) ) if func in BLOCKWISE_FUNCS and isinstance(array, dask.array.Array): array = array.rechunk({axis: -1}) assert len(np.unique(by)) == 1 kwargs = {"q": 0.8} if "quantile" in func else {} flox_kwargs: dict[str, Any] = {} with np.errstate(invalid="ignore", divide="ignore"): actual, *_ = groupby_reduce( array, by, func=func, axis=axis, engine="numpy", **flox_kwargs, finalize_kwargs=kwargs, ) # numpy-groupies always does the calculation in float64 if ( ("var" in func or "std" in func or "sum" in func or "mean" in func or "quantile" in func) and array.dtype.kind == "f" and array.dtype.itemsize != 8 ): # bincount always accumulates in float64, # casting to float64 handles std more like npg does. # Setting dtype=float64 works fine for sum, mean. 
cast_to = array.dtype array = array.astype(np.float64) note(f"casting array to float64, cast_to={cast_to!r}") else: cast_to = None if array.dtype.kind in "Mm": array = array.view(np.int64) cast_to = array.dtype elif is_cftime: offset = array.min() array = datetime_to_numeric(array, offset, datetime_unit="us") note(("kwargs:", kwargs, "cast_to:", cast_to)) expected = getattr(np, func)(array, axis=axis, keepdims=True, **kwargs) if cast_to is not None: note(("casting to:", cast_to)) expected = expected.astype(cast_to) actual = actual.astype(cast_to) if is_cftime: expected = _to_pytimedelta(expected, unit="us") + offset note(("expected: ", expected, "actual: ", actual)) tolerance = {"atol": 1e-15} assert_equal(expected, actual, tolerance) @given( data=st.data(), array=chunked_arrays(arrays=numeric_like_arrays), func=func_st, ) def test_groupby_reduce_numpy_vs_dask(data, array, func: str) -> None: numpy_array = array.compute() # overflow behaviour differs between bincount and sum (for example) assume(not_overflowing_array(numpy_array)) # TODO: fix var for complex numbers upstream assume(not (("quantile" in func or "var" in func or "std" in func) and array.dtype.kind == "c")) # # arg* with nans in array are weird assume("arg" not in func and not np.any(isnull(numpy_array.ravel()))) if func in ["nanmedian", "nanquantile", "median", "quantile"]: array = array.rechunk({-1: -1}) axis = -1 by = data.draw(by_arrays(shape=st.just((array.shape[-1],)))) kwargs = {"q": 0.8} if "quantile" in func else {} flox_kwargs: dict[str, Any] = {} kwargs = dict( func=func, axis=axis, engine="numpy", **flox_kwargs, finalize_kwargs=kwargs, ) result_dask, *_ = groupby_reduce(array, by, **kwargs) result_numpy, *_ = groupby_reduce(numpy_array, by, **kwargs) assert_equal(result_numpy, result_dask) @settings(report_multiple_bugs=False) @given( data=st.data(), array=chunked_arrays(arrays=numeric_like_arrays), func=st.sampled_from(tuple(NUMPY_SCAN_FUNCS)), ) def test_scans(data, array: dask.array.Array, func: str) -> None: if "cum" in func: assume(not_overflowing_array(np.asarray(array))) by = data.draw(by_arrays(shape=st.just((array.shape[-1],)))) axis = array.ndim - 1 # Too many float32 edge-cases! 
if "cum" in func and array.dtype.kind == "f" and array.dtype.itemsize == 4: assume(False) numpy_array = array.compute() if numpy_array.dtype.kind not in "Mm": assume((np.abs(numpy_array) < 2**53).all()) if numpy_array.dtype.kind in "Mm": dtype = numpy_array.dtype asnumeric = numpy_array.view(np.int64) else: asnumeric = numpy_array dtype = NUMPY_SCAN_FUNCS[func](asnumeric[..., [0]], axis=axis).dtype expected = np.empty_like(numpy_array, dtype=dtype) group_idx, uniques = pd.factorize(by) for i in range(len(uniques)): mask = group_idx == i if not mask.any(): note((by, group_idx, uniques)) raise ValueError expected[..., mask] = NUMPY_SCAN_FUNCS[func](asnumeric[..., mask], axis=axis) if dtype: expected = expected.astype(dtype) note((numpy_array, group_idx, array.chunks)) tolerance = {"rtol": 1e-13, "atol": 1e-15} actual = groupby_scan(numpy_array, by, func=func, axis=-1, dtype=dtype) assert_equal(actual, expected, tolerance) actual = groupby_scan(array, by, func=func, axis=-1, dtype=dtype) assert_equal(actual, expected, tolerance) @given(data=st.data(), array=chunked_arrays()) def test_ffill_bfill_reverse(data, array: dask.array.Array) -> None: by = data.draw(by_arrays(shape=st.just((array.shape[-1],)))) def reverse(arr): return arr[..., ::-1] forward = groupby_scan(array, by, func="ffill") as_numpy = groupby_scan(array.compute(), by, func="ffill") assert_equal(forward, as_numpy) backward = groupby_scan(array, by, func="bfill") as_numpy = groupby_scan(array.compute(), by, func="bfill") assert_equal(backward, as_numpy) backward_reversed = reverse(groupby_scan(reverse(array), reverse(by), func="bfill")) assert_equal(forward, backward_reversed) forward_reversed = reverse(groupby_scan(reverse(array), reverse(by), func="ffill")) assert_equal(forward_reversed, backward) @given( data=st.data(), array=chunked_arrays(), func=st.sampled_from(["first", "last", "nanfirst", "nanlast"]), ) def test_first_last(data, array: dask.array.Array, func: str) -> None: by = data.draw(by_arrays(shape=st.just((array.shape[-1],)))) INVERSES = { "first": "last", "last": "first", "nanfirst": "nanlast", "nanlast": "nanfirst", } MATES = { "first": "nanfirst", "last": "nanlast", "nanfirst": "first", "nanlast": "last", } inverse = INVERSES[func] mate = MATES[func] if func in ["first", "last"]: array = array.rechunk((*array.chunks[:-1], -1)) for arr in [array, array.compute()]: forward, *fg = groupby_reduce(arr, by, func=func, engine="flox") reverse, *rg = groupby_reduce(arr[..., ::-1], by[..., ::-1], func=inverse, engine="flox") assert forward.dtype == reverse.dtype assert forward.dtype == arr.dtype assert_equal(fg, rg) assert_equal(forward, reverse) if arr.dtype.kind == "f" and not isnull(array.compute()).any(): if mate in ["first", "last"]: array = array.rechunk((*array.chunks[:-1], -1)) first, *_ = groupby_reduce(array, by, func=func, engine="flox") second, *_ = groupby_reduce(array, by, func=mate, engine="flox") assert_equal(first, second) @given(data=st.data(), func=st.sampled_from(["nanfirst", "nanlast"])) def test_first_last_useless(data, func): shape = data.draw(npst.array_shapes()) by = data.draw(by_arrays(shape=st.just(shape[slice(-1, None)]))) chunks = data.draw(chunks_strategy(shape=shape)) array = np.zeros(shape, dtype=np.int8) if chunks is not None: array = dask.array.from_array(array, chunks=chunks) actual, groups = groupby_reduce(array, by, axis=-1, func=func, engine="numpy") expected = np.zeros(shape[:-1] + (len(groups),), dtype=array.dtype) assert_equal(actual, expected) @given( func=st.sampled_from(["sum", 
"prod", "nansum", "nanprod"]), engine=st.sampled_from(["numpy", "flox"]), array_dtype=st.none() | numeric_dtypes, dtype=st.none() | numeric_dtypes, ) def test_agg_dtype_specified(func, array_dtype, dtype, engine): # regression test for GH388 counts = np.array([0, 2, 1, 0, 1], dtype=array_dtype) group = np.array([1, 1, 1, 2, 2]) actual, _ = groupby_reduce( counts, group, expected_groups=(np.array([1, 2]),), func=func, dtype=dtype, engine=engine, ) expected = getattr(np, func)(counts, keepdims=True, dtype=dtype) assert actual.dtype == expected.dtype flox-0.10.3/tests/test_xarray.py000066400000000000000000000641051477552625700166560ustar00rootroot00000000000000import numpy as np import pandas as pd import pytest # isort: off xr = pytest.importorskip("xarray") # isort: on from flox import xrdtypes as dtypes from flox.xarray import rechunk_for_blockwise, xarray_reduce from . import ( ALL_FUNCS, assert_equal, has_dask, raise_if_dask_computes, requires_cftime, requires_dask, ) if has_dask: import dask dask.config.set(scheduler="sync") # test against legacy xarray implementation # avoid some compilation overhead xr.set_options(use_flox=False, use_numbagg=False, use_bottleneck=False) tolerance64 = {"rtol": 1e-15, "atol": 1e-18} np.random.seed(123) @pytest.mark.parametrize("reindex", [None, False, True]) @pytest.mark.parametrize("min_count", [None, 1, 3]) @pytest.mark.parametrize("add_nan", [True, False]) @pytest.mark.parametrize("skipna", [True, False]) def test_xarray_reduce(skipna, add_nan, min_count, engine, reindex): if skipna is False and min_count is not None: pytest.skip() arr = np.ones((4, 12)) if add_nan: arr[1, ...] = np.nan arr[[0, 2], [3, 4]] = np.nan labels = np.array(["a", "a", "c", "c", "c", "b", "b", "c", "c", "b", "b", "f"]) labels = np.array(labels) labels2 = np.array([1, 2, 2, 1]) da = xr.DataArray( arr, dims=("x", "y"), coords={"labels2": ("x", labels2), "labels": ("y", labels)}, ).expand_dims(z=4) expected = da.groupby("labels").sum(skipna=skipna, min_count=min_count) actual = xarray_reduce( da, "labels", func="sum", skipna=skipna, min_count=min_count, engine=engine, reindex=reindex, ) assert_equal(expected, actual) da["labels2"] = da.labels2.astype(float) da["labels2"][0] = np.nan expected = da.groupby("labels2").sum(skipna=skipna, min_count=min_count) actual = xarray_reduce( da, "labels2", func="sum", skipna=skipna, min_count=min_count, engine=engine, reindex=reindex, ) assert_equal(expected, actual) # test dimension ordering # actual = xarray_reduce( # da.transpose("y", ...), "labels", func="sum", skipna=skipna, min_count=min_count # ) # assert_equal(expected, actual) # TODO: sort @pytest.mark.parametrize("pass_expected_groups", [True, False]) @pytest.mark.parametrize("chunk", (pytest.param(True, marks=requires_dask), False)) def test_xarray_reduce_multiple_groupers(pass_expected_groups, chunk, engine): if chunk and pass_expected_groups is False: pytest.skip() arr = np.ones((4, 12)) labels = np.array(["a", "a", "c", "c", "c", "b", "b", "c", "c", "b", "b", "f"]) labels2 = np.array([1, 2, 2, 1]) da = xr.DataArray( arr, dims=("x", "y"), coords={"labels2": ("x", labels2), "labels": ("y", labels)}, ).expand_dims(z=4) if chunk: da = da.chunk({"x": 2, "z": 1}) expected = xr.DataArray( [[4, 4], [8, 8], [10, 10], [2, 2]], dims=("labels", "labels2"), coords={"labels": ["a", "b", "c", "f"], "labels2": [1, 2]}, ).expand_dims(z=4) kwargs = dict(func="count", engine=engine) if pass_expected_groups: kwargs["expected_groups"] = (expected.labels.data, expected.labels2.data) with 
raise_if_dask_computes(): actual = xarray_reduce(da, da.labels, da.labels2, **kwargs) xr.testing.assert_identical(expected, actual) with raise_if_dask_computes(): actual = xarray_reduce(da, "labels", da.labels2, **kwargs) xr.testing.assert_identical(expected, actual) with raise_if_dask_computes(): actual = xarray_reduce(da, "labels", "labels2", **kwargs) xr.testing.assert_identical(expected, actual) if pass_expected_groups: kwargs["expected_groups"] = (expected.labels2.data, expected.labels.data) with raise_if_dask_computes(): actual = xarray_reduce(da, "labels2", "labels", **kwargs) xr.testing.assert_identical(expected.transpose("z", "labels2", "labels"), actual) @pytest.mark.parametrize("pass_expected_groups", [True, False]) @pytest.mark.parametrize("chunk", (pytest.param(True, marks=requires_dask), False)) def test_xarray_reduce_multiple_groupers_2(pass_expected_groups, chunk, engine): if chunk and pass_expected_groups is False: pytest.skip() arr = np.ones((2, 12)) labels = np.array(["a", "a", "c", "c", "c", "b", "b", "c", "c", "b", "b", "f"]) da = xr.DataArray( arr, dims=("x", "y"), coords={"labels2": ("y", labels), "labels": ("y", labels)} ).expand_dims(z=4) if chunk: da = da.chunk({"x": 2, "z": 1}) expected = xr.DataArray( [[2, 0, 0, 0], [0, 4, 0, 0], [0, 0, 5, 0], [0, 0, 0, 1]], dims=("labels", "labels2"), coords={ "labels": ["a", "b", "c", "f"], "labels2": ["a", "b", "c", "f"], }, ).expand_dims(z=4, x=2) kwargs = dict(func="count", engine=engine) if pass_expected_groups: kwargs["expected_groups"] = (expected.labels.data, expected.labels.data) with raise_if_dask_computes(): actual = xarray_reduce(da, "labels", "labels2", **kwargs) xr.testing.assert_identical(expected, actual) with pytest.raises(NotImplementedError): xarray_reduce(da, "labels", "labels2", dim=..., **kwargs) @requires_dask @pytest.mark.parametrize( "expected_groups", (None, (None, None), [[1, 2], [1, 2]]), ) def test_validate_expected_groups(expected_groups): da = xr.DataArray([1.0, 2.0], dims="x", coords={"labels": ("x", [1, 2]), "labels2": ("x", [1, 2])}) with pytest.raises(ValueError): xarray_reduce( da.chunk({"x": 1}), "labels", "labels2", func="count", expected_groups=expected_groups, ) @requires_cftime @pytest.mark.parametrize("indexer", [slice(None), pytest.param(slice(12), id="missing-group")]) @pytest.mark.parametrize("expected_groups", [None, [0, 1, 2, 3]]) @pytest.mark.parametrize("func", ["first", "last", "min", "max", "count"]) def test_xarray_reduce_cftime_var(engine, indexer, expected_groups, func): times = xr.date_range("1980-09-01 00:00", "1982-09-18 00:00", freq="ME", calendar="noleap") ds = xr.Dataset({"var": ("time", times)}, coords={"time": np.repeat(np.arange(4), 6)}) ds = ds.isel(time=indexer) actual = xarray_reduce( ds, ds.time, func=func, fill_value=dtypes.NA if func in ["first", "last"] else np.nan, engine=engine, expected_groups=expected_groups, ) expected = getattr(ds.groupby("time"), func)() if expected_groups is not None: expected = expected.reindex(time=expected_groups) xr.testing.assert_identical(actual, expected) @requires_cftime @requires_dask def test_xarray_reduce_single_grouper(engine): # DataArray ds = xr.Dataset( { "Tair": ( ("time", "x", "y"), dask.array.ones((36, 205, 275), chunks=(9, -1, -1)), ) }, coords={"time": xr.date_range("1980-09-01 00:00", "1983-09-18 00:00", freq="ME", calendar="noleap")}, ) actual = xarray_reduce(ds.Tair, ds.time.dt.month, func="mean", engine=engine) expected = ds.Tair.groupby("time.month").mean() xr.testing.assert_allclose(actual, expected) # 
Ellipsis reduction actual = xarray_reduce(ds.Tair, ds.time.dt.month, func="mean", dim=..., engine=engine) expected = ds.Tair.groupby("time.month").mean(...) xr.testing.assert_allclose(actual, expected) # Dataset expected = ds.groupby("time.month").mean() actual = xarray_reduce(ds, ds.time.dt.month, func="mean", engine=engine) xr.testing.assert_allclose(actual, expected) # reduce along other dimensions expected = ds.groupby("time.month").mean(("x", "y")) actual = xarray_reduce(ds, ds.time.dt.month, dim=["x", "y"], func="mean", engine=engine) xr.testing.assert_allclose(actual, expected) # add data var with missing grouper dim ds["foo"] = ("bar", [1, 2, 3]) expected = ds.groupby("time.month").mean() actual = xarray_reduce(ds, ds.time.dt.month, func="mean", engine=engine) xr.testing.assert_allclose(actual, expected) del ds["foo"] # non-dim coord with missing grouper dim ds.coords["foo"] = ("bar", [1, 2, 3]) expected = ds.groupby("time.month").mean() actual = xarray_reduce(ds, ds.time.dt.month, func="mean", engine=engine) xr.testing.assert_allclose(actual, expected) del ds["foo"] # unindexed dim by = ds.time.dt.month.drop_vars("time") ds = ds.drop_vars("time") expected = ds.groupby(by).mean() actual = xarray_reduce(ds, by, func="mean") xr.testing.assert_allclose(actual, expected) def test_xarray_reduce_errors(): da = xr.DataArray(np.ones((12,)), dims="x") by = xr.DataArray(np.ones((12,)), dims="x") with pytest.raises(ValueError, match="group by unnamed"): xarray_reduce(da, by, func="mean") by.name = "by" with pytest.raises(ValueError, match="Cannot reduce over"): xarray_reduce(da, by, func="mean", dim="foo") if has_dask: with pytest.raises(ValueError, match="provide expected_groups"): xarray_reduce(da, by.chunk(), func="mean") @requires_dask @pytest.mark.parametrize( "inchunks, expected", [ [(1,) * 10, (3, 2, 2, 3)], [(2,) * 5, (3, 2, 2, 3)], [(3, 3, 3, 1), (3, 2, 5)], [(3, 1, 1, 2, 1, 1, 1), (3, 2, 2, 3)], [(3, 2, 2, 3), (3, 2, 2, 3)], [(4, 4, 2), (3, 4, 3)], [(5, 5), (5, 5)], [(6, 4), (5, 5)], [(7, 3), (7, 3)], [(8, 2), (7, 3)], [(9, 1), (10,)], [(10,), (10,)], ], ) def test_rechunk_for_blockwise(inchunks, expected): labels = np.array([1, 1, 1, 2, 2, 3, 3, 5, 5, 5]) da = xr.DataArray(dask.array.ones((10,), chunks=inchunks), dims="x", name="foo") rechunked = rechunk_for_blockwise(da, "x", xr.DataArray(labels, dims="x")) assert rechunked.chunks == (expected,) da = xr.DataArray(dask.array.ones((5, 10), chunks=(-1, inchunks)), dims=("y", "x"), name="foo") rechunked = rechunk_for_blockwise(da, "x", xr.DataArray(labels, dims="x")) assert rechunked.chunks == ((5,), expected) ds = da.to_dataset() rechunked = rechunk_for_blockwise(ds, "x", xr.DataArray(labels, dims="x")) assert rechunked.foo.chunks == ((5,), expected) # everything below this is copied from xarray's test_groupby.py # TODO: chunk these # TODO: dim=None, dim=Ellipsis, groupby unindexed dim def test_groupby_duplicate_coordinate_labels(engine): # fix for http://stackoverflow.com/questions/38065129 array = xr.DataArray([1, 2, 3], [("x", [1, 1, 2])]) expected = xr.DataArray([3, 3], [("x", [1, 2])]) actual = xarray_reduce(array, array.x, func="sum", engine=engine) assert_equal(expected, actual) def test_multi_index_groupby_sum(engine): # regression test for xarray GH873 ds = xr.Dataset( {"foo": (("x", "y", "z"), np.ones((3, 4, 2)))}, {"x": ["a", "b", "c"], "y": [1, 2, 3, 4]}, ) expected = ds.sum("z") stacked = ds.stack(space=["x", "y"]) actual = xarray_reduce(stacked, "space", dim="z", func="sum", engine=engine) expected_xarray = 
stacked.groupby("space").sum("z") assert_equal(expected_xarray, actual) assert_equal(expected, actual.unstack("space")) actual = xarray_reduce(stacked.foo, "space", dim="z", func="sum", engine=engine) assert_equal(expected.foo, actual.unstack("space")) ds = xr.Dataset( dict(a=(("z",), np.ones(10))), coords=dict(b=(("z"), np.arange(2).repeat(5)), c=(("z"), np.arange(5).repeat(2))), ).set_index(bc=["b", "c"]) expected = ds.groupby("bc").sum() actual = xarray_reduce(ds, "bc", func="sum") assert_equal(expected, actual) @pytest.mark.parametrize("chunks", (None, pytest.param(2, marks=requires_dask))) def test_xarray_groupby_bins(chunks, engine): array = xr.DataArray([1, 1, 1, 1, 1], dims="x") labels = xr.DataArray([1, 1.5, 1.9, 2, 3], dims="x", name="labels") if chunks: array = array.chunk({"x": chunks}) labels = labels.chunk({"x": chunks}) kwargs = dict( dim="x", func="count", engine=engine, expected_groups=np.array([1, 2, 4, 5]), isbin=True, fill_value=0, ) with raise_if_dask_computes(): actual = xarray_reduce(array, labels, **kwargs) expected = xr.DataArray( np.array([3, 1, 0]), dims="labels_bins", coords={"labels_bins": [pd.Interval(1, 2), pd.Interval(2, 4), pd.Interval(4, 5)]}, ) xr.testing.assert_equal(actual, expected) # 3D array, 2D by, single dim, with NaNs in by array = array.expand_dims(y=2, z=3) labels = labels.expand_dims(y=2).copy() labels.data[-1, -1] = np.nan with raise_if_dask_computes(): actual = xarray_reduce(array, labels, **kwargs) expected = xr.DataArray( np.array([[[3, 1, 0]] * 3, [[3, 0, 0]] * 3]), dims=("y", "z", "labels_bins"), coords={"labels_bins": [pd.Interval(1, 2), pd.Interval(2, 4), pd.Interval(4, 5)]}, ) xr.testing.assert_equal(actual, expected) @requires_dask def test_func_is_aggregation(): from flox.aggregations import mean ds = xr.Dataset( { "Tair": ( ("time", "x", "y"), dask.array.ones((36, 205, 275), chunks=(9, -1, -1)), ) }, coords={"time": xr.date_range("1980-09-01 00:00", "1983-09-18 00:00", freq="ME", calendar="noleap")}, ) expected = xarray_reduce(ds.Tair, ds.time.dt.month, func="mean") actual = xarray_reduce(ds.Tair, ds.time.dt.month, func=mean) xr.testing.assert_allclose(actual, expected) with pytest.raises(ValueError): xarray_reduce(ds.Tair, ds.time.dt.month, func=mean, skipna=True) with pytest.raises(ValueError): xarray_reduce(ds.Tair, ds.time.dt.month, func=mean, skipna=False) # @requires_dask # def test_cache(): # pytest.importorskip("cachey") # from flox.cache import cache # ds = xr.Dataset( # { # "foo": (("x", "y"), dask.array.ones((10, 20), chunks=2)), # "bar": (("x", "y"), dask.array.ones((10, 20), chunks=2)), # }, # coords={"labels": ("y", np.repeat([1, 2], 10))}, # ) # cache.clear() # xarray_reduce(ds, "labels", func="mean", method="cohorts") # assert len(cache.data) == 1 # xarray_reduce(ds, "labels", func="mean", method="blockwise") # assert len(cache.data) == 2 @requires_dask @pytest.mark.parametrize("method", ["cohorts", "map-reduce"]) def test_groupby_bins_indexed_coordinate(method): ds = xr.Dataset( { "air": ( ("time", "lat", "lon"), dask.array.random.random((125, 25, 53), chunks=(20, 5, -1)), ) }, coords={ "time": pd.date_range("2013-01-01", "2013-02-01", freq="6h"), "lat": np.arange(75.0, 14.9, -2.5), "lon": np.arange(200.0, 331.0, 2.5), }, ) bins = [40, 50, 60, 70] expected = ds.groupby_bins("lat", bins=bins).mean(keep_attrs=True, dim=...) 
actual = xarray_reduce( ds, ds.lat, dim=ds.air.dims, expected_groups=([40, 50, 60, 70],), isbin=(True,), func="mean", method=method, ) xr.testing.assert_allclose(expected, actual) actual = xarray_reduce( ds, ds.lat, dim=ds.air.dims, expected_groups=pd.IntervalIndex.from_breaks([40, 50, 60, 70]), func="mean", method=method, ) xr.testing.assert_allclose(expected, actual) @pytest.mark.parametrize("chunk", (True, False)) def test_mixed_grouping(chunk): if not has_dask and chunk: pytest.skip() # regression test for https://github.com/xarray-contrib/flox/pull/111 sa = 10 sb = 13 sc = 3 x = xr.Dataset( { "v0": xr.DataArray( ((np.arange(sa * sb * sc) / sa) % 1).reshape((sa, sb, sc)), dims=("a", "b", "c"), ), "v1": xr.DataArray((np.arange(sa * sb) % 3).reshape(sa, sb), dims=("a", "b")), } ) if chunk: x["v0"] = x["v0"].chunk({"a": 5}) r = xarray_reduce( x["v0"], x["v1"], x["v0"], expected_groups=(np.arange(6), np.linspace(0, 1, num=5)), isbin=[False, True], func="count", dim="b", fill_value=0, ) assert (r.sel(v1=[3, 4, 5]) == 0).all().data def test_alignment_error(): da = xr.DataArray(np.arange(10), dims="x", coords={"x": np.arange(10)}) with pytest.raises(ValueError): xarray_reduce(da, da.x.sel(x=slice(5)), func="count") @pytest.mark.parametrize("add_nan", [True, False]) @pytest.mark.parametrize("dtype_out", [np.float64, "float64", np.dtype("float64")]) @pytest.mark.parametrize("dtype", [np.float32, np.float64]) @pytest.mark.parametrize("chunk", (pytest.param(True, marks=requires_dask), False)) def test_dtype(add_nan, chunk, dtype, dtype_out, engine): if engine == "numbagg": # https://github.com/numbagg/numbagg/issues/121 pytest.skip() xp = dask.array if chunk else np data = xp.linspace(0, 1, 48, dtype=dtype).reshape((4, 12)) if add_nan: data[1, ...] = np.nan data[0, [0, 2]] = np.nan arr = xr.DataArray( data, dims=("x", "t"), coords={ "labels": ( "t", np.array(["a", "a", "c", "c", "c", "b", "b", "c", "c", "b", "b", "f"]), ) }, name="arr", ) kwargs = dict(func="mean", dtype=dtype_out, engine=engine) actual = xarray_reduce(arr, "labels", **kwargs) expected = arr.groupby("labels").mean(dtype="float64") assert actual.dtype == np.dtype("float64") assert actual.compute().dtype == np.dtype("float64") xr.testing.assert_allclose(expected, actual, **tolerance64) actual = xarray_reduce(arr.to_dataset(), "labels", **kwargs) expected = arr.to_dataset().groupby("labels").mean(dtype="float64") assert actual.arr.dtype == np.dtype("float64") assert actual.compute().arr.dtype == np.dtype("float64") xr.testing.assert_allclose(expected, actual.transpose("labels", ...), **tolerance64) @pytest.mark.parametrize("chunk", [pytest.param(True, marks=requires_dask), False]) @pytest.mark.parametrize("use_flox", [True, False]) def test_dtype_accumulation(use_flox, chunk): datetimes = pd.date_range("2010-01", "2015-01", freq="6h", inclusive="left") samples = 10 + np.cos(2 * np.pi * 0.001 * np.arange(len(datetimes))) * 1 samples += np.random.randn(len(datetimes)) samples = samples.astype("float32") nan_indices = np.random.default_rng().integers(0, len(samples), size=5_000) samples[nan_indices] = np.nan da = xr.DataArray(samples, dims=("time",), coords=[datetimes]) if chunk: da = da.chunk(time=1024) gb = da.groupby("time.month") with xr.set_options(use_flox=use_flox): expected = gb.reduce(np.nanmean) actual = gb.mean() xr.testing.assert_allclose(expected, actual) assert np.issubdtype(actual.dtype, np.float32) assert np.issubdtype(actual.compute().dtype, np.float32) expected = gb.reduce(np.nanmean, dtype="float64") actual = 
gb.mean(dtype="float64") assert np.issubdtype(actual.dtype, np.float64) assert np.issubdtype(actual.compute().dtype, np.float64) xr.testing.assert_allclose(expected, actual, **tolerance64) def test_preserve_multiindex(): """Regression test for GH issue #215""" vort = xr.DataArray( name="vort", data=np.random.uniform(size=(4, 2)), dims=["i", "face"], coords={"i": ("i", np.arange(4)), "face": ("face", np.arange(2))}, ) vort = ( vort.coarsen(i=2) .construct(i=("i_region_coarse", "i_region")) .stack(region=["face", "i_region_coarse"]) ) bins = [np.linspace(0, 1, 10)] bin_intervals = tuple(pd.IntervalIndex.from_breaks(b) for b in bins) hist = xarray_reduce( xr.DataArray(1), # weights vort, # variables we want to bin func="count", # count occurrences falling in bins expected_groups=bin_intervals, # bins for each variable dim=["i_region"], # broadcast dimensions fill_value=0, # fill empty bins with 0 counts ) assert "region" in hist.coords def test_fill_value_xarray_behaviour(): times = pd.date_range("2000-01-01", freq="6h", periods=10) ds = xr.Dataset( { "bar": ( "time", [1, 2, 3, np.nan, np.nan, np.nan, 4, 5, np.nan, np.nan], {"meta": "data"}, ), "time": times, } ) pd.date_range("2000-01-01", freq="3h", periods=19) with xr.set_options(use_flox=False): expected = ds.resample(time="3h").sum() with xr.set_options(use_flox=True): actual = ds.resample(time="3h").sum() xr.testing.assert_identical(expected, actual) def test_fill_value_xarray_binning(): array = np.linspace(0, 10, 5 * 10, dtype=int).reshape(5, 10) x = np.array([0, 0, 1, 2, 2]) y = np.arange(array.shape[1]) * 3 u = np.linspace(0, 1, 5) data_array = xr.DataArray(data=array, coords={"x": x, "y": y, "u": ("x", u)}, dims=("x", "y")) with xr.set_options(use_flox=False): expected = data_array.groupby_bins("y", bins=4).mean() with xr.set_options(use_flox=True): actual = data_array.groupby_bins("y", bins=4).mean() xr.testing.assert_identical(expected, actual) def test_groupby_2d_dataset(): d = { "coords": { "bit_index": { "dims": ("bit_index",), "attrs": {"name": "bit_index"}, "data": [0, 1], }, "index": {"dims": ("index",), "data": [0, 6, 8, 10, 14]}, "clifford": {"dims": ("index",), "attrs": {}, "data": [1, 1, 4, 10, 4]}, }, "dims": {"bit_index": 2, "index": 5}, "data_vars": { "counts": { "dims": ("bit_index", "index"), "attrs": { "name": "counts", }, "data": [[18, 30, 45, 70, 38], [382, 370, 355, 330, 362]], } }, } ds = xr.Dataset.from_dict(d) with xr.set_options(use_flox=False): expected = ds.groupby("clifford").mean() with xr.set_options(use_flox=True): actual = ds.groupby("clifford").mean() assert expected.counts.dims == actual.counts.dims # https://github.com/pydata/xarray/issues/8292 xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize("chunk", (pytest.param(True, marks=requires_dask), False)) def test_resampling_missing_groups(chunk): # Regression test for https://github.com/pydata/xarray/issues/8592 time_coords = pd.to_datetime(["2018-06-13T03:40:36", "2018-06-13T05:50:37", "2018-06-15T03:02:34"]) latitude_coords = [0.0] longitude_coords = [0.0] data = [[[1.0]], [[2.0]], [[3.0]]] da = xr.DataArray( data, coords={ "time": time_coords, "latitude": latitude_coords, "longitude": longitude_coords, }, dims=["time", "latitude", "longitude"], ) if chunk: da = da.chunk(time=1) # Without chunking the dataarray, it works: with xr.set_options(use_flox=False): expected = da.resample(time="1D").mean() with xr.set_options(use_flox=True): actual = da.resample(time="1D").mean() xr.testing.assert_identical(expected, actual) 
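# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the upstream test suite: the resampling test
# above relies on xarray_reduce filling groups that never occur in the data.
# The hedged example below calls xarray_reduce directly with explicit
# expected_groups; the helper name, labels, and values are made up.
def _example_xarray_reduce_missing_groups():  # hypothetical helper, not a test
    import numpy as np
    import xarray as xr

    from flox.xarray import xarray_reduce

    da = xr.DataArray(
        np.arange(6.0),
        dims="time",
        coords={"labels": ("time", ["a", "a", "b", "a", "b", "c"])},
    )
    # "d" never occurs in `labels`, so its entry in the result is the fill_value.
    return xarray_reduce(
        da, "labels", func="sum", expected_groups=np.array(["a", "b", "c", "d"]), fill_value=0
    )
# ---------------------------------------------------------------------------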
@pytest.mark.parametrize("q", (0.5, (0.5,), (0.5, 0.67, 0.85))) @pytest.mark.parametrize("skipna", [False, True]) @pytest.mark.parametrize("chunk", [pytest.param(True, marks=requires_dask), False]) @pytest.mark.parametrize("by_ndim", [1, 2]) def test_multiple_quantiles(q, chunk, by_ndim, skipna): array = np.array([[1, -1, np.nan, 3, 4, 10, 5], [1, np.nan, np.nan, 3, 4, np.nan, np.nan]]) labels = np.array([0, 0, 0, 1, 0, 1, 1]) dims = ("y",) if by_ndim == 2: labels = np.broadcast_to(labels, (5, *labels.shape)) array = np.broadcast_to(np.expand_dims(array, -2), (2, 5, array.shape[-1])) dims += ("y0",) if chunk: array = dask.array.from_array(array, chunks=(1,) + (-1,) * by_ndim) da = xr.DataArray(array, dims=("x", *dims)) by = xr.DataArray(labels, dims=dims, name="by") actual = xarray_reduce(da, by, func="quantile", skipna=skipna, q=q, engine="flox") with xr.set_options(use_flox=False): expected = da.groupby(by).quantile(q, skipna=skipna) xr.testing.assert_allclose(expected, actual) @pytest.mark.parametrize("func", ALL_FUNCS) def test_direct_reduction(func): if "arg" in func or "mode" in func: pytest.skip() # regression test for https://github.com/pydata/xarray/issues/8819 rand = np.random.choice([True, False], size=(2, 3)) if func not in ["any", "all"]: rand = rand.astype(float) if "nan" in func: func = func[3:] kwargs = {"skipna": True} else: kwargs = {} if "first" not in func and "last" not in func: kwargs["dim"] = "y" if "quantile" in func: kwargs["q"] = 0.9 data = xr.DataArray(rand, dims=("x", "y"), coords={"x": [10, 20], "y": [0, 1, 2]}) with xr.set_options(use_flox=True): actual = xarray_reduce(data, "x", func=func, **kwargs) with xr.set_options(use_flox=False): expected = getattr(data.groupby("x", squeeze=False), func)(**kwargs) xr.testing.assert_identical(expected, actual) @pytest.mark.parametrize("reduction", ["max", "min", "nanmax", "nanmin", "sum", "nansum", "prod", "nanprod"]) def test_groupby_preserve_dtype(reduction): # all groups are present, we should follow numpy exactly ds = xr.Dataset( { "test": ( ["x", "y"], np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype="int16"), ) }, coords={"idx": ("x", [1, 2, 1])}, ) kwargs = {"engine": "numpy"} if "nan" in reduction: kwargs["skipna"] = True with xr.set_options(use_flox=True): actual = getattr(ds.groupby("idx"), reduction.removeprefix("nan"))(**kwargs).test.dtype expected = getattr(np, reduction)(ds.test.data, axis=0).dtype assert actual == expected