==> CDSETool-0.3.1/.github/ISSUE_TEMPLATE/bug-report.md <==
---
name: "Bug report \U0001F41E"
about: Create a bug report
labels: bug
---
## Describe the bug
A clear and concise description of what the bug is.
### Steps to reproduce
Code snippets and steps to reproduce the behavior.
e.g.
```python
from cdsetool.query import query_features
query_features("InvalidCollection")
```
### Expected behavior
A clear and concise description of what you expected to happen.
### Environment
- OS: [e.g. Arch Linux]
- Package version: [e.g. cdsetool==X.Y.Z]
- Python version: [e.g. 3.11.2]
- Other details that you think may affect.
### Additional context
Add any other context about the problem here.
==> CDSETool-0.3.1/.github/ISSUE_TEMPLATE/feature-request.md <==
---
name: "Feature request \U0001F680"
about: Suggest an idea
labels: enhancement
---
## Summary
Brief explanation of the feature.
### Basic example
Include a basic example or links here.
### Motivation
Why are we doing this? What use cases does it support? What is the expected outcome?

==> CDSETool-0.3.1/.github/dependabot.yml <==
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
==> CDSETool-0.3.1/.github/workflows/lint.yml <==
name: Lint
on: [push, pull_request]
permissions: {}
jobs:
ruff:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- uses: astral-sh/ruff-action@0ce1b0bf8b818ef400413f810f8a11cdbda0034b # v4.0.0
with:
args: 'format --check --diff'
- uses: astral-sh/ruff-action@0ce1b0bf8b818ef400413f810f8a11cdbda0034b # v4.0.0
with:
args: 'check'
pylint:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .[test]
- name: Analysing the code with pylint
run: |
pylint $(git ls-files 'src/*.py')
pyright:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .[test]
- uses: jakebailey/pyright-action@8ec14b5cfe41f26e5f41686a31eb6012758217ef # v3.0.2
==> CDSETool-0.3.1/.github/workflows/publish.yml <==
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
name: Upload Python Package
on:
release:
types: [published]
permissions: {}
jobs:
deploy:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Python
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build
- name: Build package
run: python -m build
- name: Publish package
uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b # v1.14.0
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
==> CDSETool-0.3.1/.github/workflows/pytest.yml <==
name: Pytest
on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]
permissions: {}
jobs:
pytest:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .[test]
- name: Test with pytest
run: |
pytest --cov=cdsetool --cov-report xml
# - name: Python Coverage
# uses: orgoro/coverage@3f13a558c5af7376496aa4848bf0224aead366ac # v3.2
# with:
# coverageFile: coverage.xml
# token: ${{ secrets.GITHUB_TOKEN }}
==> CDSETool-0.3.1/.github/workflows/scan.yml <==
name: Scan
on:
schedule:
- cron: '0 0 * * *'
workflow_dispatch:
permissions: {}
# When a PR is updated, cancel the jobs from the previous version. Merges
# do not define head_ref, so use run_id to never cancel those jobs.
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
cve-scanner:
runs-on: ubuntu-latest
permissions:
security-events: write
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Run vulnerability scanner
uses: aquasecurity/trivy-action@ed142fd0673e97e23eac54620cfb913e5ce36c25 # 0.36.0
with:
scan-type: 'fs'
ignore-unfixed: true
format: 'sarif'
output: 'trivy-results.sarif'
severity: 'MEDIUM,HIGH,CRITICAL'
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@95e58e9a2cdfd71adc6e0353d5c52f41a045d225 # v4.35.2
with:
sarif_file: 'trivy-results.sarif'
==> CDSETool-0.3.1/.gitignore <==
.DS_Store
.coverage
environment
main.py
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
.vscode/
==> CDSETool-0.3.1/LICENSE <==
MIT License
Copyright (c) 2023-2025 CDSETool Contributors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
==> CDSETool-0.3.1/README.md <==
# CDSETool
## About CDSETool
CDSETool is a Python library and CLI for querying and downloading Copernicus data from the Copernicus Data Space Ecosystem (CDSE).
## Quick start
```python
from cdsetool.query import query_features, shape_to_wkt
from cdsetool.credentials import Credentials
from cdsetool.download import download_features
from cdsetool.monitor import StatusMonitor
from datetime import date
features = query_features(
"SENTINEL-1",
{
"contentDateStartGe": "2020-12-20",
"contentDateStartLe": date(2020, 12, 25),
"processingLevel": "LEVEL1",
"productType": "IW_GRDH_1S",
"geometry": shape_to_wkt("path/to/shapefile.shp"),
},
)
list(
download_features(
features,
"path/to/output/folder/",
{
"concurrency": 4,
"monitor": StatusMonitor(),
"credentials": Credentials("username", "password"),
},
)
)
```
Or use the CLI:
```bash
cdsetool query search SENTINEL-2 --search-term contentDateStartGe=2020-01-01 --search-term contentDateStartLe=2020-01-10 --search-term productType=S2MSI2A
cdsetool download SENTINEL-2 PATH/TO/DIR --concurrency 4 --search-term contentDateStartGe=2020-01-01 --search-term contentDateStartLe=2020-01-10 --search-term productType=S2MSI2A
```
## Table of Contents
- [CDSETool](#cdsetool)
* [About CDSETool](#about-cdsetool)
* [Quick Start](#quick-start)
* [Table of Contents](#table-of-contents)
* [Installation](#installation)
* [Usage](#usage)
+ [Querying features](#querying-features)
      - [Query Options](#query-options)
      - [Querying by geometry](#querying-by-geometry)
- [Querying by lists of parameters](#querying-by-lists-of-parameters)
- [Querying by dates](#querying-by-dates)
- [Listing search terms](#listing-search-terms)
+ [Downloading features](#downloading-features)
- [Authenticating](#authenticating)
- [Concurrently downloading features](#concurrently-downloading-features)
      - [Sequentially downloading features](#sequentially-downloading-features)
      - [Download specific files within features](#download-specific-files-within-features)
* [Roadmap](#roadmap)
* [Contributing](#contributing)
* [LICENSE](#license)
## Installation
Install `cdsetool` using pip:
```bash
pip install cdsetool==0.3.1
```
## Usage
### Querying features
Querying is always done in batches, returning `len(results) <= top` records each time.
A local buffer is filled and gradually emptied as results are yielded. When the buffer is empty,
more results will be requested and the process repeated until no more results are available, or
the iterator is discarded.
**Important**: The API has a pagination limit of 10,000 results per query. If your query returns more results, you'll need to narrow your search criteria (e.g., use smaller date ranges).
Since downloading features is the most common use case, `query_features` assumes the query will run to completion.
Because of this, the batch size defaults to `1000`, the maximum allowed by CDSE.
```python
from cdsetool.query import query_features
collection = "SENTINEL-2"
search_terms = {
"top": 100, # batch size, between 1 and 1000 (default: 1000)
"contentDateStartGe": "2024-01-01",
"productType": "S2MSI1C"
}
# wait for a single batch to finish, yield results immediately
for feature in query_features(collection, search_terms):
    print(feature.get("Name"))  # do something with each feature
# wait for all batch requests to complete, returning list
features = list(query_features(collection, search_terms))
# manually iterate
iterator = query_features(collection, search_terms)
featureA = next(iterator)
featureB = next(iterator)
# ...
```
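If a query would exceed the 10,000-result pagination limit, one workaround is to split the date range into smaller windows and run one query per window. A minimal sketch (the collection, dates, and 30-day window size are illustrative; half-open `Ge`/`Lt` windows avoid duplicates at the boundaries):

```python
from datetime import date, timedelta

from cdsetool.query import query_features

def date_windows(start: date, end: date, days: int = 30):
    """Yield half-open (window_start, window_end) pairs covering [start, end)."""
    step = timedelta(days=days)
    while start < end:
        yield start, min(start + step, end)
        start += step

features = []
for window_start, window_end in date_windows(date(2020, 1, 1), date(2021, 1, 1)):
    features.extend(
        query_features(
            "SENTINEL-2",
            {
                "contentDateStartGe": window_start,
                "contentDateStartLt": window_end,
                "productType": "S2MSI2A",
            },
        )
    )
```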
#### Query Options
**Product Format**
Query results are returned directly from the Copernicus API. Each product has the following structure:
```python
product = {
"Id": "uuid-string",
"Name": "S2A_MSIL2A_20240110T105421_...",
"Collection": "SENTINEL-2",
"ContentDate": {"Start": "2024-01-10T10:54:21Z", "End": "..."},
"Online": True,
"ContentLength": 1043654649,
"GeoFootprint": {"type": "Polygon", "coordinates": [...]},
"Attributes": [
{"Name": "productType", "Value": "S2MSI2A"},
{"Name": "cloudCover", "Value": 5.2},
...
]
}
```
**Expand Product Attributes**
By default, query results do not include product attributes (`productType`, `cloudCover`, `platform`, `instrument`, etc.). To include them, request attribute expansion from the server with the `expand_attributes` option, then read individual attributes with the `get_product_attribute()` helper:
```python
from cdsetool.query import query_features, get_product_attribute
features = query_features(
"SENTINEL-2",
{"contentDateStartGe": "2024-01-01"},
options={"expand_attributes": True}
)
feature = next(iter(features))  # results are yielded lazily; grab the first one
# Access basic properties directly
print(feature["Name"]) # Product name
print(feature["Id"]) # Product UUID
# Access attributes using helper function
cloud_cover = get_product_attribute(feature, "cloudCover")
product_type = get_product_attribute(feature, "productType")
```
#### Querying by geometry
To query by geometry, first convert your shape to Well-Known Text (WKT). The included
`shape_to_wkt` helper handles this conversion.
```python
from cdsetool.query import query_features, shape_to_wkt
geometry = shape_to_wkt("path/to/shape.shp")
features = query_features("SENTINEL-3", {"geometry": geometry})
```
#### Querying by lists of parameters
Most search terms accept only a single value. To query by a list of values, loop over them
and pass each one to the query function in turn.
```python
from cdsetool.query import query_features
tile_ids = ["32TPT", "32UPU", "32UPU", "31RFL", "37XDA"]
for tile_id in tile_ids:
features = query_features("SENTINEL-2", {"tileId": tile_id})
for feature in features:
        print(feature.get("Name"))  # do something with each feature
```
#### Querying by dates
It's quite common to query for features created before, after, or between dates.
Search terms support comparison operator suffixes:
| Suffix | Meaning | Example |
|--------|----------------------------|----------------------|
| `Eq` | equals (=) | `contentDateStartEq` |
| `Gt` | greater than (>) | `contentDateStartGt` |
| `Ge` | greater than or equal (>=) | `contentDateStartGe` |
| `Lt` | less than (<) | `contentDateStartLt` |
| `Le` | less than or equal (<=) | `contentDateStartLe` |
`Eq` can be applied to any field, but the other suffixes apply only to numeric and date fields.
Interval syntax is allowed only on the base name, not on suffixed variants, and only on numeric and date fields.
| Interval notation | Suffixes to combine | Meaning |
|-------------------|---------------------|--------------------------------|
| [a, b] | `Ge` + `Le` | a <= value <= b (closed) |
| (a, b) | `Gt` + `Lt` | a < value < b (open) |
| [a, b) | `Ge` + `Lt` | a <= value < b (half-open) |
| (a, b] | `Gt` + `Le` | a < value <= b (half-open) |
```python
from cdsetool.query import query_features
from datetime import date, datetime
date_from = date(2020, 1, 1) # or datetime(2020, 1, 1, 23, 59, 59, 123456) or "2020-01-01" or "2020-01-01T23:59:59.123456Z"
date_to = date(2020, 12, 31)
features = query_features("SENTINEL-2", {"contentDateStartGe": date_from, "contentDateStartLe": date_to, "cloudCover": "[0, 30]"})
```
#### Listing search terms
To get a list of all search terms for a given collection, you may either use the `describe_collection` function or
use the CLI:
```python
from cdsetool.query import describe_collection
search_terms = describe_collection("SENTINEL-2").keys()
print(search_terms)
```
Or via the CLI:
```bash
$ cdsetool query search-terms SENTINEL-2
```
### Downloading features
#### Authenticating
An account is required to download features from the Copernicus distribution service.
To authenticate, instantiate `Credentials` with your username and password:
```python
from cdsetool.credentials import Credentials
username = "konata@izumi.com"
password = "password123"
credentials = Credentials(username, password)
```
Alternatively, `Credentials` can pull from `~/.netrc` when username and password are left blank.
```python
# ~/.netrc
machine https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token
login konata@izumi.com
password password123
# main.py
from cdsetool.credentials import Credentials
credentials = Credentials()
```
The credentials object may then be passed to a download function. If left out, the download
functions will default to using `.netrc`.
```python
credentials = Credentials()
download_features(features, "/some/download/path", {"credentials": credentials})
```
Credentials can be validated using the `validate_credentials` function, which returns a boolean.
```python
from cdsetool.credentials import validate_credentials
validate_credentials(username='user', password='password')
```
If `None` is passed for both username and password, `validate_credentials` validates the credentials in `.netrc` instead.
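For example, a script can fail fast before starting a long download session (a sketch; with no arguments, the credentials in `.netrc` are checked):

```python
from cdsetool.credentials import validate_credentials

if not validate_credentials():
    raise SystemExit("CDSE credentials are missing or invalid")
```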
#### Concurrently downloading features
CDSETool provides a method for concurrently downloading features. The concurrency level
should match your account's privileges.
See [CDSE quotas](https://documentation.dataspace.copernicus.eu/Quotas.html)
Downloaded feature IDs are yielded lazily, so you must consume the generator for the downloads to run.
```python
from cdsetool.query import query_features
from cdsetool.download import download_features
features = query_features("SENTINEL-2", {"contentDateStartGe": "2024-01-01", "contentDateStartLe": "2024-01-10"})
download_path = "/path/to/download/folder"
downloads = download_features(features, download_path, {"concurrency": 4})
for feature_id in downloads:
    print(f"feature {feature_id} downloaded")
# or
list(downloads)
```
#### Sequentially downloading features
It's possible to download features sequentially in a single thread if desired.
```python
from cdsetool.query import query_features
from cdsetool.download import download_feature
features = query_features("SENTINEL-2", {"contentDateStartGe": "2024-01-01", "contentDateStartLe": "2024-01-10"})
download_path = "/path/to/download/folder"
for feature in features:
download_feature(feature, download_path)
```
#### Download specific files within features
It's possible to download specific files within product bundles using Unix filename pattern matching.
It can be used in CDSETool:
- Through the `filter_pattern` option of `download_features` and `download_feature`:
```python
from cdsetool.query import query_features
from cdsetool.download import download_features
features = query_features("SENTINEL-2", {"contentDateStartGe": "2024-01-01", "contentDateStartLe": "2024-01-10"})
download_path = "/path/to/download/folder"
filter_pattern = "*TCI.jp2"
downloads = download_features(features, download_path, {"filter_pattern": filter_pattern})
for feature_id in downloads:
    print(f"feature {feature_id} downloaded")
```
- Or through the CLI:
```bash
cdsetool download SENTINEL-2 PATH/TO/DIR --filter-pattern "*TCI.jp2" --concurrency 4 --search-term contentDateStartGe=2024-01-01 --search-term contentDateStartLe=2024-01-10 --search-term productType=S2MSI2A
```
## Roadmap
- [X] Query schema validation
- [ ] High-level API
- [ ] Query features
- [ ] Download features
- [ ] Download single feature
- [ ] Download list of features
- [ ] Download by ID
- [ ] Download by URL
- [ ] Command-Line Interface
- [ ] Update to match the high-level API
- [ ] Better `--help` messages
- [ ] Quickstart guide in README.md
- [ ] Test suite
- [ ] Query
- [ ] Credentials
- [ ] Download
- [ ] Monitor
- [ ] Strategy for handling HTTP and connection errors
## Contributing
Any contributions you make are **greatly appreciated**.
If you have a suggestion that would make this better, please fork the repo and create a pull request.
You can also simply open an issue with the tag "enhancement".
1. Fork the Project
2. Create your Feature Branch (`git checkout -b feature/cool-new-feature`)
3. Commit your Changes (`git commit -m 'Add some feature'`)
4. Push to the Branch (`git push origin feature/cool-new-feature`)
5. Open a Pull Request
## LICENSE
Distributed under the MIT License. See [LICENSE](LICENSE) for more information.
==> CDSETool-0.3.1/pyproject.toml <==
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"
[project]
name = "cdsetool"
version = "0.3.1"
authors = [
{ name="Jacob Vejby", email="javej@sdfi.dk" },
]
description = "Tools & CLI for interacting with CDSE product APIs"
readme = "README.md"
requires-python = ">=3.9"
classifiers = [
"Programming Language :: Python :: 3",
"Operating System :: OS Independent",
]
dependencies = [
"typer >= 0.9,< 1",
"rich >= 13.6,< 16",
"requests >= 2.28.1,< 3",
"pyjwt[crypto] >= 2.8,< 2.13",
"geopandas >= 0.13.2",
]
[dependency-groups]
dev = [
"ruff==0.15.11",
]
# (Jan 30 2025) Pip does not support dependency-groups yet, so
# leave the optional dependencies for everything except Ruff.
[project.optional-dependencies]
test = [
"pylint==3.3.9",
"pytest==8.4.2",
"pytest-cov==7.1.0",
"requests-mock==1.12.1",
"pytest-mock==3.15.1",
]
[project.urls]
"Homepage" = "https://github.com/CDSETool/CDSETool"
"Bug Tracker" = "https://github.com/CDSETool/CDSETool/issues"
[project.scripts]
cdsetool = "cdsetool:cli.main"
[tool.pylint.design]
max-locals = 20
[tool.pylint.format]
max-line-length = "88"
disable="fixme"
[tool.pylint.imports]
disable="wrong-import-order"
[tool.pyright]
exclude = ["**/__pycache__", ".venv"]
# Can be strict too, see more fine-grained settings at:
# https://github.com/microsoft/pyright/blob/main/docs/configuration.md#diagnostic-settings-defaults
# reportMissingParameterType = true
typeCheckingMode = "standard"
executionEnvironments = [
{ root = "./" }
]
[tool.pytest.ini_options]
addopts = [
"--import-mode=importlib",
]
[tool.ruff.lint]
select = [
"I", # isort
]
==> CDSETool-0.3.1/src/cdsetool/__init__.py <==

==> CDSETool-0.3.1/src/cdsetool/_attributes.py <==
"""Attribute definitions for CDSE OData product queries."""
from typing import Dict, List, TypedDict
class AttributeInfo(TypedDict, total=False):
"""Lookup entry for a product attribute's type and supported collections."""
Type: str
Title: str
Collections: List[str]
ATTRIBUTES: Dict[str, AttributeInfo] = {
"USGScollection": {
"Type": "String",
"Collections": [
"LANDSAT-8",
"LANDSAT-9",
],
},
"acquisitionType": {
"Type": "String",
"Collections": [
"SENTINEL-5P",
],
},
"authority": {
"Type": "String",
"Collections": [
"SENTINEL-6",
"SENTINEL-1-RTC",
"ENVISAT",
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"baselineCollection": {
"Type": "String",
"Collections": [
"SENTINEL-3",
"SENTINEL-5P",
],
},
"brightCover": {
"Type": "Double",
"Collections": [
"SENTINEL-3",
],
},
"card4lSpecification": {
"Type": "String",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"card4lSpecificationVersion": {
"Type": "String",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"closedSeaCover": {
"Type": "Integer",
"Collections": [
"SENTINEL-3",
],
},
"cloudCover": {
"Type": "Double",
"Title": "Cloud cover percentage (0-100)",
"Collections": [
"SENTINEL-2",
"SENTINEL-3",
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"cloudCoverLand": {
"Type": "Double",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"coastalCover": {
"Type": "Double",
"Collections": [
"SENTINEL-3",
],
},
"collectionCategory": {
"Type": "String",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"collectionName": {
"Type": "String",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
],
},
"collectionNumber": {
"Type": "String",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"completionTimeFromAscendingNode": {
"Type": "Double",
"Collections": [
"SENTINEL-1",
],
},
"continentalIceCover": {
"Type": "Integer",
"Collections": [
"SENTINEL-3",
],
},
"coordinates": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-5P",
],
},
"cycleNumber": {
"Type": "Integer",
"Collections": [
"SENTINEL-1",
"SENTINEL-3",
"ENVISAT",
],
},
"datastripId": {
"Type": "String",
"Collections": [
"SENTINEL-2",
],
},
"datatakeID": {
"Type": "Integer",
"Collections": [
"SENTINEL-1",
],
},
"doi": {
"Type": "String",
"Collections": [
"SENTINEL-5P",
],
},
"freshInlandWaterCover": {
"Type": "Double",
"Collections": [
"SENTINEL-3",
],
},
"geometricRmse": {
"Type": "Double",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"geometricXBias": {
"Type": "Double",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"geometricXStddev": {
"Type": "Double",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"geometricYBias": {
"Type": "Double",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"geometricYStddev": {
"Type": "Double",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"granuleIdentifier": {
"Type": "String",
"Collections": [
"SENTINEL-2",
],
},
"identifier": {
"Type": "String",
"Collections": [
"SENTINEL-5P",
],
},
"illuminationZenithAngle": {
"Type": "Double",
"Collections": [
"SENTINEL-2",
],
},
"instrumentConfigurationID": {
"Type": "Integer",
"Collections": [
"SENTINEL-1",
],
},
"instrumentShortName": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-5P",
"SENTINEL-6",
"SENTINEL-1-RTC",
"ENVISAT",
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"landCover": {
"Type": "Double",
"Collections": [
"SENTINEL-3",
],
},
"lastOrbitDirection": {
"Type": "String",
"Collections": [
"SENTINEL-3",
],
},
"lastOrbitNumber": {
"Type": "Integer",
"Collections": [
"SENTINEL-2",
"SENTINEL-3",
],
},
"lastRelativeOrbitNumber": {
"Type": "Integer",
"Collections": [
"SENTINEL-3",
],
},
"numberOfBands": {
"Type": "Integer",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"offNadir": {
"Type": "Double",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"openOceanCover": {
"Type": "Integer",
"Collections": [
"SENTINEL-3",
],
},
"operationalMode": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-6",
"SENTINEL-1-RTC",
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"orbitDirection": {
"Type": "String",
"Title": "Orbit direction (ASCENDING or DESCENDING)",
"Collections": [
"SENTINEL-1",
"SENTINEL-3",
"SENTINEL-1-RTC",
],
},
"orbitNumber": {
"Type": "Integer",
"Title": "Absolute orbit number",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-5P",
"SENTINEL-6",
"SENTINEL-1-RTC",
"ENVISAT",
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"origin": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-6",
],
},
"parentIdentifier": {
"Type": "String",
"Collections": [
"SENTINEL-5P",
],
},
"pathNumber": {
"Type": "Integer",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"phaseNumber": {
"Type": "Integer",
"Collections": [
"ENVISAT",
],
},
"platformSerialIdentifier": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-5P",
"SENTINEL-6",
"SENTINEL-1-RTC",
],
},
"platformShortName": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-5P",
"SENTINEL-6",
"SENTINEL-1-RTC",
"ENVISAT",
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"polarisationChannels": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-1-RTC",
],
},
"processingBaseline": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-5P",
"SENTINEL-6",
],
},
"processingCenter": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-5P",
"SENTINEL-6",
],
},
"processingDate": {
"Type": "DateTimeOffset",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-5P",
"SENTINEL-6",
],
},
"processingLevel": {
"Type": "String",
"Title": "Processing level",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-5P",
"SENTINEL-6",
"SENTINEL-1-RTC",
"ENVISAT",
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"processingMode": {
"Type": "String",
"Collections": [
"SENTINEL-5P",
],
},
"processorName": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-3",
"SENTINEL-5P",
],
},
"processorVersion": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-5P",
"SENTINEL-6",
],
},
"productClass": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-5P",
],
},
"productComposition": {
"Type": "String",
"Collections": [
"SENTINEL-1",
],
},
"productConsolidation": {
"Type": "String",
"Collections": [
"SENTINEL-1",
],
},
"productGeneration": {
"Type": "DateTimeOffset",
"Collections": [
"SENTINEL-1",
],
},
"productGroupId": {
"Type": "String",
"Collections": [
"SENTINEL-2",
],
},
"productType": {
"Type": "String",
"Title": "Product type",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-5P",
"SENTINEL-6",
"SENTINEL-1-RTC",
"ENVISAT",
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"proj:epsg": {
"Type": "Integer",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"projShape": {
"Type": "String",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"projTransform": {
"Type": "String",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"qualityInfo": {
"Type": "Integer",
"Collections": [
"SENTINEL-2",
],
},
"qualityStatus": {
"Type": "String",
"Collections": [
"SENTINEL-2",
"SENTINEL-5P",
],
},
"relativeOrbitNumber": {
"Type": "Integer",
"Title": "Relative orbit number",
"Collections": [
"SENTINEL-1",
"SENTINEL-2",
"SENTINEL-3",
"SENTINEL-6",
"SENTINEL-1-RTC",
],
},
"rowNumber": {
"Type": "Integer",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"salineWaterCover": {
"Type": "Double",
"Collections": [
"SENTINEL-3",
],
},
"sceneId": {
"Type": "String",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"segmentStartTime": {
"Type": "DateTimeOffset",
"Collections": [
"SENTINEL-1",
],
},
"sliceNumber": {
"Type": "Integer",
"Collections": [
"SENTINEL-1",
],
},
"sliceProductFlag": {
"Type": "Boolean",
"Collections": [
"SENTINEL-1",
],
},
"snowOrIceCover": {
"Type": "Double",
"Collections": [
"SENTINEL-3",
],
},
"source": {
"Type": "String",
"Collections": [
"SENTINEL-6",
],
},
"sourceProduct": {
"Type": "String",
"Collections": [
"SENTINEL-2",
],
},
"sourceProductOriginDate": {
"Type": "String",
"Collections": [
"SENTINEL-2",
],
},
"spatialResolution": {
"Type": "Integer",
"Collections": [
"SENTINEL-6",
"SENTINEL-1-RTC",
"ENVISAT",
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"startTimeFromAscendingNode": {
"Type": "Double",
"Collections": [
"SENTINEL-1",
],
},
"sunAzimuthAngle": {
"Type": "Double",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"sunElevationAngle": {
"Type": "Double",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"swathIdentifier": {
"Type": "String",
"Collections": [
"SENTINEL-1",
],
},
"tidalRegionCover": {
"Type": "Double",
"Collections": [
"SENTINEL-3",
],
},
"tileId": {
"Type": "String",
"Collections": [
"SENTINEL-2",
],
},
"timeliness": {
"Type": "String",
"Collections": [
"SENTINEL-1",
"SENTINEL-3",
"SENTINEL-6",
],
},
"totalSlices": {
"Type": "Integer",
"Collections": [
"SENTINEL-1",
],
},
"view:sun_azimuth": {
"Type": "Double",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"view:sun_elevation": {
"Type": "Double",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"wrsPath": {
"Type": "String",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"wrsRow": {
"Type": "String",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
"wrsType": {
"Type": "String",
"Collections": [
"LANDSAT-5",
"LANDSAT-7",
"LANDSAT-8",
"LANDSAT-9",
],
},
}
==> CDSETool-0.3.1/src/cdsetool/_processing.py <==
"""
This module provides functions for processing data concurrently
"""
from concurrent.futures import FIRST_COMPLETED, Future, ThreadPoolExecutor, wait
from typing import Callable, Generator, Iterable, List, Union
from cdsetool.query import FeatureQuery
def _concurrent_process(
fun: Callable[[FeatureQuery], Union[str, None]],
iterable: Iterable,
workers: int = 4,
) -> Generator[Union[str, None], None, None]:
"""
Process items in an iterable concurrently
Items are taken from the iterable as soon as a worker becomes available
Returns an iterable of the results
"""
# Futures are submitted in batches instead of all at once to avoid
# requesting too many items from the iterable at once, which is important if
# the iterable is a generator that is producing items on the fly.
#
# The 1.5 factor is a small overhead to keep jobs > workers at all times, instead
# of jobs == workers, which could cause the workers to be idle while waiting for
# the iterable to produce more items.
low_water_mark = int(workers * 1.5)
iterator = iter(iterable)
with ThreadPoolExecutor(max_workers=workers) as executor:
futures: List[Future[Union[str, None]]] = [] # pylint: disable=E1136
# Pluck an item from the iterable and submit it to the executor.
# If the iterable is exhausted, this function is a no-op.
def submit_item() -> None:
item = next(iterator, None)
if item is not None:
futures.append(executor.submit(fun, item))
# Fill the futures list up to the low water mark
def fill_futures() -> None:
for _ in range(low_water_mark - len(futures)):
submit_item()
# Submit the first batch of items
fill_futures()
# Continue until no more futures are queued
while futures:
# Wait for the first future(s) to complete
done, not_done = wait(futures, return_when=FIRST_COMPLETED)
futures = list(not_done)
for future in done:
yield future.result()
# Submit items to replace the ones that are done.
fill_futures()
==> CDSETool-0.3.1/src/cdsetool/cli.py <==
"""
Command line interface
"""
import json as JSON
import os
import sys
from typing import Annotated, Dict, List, Optional
import typer
from cdsetool.download import download_features
from cdsetool.monitor import StatusMonitor
from cdsetool.query import (
SearchTermValue,
describe_collection,
describe_search_terms,
query_features,
)
app = typer.Typer(no_args_is_help=True)
query_app = typer.Typer(no_args_is_help=True)
app.add_typer(query_app, name="query")
def _format_attributes(attributes: Dict[str, Dict[str, str]]) -> str:
"""Format attribute details into a readable string."""
lines = []
for key, attr in attributes.items():
lines.append(f" - {key}")
if title := attr.get("title"):
lines.append(f" Description: {title}")
if attribute_type := attr.get("type"):
lines.append(f" Type: {attribute_type}")
if example := attr.get("example"):
lines.append(f" Example: {example}")
return "\n".join(lines)
@query_app.command("search-terms")
def query_search_terms(
collection: Annotated[
Optional[str],
typer.Argument(
help="Collection name (e.g., SENTINEL-1, SENTINEL-2). "
"If omitted, shows only builtin parameters without querying the server."
),
] = None,
) -> None:
"""
List the available search terms for a collection
"""
if collection is None:
# No collection specified - show only builtin params (no API call)
print("Builtin search terms (use with --search-term):")
print()
print(_format_attributes(describe_search_terms()))
print()
print("Specify a collection name to see collection-specific attributes.")
else:
# Collection specified - fetch from API and show all supported params
print(f"Search terms for collection {collection}:")
print()
if search_terms := describe_collection(collection):
print(_format_attributes(search_terms))
else:
print(" (none)")
# TODO: implement limit
@query_app.command("search")
def query_search(
collection: str,
search_term: Annotated[
Optional[List[str]],
typer.Option(
help="Search by term=value pairs. "
+ "Pass multiple times for multiple search terms"
),
] = None,
json: Annotated[bool, typer.Option(help="Output JSON")] = False,
) -> None:
"""
Search for features matching the search terms
"""
search_term = search_term or []
features = query_features(
collection, _to_dict(search_term), options={"max_attempts": 1}
)
for feature in features:
if json:
print(JSON.dumps(feature))
else:
print(feature.get("Name"))
# TODO: implement limit
@app.command("download")
def download( # pylint: disable=[too-many-arguments, too-many-positional-arguments]
collection: str,
path: str,
concurrency: Annotated[
int, typer.Option(help="Number of concurrent connections")
] = 1,
overwrite_existing: Annotated[
bool, typer.Option(help="Overwrite already downloaded files")
] = False,
search_term: Annotated[
Optional[List[str]],
typer.Option(
help="Search by term=value pairs. "
+ "Pass multiple times for multiple search terms"
),
] = None,
filter_pattern: Annotated[
Optional[str],
typer.Option(
help=(
"Download specific files within product bundles using OData API's node"
" filtering functionality"
)
),
] = None,
) -> None:
"""
Download all features matching the search terms
"""
if not os.path.exists(path):
print(f"Path {path} does not exist")
sys.exit(1)
search_term = search_term or []
features = query_features(
collection, _to_dict(search_term), options={"max_attempts": 1}
)
results = list(
download_features(
features,
path,
{
"monitor": StatusMonitor(),
"concurrency": concurrency,
"overwrite_existing": overwrite_existing,
"filter_pattern": filter_pattern,
"download_attempts": 1,
},
)
)
downloaded = sum(1 for item in results if item is not None)
print(f"Successfully downloaded {downloaded}/{len(results)} feature(s)")
def main():
"""
Main entry point
"""
app()
def _to_dict(term_list: List[str]) -> Dict[str, SearchTermValue]:
search_terms = {}
for item in term_list:
key, value = item.split("=", 1) # Split on first = only
search_terms[key] = value
return search_terms
if __name__ == "__main__":
main()
==> CDSETool-0.3.1/src/cdsetool/credentials.py <==
"""
This module provides a class for handling credentials for
the Copernicus Identity and Access Management (IAM) system.
"""
import netrc
import threading
from datetime import datetime, timedelta
from typing import Any, Dict, List, Union
import jwt
import requests
from requests.adapters import HTTPAdapter
from urllib3.util import Retry
class NoCredentialsException(Exception):
"""
Raised when no credentials are found
"""
class InvalidCredentialsException(Exception):
"""
Raised when credentials are invalid
"""
class DeprecatedNoTokenException(Exception):
"""
Deprecated
"""
def NoTokenException(*args: object, **kwargs: object): # pylint: disable=invalid-name
"""
Raised when no token is available
"""
from warnings import warn # pylint: disable=import-outside-toplevel
    error_msg = [
        "Warning! NoTokenException is deprecated, and will be removed in",
        "the next major release.",
    ]
warn(" ".join(error_msg))
return DeprecatedNoTokenException(*args, **kwargs)
class TokenExchangeException(Exception):
"""
Raised when token exchange fails
"""
class TokenClientConnectionError(Exception):
"""
Raised when token connection fails.
"""
class TokenExpiredSignatureError(Exception):
"""
Raised when token signature has expired.
"""
class Credentials: # pylint: disable=too-few-public-methods disable=too-many-instance-attributes
"""
A class for handling credentials for the Copernicus Identity
and Access Management (IAM) system
"""
RETRY_CODES = frozenset([413, 429, 500, 502, 503])
RETRIES = Retry(
total=25,
backoff_factor=0.5,
raise_on_status=False,
status_forcelist=RETRY_CODES,
)
def __init__(
self,
username: Union[str, None] = None,
password: Union[str, None] = None,
openid_configuration_endpoint: Union[str, None] = None,
proxies: Union[Dict[str, str], None] = None,
) -> None:
self.__username: Union[str, None] = username
self.__password: Union[str, None] = password
self.__proxies: Union[Dict[str, str], None] = proxies
self.__openid_conf = None
self.__jwks = None
self.__openid_configuration_endpoint: str = (
openid_configuration_endpoint
or "https://identity.dataspace.copernicus.eu"
+ "/auth/realms/CDSE/.well-known/openid-configuration"
)
self.__access_token: Union[str, None] = None
self.__refresh_token: Union[str, None] = None
self.__access_token_expires: datetime = datetime.now() - timedelta(hours=8)
self.__refresh_token_expires: datetime = self.__access_token_expires
self.__lock = threading.Lock()
if self.__username is None or self.__password is None:
self.__read_credentials()
self.__ensure_tokens()
def get_session(self) -> requests.Session:
"""
Returns a session with the credentials set as the Authorization header
"""
return self.make_session(self, True, self.RETRIES, self.__proxies)
@staticmethod
def make_session(
caller,
authorization: bool,
max_retries: Retry,
proxies: Union[Dict[str, str], None],
) -> requests.Session:
"""
Creates a new session. Authorization is only available from callers
that are subclasses of Credentials.
"""
if authorization:
caller.__ensure_tokens() # pylint: disable=protected-access
session = requests.Session()
session.mount("http://", HTTPAdapter(max_retries=max_retries))
session.mount("https://", HTTPAdapter(max_retries=max_retries))
if proxies is not None:
session.proxies.update(proxies)
if authorization:
token = caller.__access_token # pylint: disable=protected-access
session.headers.update({"Authorization": f"Bearer {token}"})
return session
def __token_exchange(self, data: Dict[str, str]) -> timedelta:
# Make a session that will retry post, respecting the retry-after
# header when we get a 503 and a few other temporary failures.
session = self.make_session(
caller=self,
authorization=False,
max_retries=Retry(
total=15,
backoff_factor=0.5,
allowed_methods=None,
raise_on_status=False,
status_forcelist=self.RETRY_CODES,
),
proxies=self.__proxies,
)
response = session.post(self.__token_endpoint, data=data, timeout=120)
if response.status_code == 401:
raise InvalidCredentialsException(
"Unable to exchange token with "
+ f"username: {self.__username} and "
+ f"password: {len(self.__password or '') * '*'}"
)
if response.status_code != 200:
raise TokenExchangeException(f"Token exchange failed: {response.text}")
response = response.json()
self.__access_token = response["access_token"]
self.__refresh_token = response["refresh_token"]
return timedelta(seconds=response["refresh_expires_in"])
def __ensure_tokens(self) -> None:
with self.__lock:
refresh_expire_delta = None
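            # If the access token has expired, get a new one: use the refresh
            # token grant while the refresh token is still valid, otherwise
            # fall back to a full password grant.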
if self.__access_token_expires < datetime.now():
if self.__refresh_token_expires < datetime.now():
data = {
"grant_type": "password",
"username": self.__username,
"password": self.__password,
"client_id": "cdse-public",
}
else:
data = {
"grant_type": "refresh_token",
"refresh_token": self.__refresh_token,
"client_id": "cdse-public",
}
refresh_expire_delta = self.__token_exchange(data)
if not self.__access_token:
raise InvalidCredentialsException(
"Internal error: access token not available"
)
try:
key = self.__jwk_client.get_signing_key_from_jwt(self.__access_token)
except jwt.PyJWKClientConnectionError as e:
raise TokenClientConnectionError from e
try:
data = jwt.decode(
self.__access_token,
key=key.key,
algorithms=self.__id_token_signing_algos, # pylint: disable=protected-access
options={"verify_aud": False},
)
except jwt.ExpiredSignatureError as e:
raise TokenExpiredSignatureError from e
if refresh_expire_delta is not None:
self.__access_token_expires = datetime.fromtimestamp(data["exp"])
self.__refresh_token_expires = (
datetime.fromtimestamp(data["iat"]) + refresh_expire_delta
)
def __read_credentials(self) -> None:
rv = netrc.netrc().authenticators(self.__token_endpoint)
if isinstance(rv, tuple):
self.__username, _, self.__password = rv
else:
raise NoCredentialsException("No credentials found")
@property
def __openid_configuration(self) -> Dict[str, Any]:
if self.__openid_conf:
return self.__openid_conf
session = self.make_session(
caller=self,
authorization=False,
max_retries=self.RETRIES,
proxies=self.__proxies,
)
response = session.get(self.__openid_configuration_endpoint, timeout=120)
response.raise_for_status()
self.__openid_conf = response.json()
return self.__openid_conf
@property
def __token_endpoint(self) -> str:
return self.__openid_configuration["token_endpoint"]
@property
def __jwks_uri(self) -> str:
return self.__openid_configuration["jwks_uri"]
@property
def __id_token_signing_algos(self) -> List[str]:
return self.__openid_configuration["id_token_signing_alg_values_supported"]
@property
def __jwk_client(self) -> jwt.PyJWKClient:
if self.__jwks:
return self.__jwks
self.__jwks = jwt.PyJWKClient(self.__jwks_uri)
return self.__jwks
def validate_credentials(
username: Union[str, None] = None, password: Union[str, None] = None
) -> bool:
"""
This function validates CDSE credentials and returns a bool.
If credentials are none, .netrc will be validated
"""
try:
Credentials(username, password)
return True
except NoCredentialsException:
return False
except InvalidCredentialsException:
return False
except TokenExchangeException:
return False
==> CDSETool-0.3.1/src/cdsetool/download.py <==
"""
Download features from a Copernicus Data Space Ecosystem OData API result
Provides a function to download a single feature, a function to download all features
in a result set, and a function to download specific files in a given feature using
node filtering.
"""
import fnmatch
import os
import random
import shutil
import tempfile
import time
from pathlib import Path
from typing import Any, Dict, Generator, List, Union
from xml.etree import ElementTree as ET
from requests import Session
from requests.exceptions import ChunkedEncodingError
from urllib3.exceptions import ProtocolError
from cdsetool._processing import _concurrent_process
from cdsetool.credentials import (
Credentials,
TokenClientConnectionError,
TokenExpiredSignatureError,
)
from cdsetool.logger import NoopLogger
from cdsetool.monitor import NoopMonitor, StatusMonitor
from cdsetool.query import FeatureQuery
MANIFEST_FILENAMES = {
"SENTINEL-1": "manifest.safe",
"SENTINEL-2": "manifest.safe",
"SENTINEL-3": "xfdumanifest.xml",
}
def filter_files(
manifest_file: Path, pattern: Union[str, None], exclude: bool = False
) -> Union[List[Path], None]:
"""
Filter a product's files, listed in its manifest, based on a given pattern.
Returns a list of file paths within the product bundle.
All files not matching the pattern are returned if "exclude" is set to true.
"""
def read_sentinel_manifest(manifest_file: Path) -> Union[List[Path], None]:
xmldoc = ET.parse(manifest_file)
section = xmldoc.find("dataObjectSection")
if section is None:
return None
paths = []
for obj in section.iterfind("dataObject"):
loc = obj.find("byteStream/fileLocation")
if loc is None:
return None
path = loc.get("href")
if path is None:
return None
paths.append(Path(path))
return paths
if pattern is None:
return []
paths = read_sentinel_manifest(manifest_file)
if paths is None:
return None
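    # XOR with `exclude` flips the test, returning the non-matching paths instead.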
return [path for path in paths if fnmatch.fnmatch(str(path), pattern) ^ exclude]
def download_file(url: str, path: Path, options: Dict[str, Any]) -> bool:
"""
Download a single file.
Caller is responsible for ensuring that nothing else writes to path.
"""
log = _get_logger(options)
filename = path.name
max_attempts = options.get("download_attempts", 10)
with _get_monitor(options).status() as status:
status.set_filename(filename)
attempts = 0
while attempts < max_attempts:
attempts += 1
# Always get a new session, credentials might have expired.
try:
session = _get_credentials(options).get_session()
except TokenClientConnectionError:
log.warning("Token client connection failed, retrying..")
continue
except TokenExpiredSignatureError:
log.warning("Token signature expired, retrying..")
continue
url = _follow_redirect(url, session)
with session.get(url, stream=True) as response:
if response.status_code != 200:
retrying = attempts < max_attempts
log.warning(
f"Status code {response.status_code}, "
f"{'retrying...' if retrying else 'aborting'}"
)
if retrying:
time.sleep(60 * (1 + (random.random() / 4)))
continue
status.set_filesize(int(response.headers["Content-Length"]))
with open(path, "wb") as outfile:
# Server might not send all bytes specified by the
# Content-Length header before closing connection.
# Log as a warning and try again.
try:
for chunk in response.iter_content(chunk_size=1024 * 1024 * 5):
outfile.write(chunk)
status.add_progress(len(chunk))
except (
ChunkedEncodingError,
ConnectionResetError,
ProtocolError,
) as e:
log.warning(e)
continue
return True
log.error(f"Failed to download {filename}")
return False
def download_feature( # pylint: disable=too-many-return-statements
feature, path: str, options: Union[Dict[str, Any], None] = None
) -> Union[str, None]:
"""
Download a single feature.
Returns the feature title.
"""
options = options or {}
log = _get_logger(options)
temp_dir_usr = _get_temp_dir(options)
title = feature.get("Name")
collection = feature.get("Collection")
download_full = options.get("filter_pattern") is None
try:
manifest_filename = "" if download_full else MANIFEST_FILENAMES[collection]
except KeyError:
log.error(
f"No support for downloading individual files in {collection} products"
)
return None
# Prepare to download full product, or manifest file if filter_pattern is used
filename = title + ".zip" if download_full else manifest_filename
url = (
_get_feature_url(feature)
if download_full
else _get_odata_url(feature["Id"], title, filename)
)
if not url or not title:
log.debug(f"Bad URL ('{url}') or title ('{title}')")
return None
result_path = os.path.join(path, filename if download_full else title)
if not options.get("overwrite_existing", False) and os.path.exists(result_path):
log.debug(f"File {result_path} already exists, skipping..")
return os.path.basename(result_path)
with tempfile.TemporaryDirectory(
prefix=f"{title}____", dir=temp_dir_usr
) as temp_dir:
temp_file = os.path.join(temp_dir, filename)
if not download_file(url, Path(temp_file), options):
return None
# If filter_pattern is used, list matching files based on manifest contents
temp_product_path = os.path.join(temp_dir, title)
filtered_files = filter_files(Path(temp_file), options.get("filter_pattern"))
if filtered_files is None:
log.error(f"Failed to parse manifest file for {title}")
return None
for file in filtered_files:
output_file = os.path.join(temp_product_path, file)
os.makedirs(os.path.dirname(output_file), exist_ok=True)
if not download_file(
_get_odata_url(feature["Id"], title, str(file)),
Path(output_file),
options,
):
log.error(f"Failed to download {file} from {title}")
return None
# Move downloaded files to output dir
if download_full or filtered_files:
os.makedirs(path, exist_ok=True)
shutil.move(temp_file if download_full else temp_product_path, path)
return filename if download_full else title
def download_features(
features: FeatureQuery, path: str, options: Union[Dict[str, Any], None] = None
) -> Generator[Union[str, None], None, None]:
"""
Generator function that downloads all features in a result set
Feature IDs are yielded as they are downloaded
"""
options = options or {}
options["credentials"] = _get_credentials(options)
options["logger"] = _get_logger(options)
options["monitor"] = _get_monitor(options)
options["monitor"].start()
def _download_feature(feature) -> Union[str, None]:
return download_feature(feature, path, options)
try:
yield from _concurrent_process(
_download_feature, features, options.get("concurrency", 1)
)
finally:
options["monitor"].stop()
def _get_feature_url(feature) -> str:
"""
Generate OData download URL for a feature from its ID.
"""
feature_id = feature.get("Id")
return (
(
"https://download.dataspace.copernicus.eu"
f"/odata/v1/Products({feature_id})/$value"
)
if feature_id
else ""
)
def _get_odata_url(product_id: str, product_name: str, href: str) -> str:
"""
Convert href, describing file location in manifest file, to an OData download URL.
"""
odata_url = "https://download.dataspace.copernicus.eu/odata/v1"
path = "/".join([f"Nodes({item})" for item in href.split("/")])
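    # e.g. href "measurement/data.jp2" maps to
    # .../Products(<id>)/Nodes(<name>)/Nodes(measurement)/Nodes(data.jp2)/$value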
return f"{odata_url}/Products({product_id})/Nodes({product_name})/{path}/$value"
def _follow_redirect(url: str, session: Session) -> str:
response = session.head(url, allow_redirects=False)
while response.status_code in range(300, 400):
url = response.headers["Location"]
response = session.head(url, allow_redirects=False)
return url
def _get_logger(options: Dict) -> NoopLogger:
return options.get("logger") or NoopLogger()
def _get_monitor(options: Dict) -> Union[StatusMonitor, NoopMonitor]:
return options.get("monitor") or NoopMonitor()
def _get_credentials(options: Dict) -> Credentials:
return options.get("credentials") or Credentials(
proxies=options.get("proxies", None)
)
def _get_temp_dir(options: Dict) -> Union[str, None]:
return options.get("tmpdir") or None
CDSETool-0.3.1/src/cdsetool/logger.py 0000664 0000000 0000000 00000001246 15172660107 0017333 0 ustar 00root root 0000000 0000000 """
Logging utilities.
This module provides a NoopLogger class that outputs nothing.
"""
from typing import Any
class NoopLogger:
"""
A logger that does nothing.
"""
def debug(self, msg: object, *args: Any, **kwargs: Any) -> None:
"""
Log a debug message.
"""
def error(self, msg: object, *args: Any, **kwargs: Any) -> None:
"""
Log an error message.
"""
def info(self, msg: object, *args: Any, **kwargs: Any) -> None:
"""
Log an info message.
"""
def warning(self, msg: object, *args: Any, **kwargs: Any) -> None:
"""
Log a warning message.
"""
CDSETool-0.3.1/src/cdsetool/monitor.py 0000664 0000000 0000000 00000017653 15172660107 0017554 0 ustar 00root root 0000000 0000000 """
File download monitoring.
This module provides a StatusMonitor class that can be used to monitor the
progress of file downloads.
For non-interactive programs, the NoopMonitor class can be used to disable
status monitoring.
"""
import os
import shutil
import signal
import sys
import threading
import time
from typing import List, Tuple, Union
IS_IPYTHON = True
try:
from IPython import get_ipython # type:ignore[reportMissingImports]
from IPython.display import clear_output # type:ignore[reportMissingImports]
if "IPKernelApp" not in get_ipython().config:
IS_IPYTHON = False
except ImportError:
IS_IPYTHON = False
except AttributeError:
IS_IPYTHON = False
class StatusMonitor(threading.Thread):
"""
A monitor that prints a status bar for each download
Usage:
with StatusMonitor() as monitor:
with monitor.status() as status:
status.set_filename("filename.txt")
status.set_filesize(1024)
status.add_progress(512)
time.sleep(10)
status.add_progress(512)
"""
line_length: int = 80
__is_running: bool = True
__progress_lines: int = 0
__download_speed_deltas: List[int] = []
    __done: List["Status"] = []
    __status: List["Status"] = []
def start(self) -> None:
"""
Start the monitor
"""
def _set_line_length(_signal_num: Union[int, None], _stack) -> None:
self.line_length, _ = shutil.get_terminal_size()
_set_line_length(None, None)
if os.name != "nt":
signal.signal(signal.SIGWINCH, _set_line_length)
super().start()
def stop(self) -> None:
"""
Stop the monitor
"""
self.__is_running = False
def status(self) -> "Status":
"""
Returns a status bar for a single download
"""
status = Status(self)
self.__status.append(status)
return status
def remove_status(self, status: "Status") -> None:
"""
Remove a status from the monitor, marking it as done
"""
self.__done.append(status)
self.__status.remove(status)
def run(self) -> None:
"""
Main loop for the monitor, printing the status bars every second until stopped
"""
while True:
self.__track_download_speed()
if self.__is_running is False:
break
self.__clear_progress_lines()
self.__print_done_lines()
self.__draw()
print("")
@property
def __download_speed(self) -> float:
if len(self.__download_speed_deltas) < 2:
return 0
return sum(self.__download_speed_deltas) / len(self.__download_speed_deltas)
def __track_download_speed(self) -> None:
speed_t0 = self.__total_downloaded
time.sleep(1)
speed_t1 = self.__total_downloaded
self.__download_speed_deltas.append(speed_t1 - speed_t0)
if len(self.__download_speed_deltas) > 10:
self.__download_speed_deltas.pop(0)
def __print_done_lines(self) -> None:
for status in self.__done:
print(status.done_line())
def __clear_progress_lines(self) -> None:
if IS_IPYTHON:
clear_output(wait=True) # type:ignore[reportPossiblyUnboundVariable]
return
sys.stdout.write("\033[K")
for _ in range(self.__progress_lines + 2):
sys.stdout.write("\033[F\033[K")
for _ in self.__done:
sys.stdout.write("\033[F\033[K")
print("")
print("")
def __draw(self) -> None:
self.__progress_lines = 1
print(
" | ".join(
[
"[[ ",
f"{len(self.__status)} files in progress",
f"{len(self.__done)} files done",
f"{bytes_to_human(self.__total_downloaded)} total downloaded",
f"{bytes_to_human(int(self.__download_speed))}/s ]]",
]
)
)
for status in self.__status:
filename_line, progress_line = status.status_lines()
print(filename_line.ljust(self.line_length, " "))
print(progress_line.ljust(self.line_length, " "))
self.__progress_lines += 2
@property
def __total_downloaded(self) -> int:
return sum(status.downloaded for status in self.__status) + sum(
status.size for status in self.__done
)
def __enter__(self) -> "StatusMonitor":
self.start()
return self
def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
self.stop()
class NoopMonitor:
"""
A monitor that does nothing
"""
def status(self) -> "Status":
"""
Returns a status bar for a single download
"""
return Status(self)
def remove_status(self, status: "Status") -> None:
"""
Remove a status from the monitor
"""
def start(self) -> None:
"""
Start the monitor
"""
def stop(self) -> None:
"""
Stop the monitor
"""
def __enter__(self) -> "NoopMonitor":
return self
def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
pass
class Status:
"""
A status bar for a single download
"""
__monitor: Union[NoopMonitor, StatusMonitor, None] = None
filename: Union[str, None] = None
size: int = 0
downloaded: int = 0
def done_line(self) -> str:
"""
Returns a line to print when the download is complete
"""
if self.size == 0 and self.downloaded == 0:
return f"{self.filename} skipped"
return f"{self.filename} ({bytes_to_human(self.size)})"
def status_lines(self) -> Tuple[str, str]:
"""
Returns a tuple of lines to print for the status bar
"""
if (
not self.__monitor
or isinstance(self.__monitor, NoopMonitor)
or not self.filename
):
return ("", "")
line_length = self.__monitor.line_length
if self.downloaded == 0:
return (
"Thread waiting for connection to start...",
f"[{' ' * (line_length - 2)}]",
)
        progress = self.downloaded / self.size if self.size else 0.0
filename_line = (
f"{self.filename[0 : line_length - 6]} "
+ f"{bytes_to_human(self.size)} ({int(progress * 100)}%)"
)
progress_line = (
"["
+ f"{'â–ˆ' * int(progress * (line_length - 2))}"
+ f"{' ' * (line_length - int(progress * (line_length - 2)) - 2)}"
+ "]"
)
return filename_line, progress_line
def add_progress(self, chunk_bytes: int) -> None:
"""
Add to the number of bytes downloaded
"""
self.downloaded += chunk_bytes
def set_filename(self, filename: str) -> None:
"""
Set the name of the file being downloaded
"""
self.filename = filename
def set_filesize(self, size: int) -> None:
"""
Set the size of the file being downloaded
"""
self.size = size
def __init__(self, monitor: Union[NoopMonitor, StatusMonitor]) -> None:
self.__monitor = monitor
def __enter__(self) -> "Status":
return self
def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
if self.__monitor:
self.__monitor.remove_status(self)
def bytes_to_human(num_bytes: int) -> str:
"""
Convert a number of bytes to a human-readable string
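
    Uses decimal (SI) multiples, for example:

        >>> bytes_to_human(512)
        '512 B'
        >>> bytes_to_human(1_500_000)
        '1.50 MB'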
"""
if num_bytes < 1000:
return f"{num_bytes} B"
if num_bytes < 1000000:
return f"{num_bytes / 1000:.2f} KB"
if num_bytes < 1000000000:
return f"{num_bytes / 1000000:.2f} MB"
if num_bytes < 1000000000000:
return f"{num_bytes / 1000000000:.2f} GB"
return f"{num_bytes / 1000000000000:.2f} TB"
CDSETool-0.3.1/src/cdsetool/py.typed 0000664 0000000 0000000 00000000000 15172660107 0017164 0 ustar 00root root 0000000 0000000 CDSETool-0.3.1/src/cdsetool/query.py 0000664 0000000 0000000 00000051513 15172660107 0017223 0 ustar 00root root 0000000 0000000 """
Query the Copernicus Data Space Ecosystem
https://documentation.dataspace.copernicus.eu/APIs/OData.html
"""
import json
import re
from dataclasses import dataclass
from datetime import date, datetime
from random import random
from time import sleep
from typing import Any, Dict, Final, List, Literal, Optional, TypeVar, Union
from urllib.parse import quote
import geopandas as gpd
from requests.exceptions import ChunkedEncodingError
from urllib3.exceptions import ProtocolError
from cdsetool._attributes import ATTRIBUTES
from cdsetool.credentials import Credentials
from cdsetool.logger import NoopLogger
SearchTermValue = Union[str, int, float, bool, date, datetime]
ODataComparisonOp = Literal["eq", "lt", "le", "gt", "ge"]
ODataAttributeType = Literal[
"StringAttribute",
"IntegerAttribute",
"DoubleAttribute",
"DateTimeOffsetAttribute",
"BooleanAttribute",
]
T = TypeVar("T")
# API-imposed limits from the Copernicus OData API
MAX_BATCH_SIZE: Final = 1000
@dataclass(frozen=True)
class DateFilterSpec:
"""Specification for a date-based filter."""
odata_field: str
operator: ODataComparisonOp
title: str
interval_only: bool
_OPERATOR_LABELS: Dict[ODataComparisonOp, str] = {
"eq": "equals",
"lt": "less than",
"le": "less than or equal",
"gt": "greater than",
"ge": "greater than or equal",
}
_DATE_FIELD_SPECS: List[tuple[str, str, str]] = [
("contentDateStart", "ContentDate/Start", "Acquisition start date"),
("contentDateEnd", "ContentDate/End", "Acquisition end date"),
("publicationDate", "PublicationDate", "Publication date"),
]
_OPERATOR_SUFFIXES: Dict[str, ODataComparisonOp] = {
"Eq": "eq",
"Lt": "lt",
"Le": "le",
"Gt": "gt",
"Ge": "ge",
}
_DATE_FILTERS: Dict[str, DateFilterSpec] = {
f"{base}{suffix}": DateFilterSpec(
field,
op,
f"{desc} {_OPERATOR_LABELS[op]} ({field} {op})",
interval_only=suffix == "",
)
for base, field, desc in _DATE_FIELD_SPECS
for suffix, op in [("", "eq"), *_OPERATOR_SUFFIXES.items()]
}
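# Generates keys like "contentDateStart" (interval-only), "contentDateStartEq",
# "contentDateStartLt", ..., "publicationDateGe"; see _build_odata_filter below.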
_BUILTIN_PARAMS: Dict[str, Dict[str, str]] = {
"name": {
"title": "Filter by product name (substring match)",
"example": "S2A_MSIL2A_20240110",
},
"geometry": {
"title": "WKT geometry for spatial filtering",
"example": "POLYGON((lon1 lat1, lon2 lat2, ...))",
},
}
_INTERNAL_PARAMS = {"top", "skip"}
_TYPE_TO_ODATA_ATTR: Dict[str, ODataAttributeType] = {
"String": "StringAttribute",
"Integer": "IntegerAttribute",
"Double": "DoubleAttribute",
"DateTimeOffset": "DateTimeOffsetAttribute",
"Boolean": "BooleanAttribute",
}
_DEPRECATED_PARAMS: Dict[str, str] = {
"box": (
"The 'box' parameter was only supported in the old OpenSearch API, "
"use the 'geometry' parameter with a polygon in WKT format instead. "
"Example: geometry='POLYGON((west south, west north, "
"east north, east south, west south))'."
),
"startDate": (
"The 'startDate' parameter has been renamed. Use 'contentDateStartGt' instead."
),
"completionDate": (
"The 'completionDate' parameter has been renamed. "
"Use 'contentDateEndLt' instead."
),
"publishedAfter": (
"The 'publishedAfter' parameter has been renamed. "
"Use 'publicationDateGt' instead."
),
"publishedBefore": (
"The 'publishedBefore' parameter has been renamed. "
"Use 'publicationDateLt' instead."
),
"maxRecords": "The 'maxRecords' parameter has been renamed. Use 'top' instead.",
}
class _FeatureIterator:
def __init__(self, feature_query) -> None:
self.index = 0
self.feature_query = feature_query
def __len__(self) -> int:
return len(self.feature_query)
def __iter__(self):
return self
def __next__(self):
try:
item = self.feature_query[self.index]
self.index += 1
return item
except IndexError as exc:
raise StopIteration from exc
class FeatureQuery: # pylint: disable=too-many-instance-attributes
"""
An iterator over the features matching the search terms
    Queries the API in batches of up to MAX_BATCH_SIZE features (the default)
    and returns them one by one, fetching the next batch when the current one
    is exhausted.
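
    Example (a minimal sketch; the name substring is a placeholder):

        query = FeatureQuery("SENTINEL-2", {"name": "S2A_MSIL2A"})
        print(len(query))  # total matches reported by the API
        for feature in query:
            print(feature["Name"])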
"""
def __init__(
self,
collection: str,
search_terms: Dict[str, SearchTermValue],
proxies: Union[Dict[str, str], None] = None,
options: Union[Dict[str, Any], None] = None,
) -> None:
opts = options or {}
self.total_results = -1
self.features: List[Dict[str, Any]] = []
self.proxies = proxies
self._max_attempts = opts.get("max_attempts", 10)
self.log = opts.get("logger") or NoopLogger()
self.collection = collection
self.search_terms = search_terms
# Option to expand Attributes for product metadata (default: False)
self.expand_attributes = opts.get("expand_attributes", False)
self._initial_skip = _to_int(search_terms.get("skip", 0))
self._top = _to_int(search_terms.get("top", MAX_BATCH_SIZE))
if self._top > MAX_BATCH_SIZE:
self.log.warning(
f"Maximum 'top' value is {MAX_BATCH_SIZE}, setting to {MAX_BATCH_SIZE}"
)
self._top = MAX_BATCH_SIZE
self.next_url = self._build_query_url(include_count=True)
def __iter__(self):
return _FeatureIterator(self)
def __len__(self) -> int:
if self.total_results < 0:
self.__fetch_features()
return self.total_results
def __getitem__(self, index: int):
while index >= len(self.features) and self.next_url is not None:
self.__fetch_features()
return self.features[index]
def _build_query_url(self, include_count: bool = False) -> str:
"""Build query URL with current skip offset"""
filter_expr = _build_odata_filter(self.collection, self.search_terms)
params = [
f"$filter={quote(filter_expr)}",
f"$top={self._top}",
# Ordering for consistent pagination
"$orderby=ContentDate/Start%20asc",
]
if self._initial_skip > 0:
params.append(f"$skip={self._initial_skip}")
if include_count:
params.append("$count=true")
# Optionally expand Attributes to get product metadata
# (productType, cloudCover, etc.)
if self.expand_attributes:
params.append("$expand=Attributes")
return (
"https://catalogue.dataspace.copernicus.eu/odata/v1/Products?"
f"{'&'.join(params)}"
)
def __fetch_features(self) -> None:
if self.next_url is None:
return
session = Credentials.make_session(
None, False, Credentials.RETRIES, self.proxies
)
attempts = 0
while attempts < self._max_attempts:
attempts += 1
try:
assert self.next_url is not None # for type checker
with session.get(self.next_url) as response:
if response.status_code != 200:
retrying = attempts < self._max_attempts
self.log.warning(
f"Status code {response.status_code}, "
f"{'retrying...' if retrying else 'aborting'}"
)
if retrying:
sleep(60 * (1 + (random() / 4)))
continue
odata_response = response.json()
products = odata_response.get("value", [])
# Add Collection attribute for download_feature()
for product in products:
product["Collection"] = self.collection
self.features.extend(products)
total_results = odata_response.get("@odata.count")
if total_results is not None:
self.total_results = total_results
elif self.total_results < 0:
self.log.error("Total result count not present in response.")
next_link = odata_response.get("@odata.nextLink")
self.next_url = (
_strip_odata_count(next_link)
if next_link and self._top > 0
else None
)
return
except (ChunkedEncodingError, ConnectionResetError, ProtocolError) as e:
self.log.warning(e)
continue
self.log.error("Failed to fetch features after %d attempts", attempts)
self.next_url = None
def query_features(
collection: str,
search_terms: Dict[str, SearchTermValue],
proxies: Union[Dict[str, str], None] = None,
options: Union[Dict[str, Any], None] = None,
) -> FeatureQuery:
"""
Returns an iterator over the features matching the search terms
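
    Example (a minimal sketch; the interval bounds are placeholders, and the
    API may require full ISO timestamps):

        features = query_features(
            "SENTINEL-2",
            {"publicationDate": "[2024-01-01,2024-02-01]"},
        )
        for feature in features:
            print(feature["Id"], feature["Name"])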
"""
return FeatureQuery(collection, search_terms, proxies, options)
def shape_to_wkt(shape: str) -> str:
"""
Convert a shapefile to a WKT string
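
    A usage sketch (the shapefile path is a placeholder; the first geometry
    must be a polygon with an exterior ring):

        wkt = shape_to_wkt("aoi.shp")
        features = query_features("SENTINEL-2", {"geometry": wkt})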
"""
# pylint: disable=line-too-long
coordinates = list(gpd.read_file(shape).geometry[0].exterior.coords) # pyright:ignore[reportAttributeAccessIssue]
return (
"POLYGON(("
+ ", ".join(" ".join(map(str, coord)) for coord in coordinates)
+ "))"
)
def geojson_to_wkt(geojson_in: Union[str, Dict]) -> str:
"""
Convert a geojson geometry to a WKT string
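
    For example:

        >>> geojson_to_wkt(
        ...     {"type": "Polygon", "coordinates": [[[0, 0], [1, 0], [1, 1], [0, 0]]]}
        ... )
        'POLYGON((0 0, 1 0, 1 1, 0 0))'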
"""
geojson = json.loads(geojson_in) if isinstance(geojson_in, str) else geojson_in
if geojson.get("type") == "Feature":
geojson = geojson["geometry"]
elif geojson.get("type") == "FeatureCollection" and len(geojson["features"]) == 1:
geojson = geojson["features"][0]["geometry"]
coordinates = str(
tuple(item for sublist in geojson["coordinates"][0] for item in sublist)
)
paired_coord = ",".join(
[
f"{a}{b}"
for a, b in zip(coordinates.split(",")[0::2], coordinates.split(",")[1::2])
]
)
return f"POLYGON({paired_coord})"
def describe_search_terms() -> Dict[str, Dict[str, str]]:
"""Get builtin search terms (date filters, geometry) that are always available.
Returns only the builtin parameters. To get collection-specific attributes,
use describe_collection() with a collection name.
"""
terms: Dict[str, Dict[str, str]] = {
key: {"title": spec.title, "example": "2024-01-01 or 2024-01-01T00:00:00Z"}
for key, spec in _DATE_FILTERS.items()
if not spec.interval_only
}
terms.update(_BUILTIN_PARAMS)
return terms
def _fetch_collection_attributes(
collection: str,
proxies: Union[Dict[str, str], None] = None,
options: Union[Dict[str, Any], None] = None,
) -> Optional[List[Dict[str, str]]]:
"""Fetch available attributes for a collection from the OData API."""
url = f"https://catalogue.dataspace.copernicus.eu/odata/v1/Attributes({collection})"
log = (options or {}).get("logger") or NoopLogger()
session = Credentials.make_session(None, False, Credentials.RETRIES, proxies)
response = session.get(url, timeout=30)
if response.status_code == 404:
log.error("Collection '%s' not found", collection)
elif response.status_code != 200:
log.error(
"Failed to fetch attributes for '%s': HTTP status code %s",
collection,
response.status_code,
)
return response.json() if response.status_code == 200 else None
def describe_collection(
collection: str,
proxies: Union[Dict[str, str], None] = None,
options: Union[Dict[str, Any], None] = None,
) -> Optional[Dict[str, Dict[str, str]]]:
"""
Get available filter parameters for a given collection.
Fetches available attributes from the OData API's Attributes endpoint.
    Not all server attributes may be available as search terms at this time.
Args:
collection: Collection name (e.g., "SENTINEL-2", "SENTINEL-1")
proxies: Optional proxy configuration
options: Optional options
Returns:
Dictionary of parameters that can be used in filters (builtin + server attrs)
or None if description could not be fetched
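
    Example (a minimal sketch; requires network access to the catalogue API):

        terms = describe_collection("SENTINEL-2")
        if terms is not None:
            for name, info in terms.items():
                print(name, info.get("title", ""))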
"""
# Start with built-in search terms (base date names only, no Lt/Le/Gt/Ge variants)
search_terms: Dict[str, Dict[str, str]] = {
base: {"title": desc, "example": "2024-01-01 or 2024-01-01T00:00:00Z"}
for base, _, desc in _DATE_FIELD_SPECS
}
search_terms.update(_BUILTIN_PARAMS)
# Fetch attributes for the collection from the server
if server_attributes := _fetch_collection_attributes(collection, proxies, options):
for attr in server_attributes:
if not (name := attr.get("Name")):
continue
entry: Dict[str, str] = {"type": attr.get("ValueType", "String")}
# Use title from ATTRIBUTES if available
if name in ATTRIBUTES and (title := ATTRIBUTES[name].get("Title")):
entry["title"] = title
search_terms[name] = entry
return dict(sorted(search_terms.items()))
return None
def get_product_attribute(
product: Dict[str, Any], name: str, default: Optional[T] = None
) -> Optional[T]:
"""
Get an attribute value from a product's Attributes array.
Args:
product: Product dictionary
name: Attribute name to retrieve (e.g., 'cloudCover', 'productType')
default: Value to return if attribute is not found (default: None)
Returns:
The attribute value if found, default otherwise
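
    Example (a minimal sketch over a hand-built product dict):

        product = {"Attributes": [{"Name": "cloudCover", "Value": 12.5}]}
        get_product_attribute(product, "cloudCover")  # -> 12.5
        get_product_attribute(product, "productType", "n/a")  # -> "n/a"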
"""
for attr in product.get("Attributes", []):
if attr.get("Name") == name:
return attr.get("Value")
return default
def _parse_interval(
value: str,
) -> Optional[tuple[str, str, ODataComparisonOp, ODataComparisonOp]]:
"""Parse interval syntax like [a,b], (a,b), [a,b), (a,b].
Returns:
Tuple of (start_value, end_value, start_op, end_op) or None if not an interval.
start_op is 'ge' for '[' or 'gt' for '('
end_op is 'le' for ']' or 'lt' for ')'
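
    For example:

        >>> _parse_interval("[10,20)")
        ('10', '20', 'ge', 'lt')
        >>> _parse_interval("plain value") is None
        True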
"""
value = value.strip()
if len(value) < 3:
return None
start_char = value[0]
end_char = value[-1]
if start_char not in "[(" or end_char not in "])":
return None
inner = value[1:-1]
parts = inner.split(",")
if len(parts) != 2:
return None
start_value = parts[0].strip()
end_value = parts[1].strip()
if not start_value or not end_value:
return None
# Determine operators based on brackets
start_op = "ge" if start_char == "[" else "gt"
end_op = "le" if end_char == "]" else "lt"
return start_value, end_value, start_op, end_op
def _parse_operator_suffix(key: str) -> tuple[str, ODataComparisonOp]:
"""Parse operator suffix from a key like 'cloudCoverLt'."""
for suffix, operator in _OPERATOR_SUFFIXES.items():
if key.endswith(suffix):
base_name = key[: -len(suffix)]
return base_name, operator
return key, "eq"
def _build_generic_attribute_filters(key: str, str_value: str) -> List[str]:
"""Build OData filter expression(s) for a generic attribute parameter."""
# Check if key has operator suffix (e.g., cloudCoverLt, orbitNumberGe)
base_name, operator = _parse_operator_suffix(key)
if not (attr_info := ATTRIBUTES.get(base_name)):
raise ValueError(f"The '{key}' parameter is not supported.")
attr_type = attr_info.get("Type", "String")
if not (odata_attr_type := _TYPE_TO_ODATA_ATTR.get(attr_type)):
raise ValueError(
f"Unsupported attribute type '{attr_type}' for parameter '{key}'."
)
# String and Boolean attributes only support equality
if attr_type in ("String", "Boolean") and operator != "eq":
raise ValueError(
f"Comparison operators are not supported on {attr_type.lower()} "
f"attribute '{base_name}'."
)
# Check for interval syntax (only for numeric and date types)
if attr_type in ("Integer", "Double", "DateTimeOffset"):
has_suffix = key != base_name
interval = _parse_interval(str_value)
if not has_suffix:
if not interval:
raise ValueError(
f"'{key}' requires interval syntax, e.g. {key}=[a,b]. "
f"For an exact match, use '{key}Eq' instead."
)
start_str, end_str, start_op, end_op = interval
return [
_build_attribute_filter(
base_name, start_str, odata_attr_type, start_op
),
_build_attribute_filter(base_name, end_str, odata_attr_type, end_op),
]
if interval:
raise ValueError(
f"Interval syntax is not allowed on '{key}'. "
f"Use '{base_name}' for intervals instead."
)
return [_build_attribute_filter(base_name, str_value, odata_attr_type, operator)]
def _build_odata_filter(
collection: str, search_terms: Dict[str, SearchTermValue]
) -> str:
"""Build $filter expression from search terms."""
filters = [f"Collection/Name eq '{collection}'"]
for key, value in search_terms.items():
if key in _INTERNAL_PARAMS:
continue
if deprecated_message := _DEPRECATED_PARAMS.get(key):
raise ValueError(deprecated_message)
str_value = (
_format_odata_date(value)
if isinstance(value, (datetime, date))
else str(value)
)
if spec := _DATE_FILTERS.get(key):
interval = _parse_interval(str_value)
if spec.interval_only:
if not interval:
raise ValueError(
f"'{key}' requires interval syntax, e.g. {key}=[a,b]. "
f"For an exact match, use '{key}Eq' instead."
)
start_str, end_str, start_op, end_op = interval
filters.append(f"{spec.odata_field} {start_op} {start_str}")
filters.append(f"{spec.odata_field} {end_op} {end_str}")
else:
if interval:
raise ValueError(
f"Interval syntax is not allowed on '{key}'. "
f"Use the base name for intervals instead."
)
filters.append(f"{spec.odata_field} {spec.operator} {str_value}")
elif key in ("name", "nameEq"):
filters.append(f"contains(Name,'{str_value}')")
elif key in ("geometry", "geometryEq"):
filters.append(
f"OData.CSC.Intersects(area=geography'SRID=4326;{str_value}')"
)
else:
filters.extend(_build_generic_attribute_filters(key, str_value))
return " and ".join(filters)
def _format_odata_date(date_value: Union[date, datetime]) -> str:
"""Format date value for OData filter expressions"""
if isinstance(date_value, datetime):
return date_value.strftime("%Y-%m-%dT%H:%M:%S.000Z")
return f"{date_value.strftime('%Y-%m-%d')}T00:00:00.000Z"
def _to_odata_value_str(
value: str, attr_type: ODataAttributeType, attr_name: str
) -> str:
"""Convert a string value to its OData string representation."""
if attr_type == "StringAttribute":
return f"'{value}'"
if attr_type == "DoubleAttribute":
return str(float(value))
if attr_type == "IntegerAttribute":
return str(int(value))
if attr_type == "DateTimeOffsetAttribute":
return value
if attr_type == "BooleanAttribute":
lower = value.lower()
if lower not in ("true", "false"):
raise ValueError(
f"Invalid boolean value '{value}' for attribute '{attr_name}'. "
"Use 'true' or 'false'."
)
return lower
return value
def _build_attribute_filter(
attr_name: str,
attr_value: str,
attr_type: ODataAttributeType,
operator: ODataComparisonOp,
) -> str:
value_str = _to_odata_value_str(attr_value, attr_type, attr_name)
return (
f"Attributes/OData.CSC.{attr_type}/any(att:att/Name eq '{attr_name}' and "
f"att/OData.CSC.{attr_type}/Value {operator} {value_str})"
)
def _to_int(value: SearchTermValue) -> int:
"""Convert a search term value to int, accepting only int or str."""
if isinstance(value, (int, str)) and not isinstance(value, bool):
return int(value)
raise ValueError(f"Expected int or str, got {type(value).__name__}: {value!r}")
def _strip_odata_count(url: str) -> str:
"""Remove $count=true from a URL to avoid requesting count on every page."""
url = re.sub(r"[&?](\$|%24)count=true", "", url, count=1)
# If the first param was removed, the next '&' must become '?'
if "?" not in url and "&" in url:
url = url.replace("&", "?", 1)
return url
CDSETool-0.3.1/tests/ 0000775 0000000 0000000 00000000000 15172660107 0014236 5 ustar 00root root 0000000 0000000 CDSETool-0.3.1/tests/__init__.py 0000664 0000000 0000000 00000000143 15172660107 0016345 0 ustar 00root root 0000000 0000000 # File intentionally left empty, its purpose is to allow relative imports within the tests folder.
CDSETool-0.3.1/tests/credentials/ 0000775 0000000 0000000 00000000000 15172660107 0016533 5 ustar 00root root 0000000 0000000 CDSETool-0.3.1/tests/credentials/__init__.py 0000664 0000000 0000000 00000000143 15172660107 0020642 0 ustar 00root root 0000000 0000000 # File intentionally left empty, its purpose is to allow relative imports within the tests folder.
CDSETool-0.3.1/tests/credentials/credentials_test.py 0000664 0000000 0000000 00000010345 15172660107 0022444 0 ustar 00root root 0000000 0000000 """Tests for CDSETool's credentials module."""
# pyright: reportAttributeAccessIssue=false
import datetime
from typing import Any
import pytest
import requests
from cdsetool.credentials import (
Credentials,
InvalidCredentialsException,
NoCredentialsException,
TokenExchangeException,
)
from ..mock_auth import mock_jwks, mock_openid, mock_token
def test_ensure_tokens(requests_mock: Any, mocker: Any) -> None:
mock_openid(requests_mock)
mock_token(requests_mock)
mock_jwks(mocker)
credentials = Credentials("username", "password")
assert credentials._Credentials__access_token is not None
assert credentials._Credentials__refresh_token is not None
credentials._Credentials__access_token_expires = (
datetime.datetime.now() - datetime.timedelta(hours=100)
)
spy = mocker.spy(credentials, "_Credentials__token_exchange")
credentials._Credentials__ensure_tokens()
spy.assert_called_once()
credentials._Credentials__access_token_expires = (
datetime.datetime.now() - datetime.timedelta(hours=100)
)
credentials._Credentials__refresh_token_expires = (
datetime.datetime.now() - datetime.timedelta(hours=100)
)
credentials._Credentials__ensure_tokens()
assert spy.call_count == 2
def test_read_credentials(requests_mock: Any, mocker: Any) -> None:
mock_openid(requests_mock)
mock_token(requests_mock)
mock_jwks(mocker)
mocker.patch(
"netrc.netrc",
return_value=mocker.Mock(
authenticators=lambda x: ("username", None, "password")
),
)
credentials = Credentials()
assert credentials._Credentials__username == "username"
assert credentials._Credentials__password == "password"
mocker.patch("netrc.netrc", return_value=mocker.Mock(authenticators=lambda x: None))
with pytest.raises(NoCredentialsException):
credentials = Credentials()
def test_refresh_token(requests_mock: Any, mocker: Any) -> None:
mock_openid(requests_mock)
mock_token(requests_mock)
mock_jwks(mocker)
credentials = Credentials("username", "password")
assert credentials._Credentials__access_token is not None
assert credentials._Credentials__refresh_token is not None
mock_token(requests_mock) # mock again to return a new token
prev_access_token = credentials._Credentials__access_token
credentials._Credentials__access_token_expires = datetime.datetime.now()
credentials._Credentials__refresh_token_expires = datetime.datetime.now()
credentials._Credentials__ensure_tokens()
assert credentials._Credentials__access_token is not None
assert credentials._Credentials__refresh_token is not None
assert prev_access_token != credentials._Credentials__access_token
def test_get_session(requests_mock: Any, mocker: Any) -> None:
mock_openid(requests_mock)
mock_token(requests_mock)
mock_jwks(mocker)
credentials = Credentials("username", "password")
session = credentials.get_session()
assert isinstance(session, requests.Session)
assert (
session.headers.get("Authorization")
== f"Bearer {credentials._Credentials__access_token}"
)
def test_token_exchange(requests_mock: Any, mocker: Any) -> None:
mock_openid(requests_mock)
mock_token(requests_mock)
mock_jwks(mocker)
credentials = Credentials("myuser123123", "password")
data = {
"grant_type": "password",
"username": credentials._Credentials__username,
"password": credentials._Credentials__password,
"client_id": "cdse-public",
}
credentials._Credentials__token_exchange(data)
requests_mock.post(
"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
text="{}",
status_code=401,
)
with pytest.raises(
InvalidCredentialsException, match="with username: myuser123123"
):
credentials._Credentials__token_exchange(data)
requests_mock.post(
"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
text="Failure 123123",
status_code=500,
)
with pytest.raises(TokenExchangeException, match="Failure 123123"):
credentials._Credentials__token_exchange(data)
CDSETool-0.3.1/tests/credentials/mock/ 0000775 0000000 0000000 00000000000 15172660107 0017464 5 ustar 00root root 0000000 0000000 CDSETool-0.3.1/tests/credentials/mock/openid-configuration.json 0000664 0000000 0000000 00000014475 15172660107 0024515 0 ustar 00root root 0000000 0000000 {"issuer":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE","authorization_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/auth","token_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token","introspection_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token/introspect","userinfo_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/userinfo","end_session_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/logout","frontchannel_logout_session_supported":true,"frontchannel_logout_supported":true,"jwks_uri":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/certs","check_session_iframe":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/login-status-iframe.html","grant_types_supported":["authorization_code","implicit","refresh_token","password","client_credentials","urn:ietf:params:oauth:grant-type:device_code","urn:openid:params:grant-type:ciba","urn:ietf:params:oauth:grant-type:token-exchange"],"acr_values_supported":["0","1"],"response_types_supported":["code","none","id_token","token","id_token token","code id_token","code token","code id_token token"],"subject_types_supported":["public","pairwise"],"id_token_signing_alg_values_supported":["PS384","ES384","RS384","HS256","HS512","ES256","RS256","HS384","ES512","PS256","PS512","RS512"],"id_token_encryption_alg_values_supported":["RSA-OAEP","RSA-OAEP-256","RSA1_5"],"id_token_encryption_enc_values_supported":["A256GCM","A192GCM","A128GCM","A128CBC-HS256","A192CBC-HS384","A256CBC-HS512"],"userinfo_signing_alg_values_supported":["PS384","ES384","RS384","HS256","HS512","ES256","RS256","HS384","ES512","PS256","PS512","RS512","none"],"userinfo_encryption_alg_values_supported":["RSA-OAEP","RSA-OAEP-256","RSA1_5"],"userinfo_encryption_enc_values_supported":["A256GCM","A192GCM","A128GCM","A128CBC-HS256","A192CBC-HS384","A256CBC-HS512"],"request_object_signing_alg_values_supported":["PS384","ES384","RS384","HS256","HS512","ES256","RS256","HS384","ES512","PS256","PS512","RS512","none"],"request_object_encryption_alg_values_supported":["RSA-OAEP","RSA-OAEP-256","RSA1_5"],"request_object_encryption_enc_values_supported":["A256GCM","A192GCM","A128GCM","A128CBC-HS256","A192CBC-HS384","A256CBC-HS512"],"response_modes_supported":["query","fragment","form_post","query.jwt","fragment.jwt","form_post.jwt","jwt"],"registration_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/clients-registrations/openid-connect","token_endpoint_auth_methods_supported":["private_key_jwt","client_secret_basic","client_secret_post","tls_client_auth","client_secret_jwt"],"token_endpoint_auth_signing_alg_values_supported":["PS384","ES384","RS384","HS256","HS512","ES256","RS256","HS384","ES512","PS256","PS512","RS512"],"introspection_endpoint_auth_methods_supported":["private_key_jwt","client_secret_basic","client_secret_post","tls_client_auth","client_secret_jwt"],"introspection_endpoint_auth_signing_alg_values_supported":["PS384","ES384","RS384","HS256","HS512","ES256","RS256","HS384","ES512","PS
256","PS512","RS512"],"authorization_signing_alg_values_supported":["PS384","ES384","RS384","HS256","HS512","ES256","RS256","HS384","ES512","PS256","PS512","RS512"],"authorization_encryption_alg_values_supported":["RSA-OAEP","RSA-OAEP-256","RSA1_5"],"authorization_encryption_enc_values_supported":["A256GCM","A192GCM","A128GCM","A128CBC-HS256","A192CBC-HS384","A256CBC-HS512"],"claims_supported":["aud","sub","iss","auth_time","name","given_name","family_name","preferred_username","email","acr"],"claim_types_supported":["normal"],"claims_parameter_supported":true,"scopes_supported":["openid","additional_scope","acr","web-origins","microprofile-jwt","AUDIENCE_PUBLIC","phone","zipper_session_scope","Geographic_origin","address","openid","email","ondemand_processing","offline_access","roles","profile","user-context"],"request_parameter_supported":true,"request_uri_parameter_supported":true,"require_request_uri_registration":true,"code_challenge_methods_supported":["plain","S256"],"tls_client_certificate_bound_access_tokens":true,"revocation_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/revoke","revocation_endpoint_auth_methods_supported":["private_key_jwt","client_secret_basic","client_secret_post","tls_client_auth","client_secret_jwt"],"revocation_endpoint_auth_signing_alg_values_supported":["PS384","ES384","RS384","HS256","HS512","ES256","RS256","HS384","ES512","PS256","PS512","RS512"],"backchannel_logout_supported":true,"backchannel_logout_session_supported":true,"device_authorization_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/auth/device","backchannel_token_delivery_modes_supported":["poll","ping"],"backchannel_authentication_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/ext/ciba/auth","backchannel_authentication_request_signing_alg_values_supported":["PS384","ES384","RS384","ES256","RS256","ES512","PS256","PS512","RS512"],"require_pushed_authorization_requests":false,"pushed_authorization_request_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/ext/par/request","mtls_endpoint_aliases":{"token_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token","revocation_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/revoke","introspection_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token/introspect","device_authorization_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/auth/device","registration_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/clients-registrations/openid-connect","userinfo_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/userinfo","pushed_authorization_request_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/ext/par/request","backchannel_authentication_endpoint":"https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/ext/ciba/auth"}} CDSETool-0.3.1/tests/download/ 0000775 0000000 0000000 00000000000 15172660107 0016045 5 ustar 00root root 0000000 0000000 CDSETool-0.3.1/tests/download/__init__.py 0000664 0000000 0000000 00000000143 15172660107 0020154 0 ustar 00root root 0000000 0000000 # File intentionally left empty, its purpose is to allow relative imports within the tests folder.
CDSETool-0.3.1/tests/download/download_test.py 0000664 0000000 0000000 00000023530 15172660107 0021270 0 ustar 00root root 0000000 0000000 """Tests for CDSETool's download module."""
import logging
import os
from pathlib import Path
from typing import Any, Dict, List
import pytest
from cdsetool.credentials import Credentials
from cdsetool.download import (
_get_feature_url,
_get_odata_url,
download_feature,
download_file,
filter_files,
)
from ..mock_auth import mock_jwks, mock_openid, mock_token
def mock_download_file(url: str, path: Path, options: Dict[str, Any]) -> bool:
"""Mock the download_file function to create mock files."""
with open(path, "wb") as f:
f.write(b"dummy data")
return True
def test_get_feature_url() -> None:
"""Test full product OData download URL generation."""
feature = {"Id": "a6215824-704b-46d7-a2ec-efea4e468668"}
expected_url = (
"https://download.dataspace.copernicus.eu/odata/v1/"
"Products(a6215824-704b-46d7-a2ec-efea4e468668)/$value"
)
assert _get_feature_url(feature) == expected_url
assert _get_feature_url({}) == ""
assert _get_feature_url({"Id": None}) == ""
def test_get_odata_url() -> None:
"""Test individual file OData download URL generation."""
product_id = "a6215824-704b-46d7-a2ec-efea4e468668"
product_name = "S2B_MSIL1C_20241209T162609_N0511_R040_T17UPV_20241209T195414.SAFE"
href = "path/to/resource.xml"
expected_url = (
"https://download.dataspace.copernicus.eu/odata/v1/"
"Products(a6215824-704b-46d7-a2ec-efea4e468668)/"
f"Nodes({product_name})/"
"Nodes(path)/Nodes(to)/Nodes(resource.xml)/$value"
)
assert _get_odata_url(product_id, product_name, href) == expected_url
@pytest.mark.parametrize(
"manifest_file_path, pattern, expected_files",
[
(
Path("tests/download/mock/sentinel_1/manifest.safe"),
"*/calibration-*.xml",
[
Path(
"annotation/calibration/calibration-s1a-iw-grd-vh-20241217t061735-20241217t061800-057028-07020f-002.xml"
),
Path(
"annotation/calibration/calibration-s1a-iw-grd-vv-20241217t061735-20241217t061800-057028-07020f-001.xml"
),
],
),
(
Path("tests/download/mock/sentinel_2/manifest.safe"),
"*TCI.jp2",
[
Path(
"GRANULE/L1C_T17UPV_A040535_20241209T162603/IMG_DATA/T17UPV_20241209T162609_TCI.jp2"
)
],
),
(
Path("tests/download/mock/sentinel_3/xfdumanifest.xml"),
"*Oa02_reflectance.nc",
[Path("Oa02_reflectance.nc")],
),
],
)
def test_filter_files(
    manifest_file_path: Path, pattern: str, expected_files: List[Path]
) -> None:
filtered_files = filter_files(manifest_file_path, pattern)
assert filtered_files == expected_files
def test_filter_files_with_exclude() -> None:
manifest_file_path = Path("tests/download/mock/sentinel_2/manifest.safe")
filtered_files = filter_files(manifest_file_path, "*.jp2", exclude=True)
assert filtered_files == [
Path("MTD_MSIL1C.xml"),
Path("INSPIRE.xml"),
Path("HTML/UserProduct_index.html"),
Path("HTML/UserProduct_index.xsl"),
Path("DATASTRIP/DS_2BPS_20241209T195414_S20241209T162603/MTD_DS.xml"),
Path(
"DATASTRIP/DS_2BPS_20241209T195414_S20241209T162603/QI_DATA/FORMAT_CORRECTNESS.xml"
),
Path(
"DATASTRIP/DS_2BPS_20241209T195414_S20241209T162603/QI_DATA/GENERAL_QUALITY.xml"
),
Path(
"DATASTRIP/DS_2BPS_20241209T195414_S20241209T162603/QI_DATA/GEOMETRIC_QUALITY.xml"
),
Path(
"DATASTRIP/DS_2BPS_20241209T195414_S20241209T162603/QI_DATA/RADIOMETRIC_QUALITY.xml"
),
Path(
"DATASTRIP/DS_2BPS_20241209T195414_S20241209T162603/QI_DATA/SENSOR_QUALITY.xml"
),
Path("GRANULE/L1C_T17UPV_A040535_20241209T162603/AUX_DATA/AUX_CAMSFO"),
Path("GRANULE/L1C_T17UPV_A040535_20241209T162603/AUX_DATA/AUX_ECMWFT"),
Path("GRANULE/L1C_T17UPV_A040535_20241209T162603/MTD_TL.xml"),
Path(
"GRANULE/L1C_T17UPV_A040535_20241209T162603/QI_DATA/FORMAT_CORRECTNESS.xml"
),
Path("GRANULE/L1C_T17UPV_A040535_20241209T162603/QI_DATA/GENERAL_QUALITY.xml"),
Path(
"GRANULE/L1C_T17UPV_A040535_20241209T162603/QI_DATA/GEOMETRIC_QUALITY.xml"
),
Path("GRANULE/L1C_T17UPV_A040535_20241209T162603/QI_DATA/SENSOR_QUALITY.xml"),
]
def test_filter_files_no_match() -> None:
manifest_file_path = Path("tests/download/mock/sentinel_2/manifest.safe")
filtered_files = filter_files(manifest_file_path, "Oa1*.nc")
assert not filtered_files
def test_filter_files_broken_manifest() -> None:
manifest_file_path = Path("tests/download/mock/sentinel_2/broken_manifest.safe")
filtered_files = filter_files(manifest_file_path, "*TCI.jp2")
assert filtered_files is None
def test_download_file_success(requests_mock: Any, mocker: Any, tmp_path: Path) -> None:
mock_openid(requests_mock)
mock_token(requests_mock)
mock_jwks(mocker)
mock_url = "http://example.com/file"
mocker.patch("cdsetool.download._follow_redirect", return_value=mock_url)
content = b"data" * 5
requests_mock.get(
mock_url, status_code=200, headers={"Content-Length": "100"}, content=content
)
mock_file = tmp_path / "mock_file"
result = download_file(
mock_url, mock_file, {"credentials": Credentials("usr", "pwd")}
)
assert result is True
# Check that file was written correctly
with open(mock_file, "rb") as f:
file_content = f.read()
assert file_content == content
def test_download_file_failure(requests_mock: Any, mocker: Any, tmp_path: Path) -> None:
mock_openid(requests_mock)
mock_token(requests_mock)
mock_jwks(mocker)
mock_url = "http://example.com/file"
mocker.patch("cdsetool.download._follow_redirect", return_value=mock_url)
requests_mock.get(mock_url, status_code=404, headers={"Content-Length": "100"})
mock_file = tmp_path / "mock_file"
mocker.patch("time.sleep", return_value=None) # Avoid retry delay
result = download_file(
mock_url, mock_file, {"credentials": Credentials("usr", "pwd")}
)
assert result is False
def test_download_feature(mocker: Any, tmp_path: Path) -> None:
title = "S2B_MSIL1C_20241209T162609_N0511_R040_T17UPV_20241209T195414.SAFE"
mock_feature = {
"Id": "a6215824-704b-46d7-a2ec-efea4e468668",
"Name": title,
"ContentLength": 1000,
"Online": True,
}
mocker.patch("cdsetool.download.download_file", mock_download_file)
final_dir = str(tmp_path / "test_download_feature")
filename = download_feature(mock_feature, final_dir)
assert filename == f"{title}.zip"
assert os.path.exists(os.path.join(final_dir, f"{title}.zip"))
def test_download_feature_failure(mocker: Any, tmp_path: Path) -> None:
title = "S2B_MSIL1C_20241209T162609_N0511_R040_T17UPV_20241209T195414.SAFE"
mock_feature = {
"Id": "a6215824-704b-46d7-a2ec-efea4e468668",
"Name": title,
"ContentLength": 1000,
"Online": True,
}
mocker.patch(
"cdsetool.download.download_file", side_effect=lambda url, path, options: None
)
final_dir = str(tmp_path / "test_download_feature_failure")
filename = download_feature(mock_feature, final_dir)
assert filename is None
def test_download_feature_with_filter(mocker: Any, tmp_path: Path) -> None:
options = {"filter_pattern": "*.jp2"}
title = "S2B_MSIL1C_20241209T162609_N0511_R040_T17UPV_20241209T195414.SAFE"
mock_feature = {
"Id": "a6215824-704b-46d7-a2ec-efea4e468668",
"Name": title,
"ContentLength": 1000,
"Online": True,
"Collection": "SENTINEL-2",
}
mocker.patch(
"cdsetool.download.filter_files",
return_value=[Path("./GRANULE/file1.jp2"), Path("./GRANULE/file2.jp2")],
)
mocker.patch("cdsetool.download.download_file", mock_download_file)
final_dir = str(tmp_path / "test_download_feature_with_filter")
product_name = download_feature(mock_feature, final_dir, options)
assert product_name == mock_feature["Name"]
assert os.path.exists(os.path.join(final_dir, title, "GRANULE", "file1.jp2"))
assert os.path.exists(os.path.join(final_dir, title, "GRANULE", "file2.jp2"))
def test_download_feature_with_filter_failure(mocker: Any, tmp_path: Path) -> None:
options = {"filter_pattern": "*.jp2"}
mock_feature = {
"Id": "a6215824-704b-46d7-a2ec-efea4e468668",
"Name": "S2B_MSIL1C_20241209T162609_N0511_R040_T17UPV_20241209T195414.SAFE",
"ContentLength": 1000,
"Online": True,
"Collection": "SENTINEL-2",
}
mocker.patch(
"cdsetool.download.filter_files",
return_value=["./GRANULE/file1.jp2", "./GRANULE/file2.jp2"],
)
mocker.patch(
"cdsetool.download.download_file", side_effect=lambda url, path, options: None
)
final_dir = str(tmp_path / "test_download_feature_with_filter_failure")
product_name = download_feature(mock_feature, final_dir, options)
assert product_name is None
def test_download_feature_with_filter_unsupported_coll(
caplog: Any, tmp_path: Path
) -> None:
options = {"logger": logging.getLogger(__name__), "filter_pattern": "*MTL.txt"}
mock_feature = {
"Id": "a6215824-704b-46d7-a2ec-efea4e468668",
"Name": "L8XXX",
"ContentLength": 1000,
"Online": True,
"Collection": "Landsat8",
}
final_dir = str(tmp_path / "test_download_feature_with_filter_unsupported_coll")
product_name = download_feature(mock_feature, final_dir, options)
assert product_name is None
assert (
"No support for downloading individual files in "
f"{mock_feature['Collection']} products"
) in caplog.text
CDSETool-0.3.1/tests/download/mock/ 0000775 0000000 0000000 00000000000 15172660107 0016776 5 ustar 00root root 0000000 0000000 CDSETool-0.3.1/tests/download/mock/sentinel_1/ 0000775 0000000 0000000 00000000000 15172660107 0021037 5 ustar 00root root 0000000 0000000 CDSETool-0.3.1/tests/download/mock/sentinel_1/manifest.safe 0000664 0000000 0000000 00000057724 15172660107 0023524 0 ustar 00root root 0000000 0000000
2014-016A
SENTINEL-1
A
Synthetic Aperture Radar
IW
IW
57028
57028
81
81
340
1
DESCENDING
2024-12-17T05:40:13.187705
7
459279
VV
VH
S
SAR Standard L1 Product
Slice
GRD
NRT-3h
true
2024-12-17T06:09:11.775902
21
30
2024-12-17T06:17:35.476248
2024-12-17T06:18:00.475440
2.242288e+06
2.267288e+06
42.366711,-1.164012 42.775124,-4.343804 44.273342,-4.011340 43.865166,-0.752081
5b30ed5fdd5c443779924148cc0ab233
97bbba6752ff6c8481c5359b2dad036f
de1b98b98001255ef4301acfa9ddaf71
7861e6ec3cd64bcfece1441f390ff314
1985a1e903bc8a6137dc9d88a4ced035
f828087c6ef5f41f8f33a633ad8ddeb6
02c0dda3f8e409a15e92f9635625316b
42128e0689c0d32be00c71953adccf0e
abca46e8ad0eb8cee9a853963a4cc624
ccc01e1335dcaedbfbea083dccc0a140
c6ab33190822b1151862d7a86433b896
f1d13a5bbd57c466f2192fe7246d4472
7a7f2cba6bec280ff36c63b7dfa95697
CDSETool-0.3.1/tests/download/mock/sentinel_2/ 0000775 0000000 0000000 00000000000 15172660107 0021040 5 ustar 00root root 0000000 0000000 CDSETool-0.3.1/tests/download/mock/sentinel_2/broken_manifest.safe 0000664 0000000 0000000 00000000541 15172660107 0025046 0 ustar 00root root 0000000 0000000
91CB5B925086749536F310AC7F5E0F2F892CE36FBAA85D6DF2E696EAA64B8400