pax_global_header00006660000000000000000000000064151007621370014514gustar00rootroot0000000000000052 comment=9982e9aff91f587408486eddead2d31cfa3d5d7f orgparse-0.4.20251020/000077500000000000000000000000001510076213700140735ustar00rootroot00000000000000orgparse-0.4.20251020/.ci/000077500000000000000000000000001510076213700145445ustar00rootroot00000000000000orgparse-0.4.20251020/.ci/release000077500000000000000000000037711510076213700161220ustar00rootroot00000000000000#!/usr/bin/env python3 ''' Run [[file:scripts/release][scripts/release]] to deploy Python package onto [[https://pypi.org][PyPi]] and [[https://test.pypi.org][test PyPi]]. The script expects =TWINE_PASSWORD= environment variable to contain the [[https://pypi.org/help/#apitoken][PyPi token]] (not the password!). The script can be run manually. It's also running as =pypi= job in [[file:.github/workflows/main.yml][Github Actions config]]. Packages are deployed on: - every master commit, onto test pypi - every new tag, onto production pypi You'll need to set =TWINE_PASSWORD= and =TWINE_PASSWORD_TEST= in [[https://help.github.com/en/actions/configuring-and-managing-workflows/creating-and-storing-encrypted-secrets#creating-encrypted-secrets][secrets]] for Github Actions deployment to work. 
''' import os import sys from pathlib import Path from subprocess import check_call import shutil is_ci = os.environ.get('CI') is not None def main() -> None: import argparse p = argparse.ArgumentParser() p.add_argument('--test', action='store_true', help='use test pypi') args = p.parse_args() extra = [] if args.test: extra.extend(['--repository', 'testpypi']) root = Path(__file__).absolute().parent.parent os.chdir(root) # just in case if is_ci: # see https://github.com/actions/checkout/issues/217 check_call('git fetch --prune --unshallow'.split()) dist = root / 'dist' if dist.exists(): shutil.rmtree(dist) check_call(['python3', '-m', 'build']) TP = 'TWINE_PASSWORD' password = os.environ.get(TP) if password is None: print(f"WARNING: no {TP} passed", file=sys.stderr) import pip_secrets password = pip_secrets.token_test if args.test else pip_secrets.token # meh check_call([ 'python3', '-m', 'twine', 'upload', *dist.iterdir(), *extra, ], env={ 'TWINE_USERNAME': '__token__', TP: password, **os.environ, }) if __name__ == '__main__': main() orgparse-0.4.20251020/.ci/release-uv000077500000000000000000000030511510076213700165410ustar00rootroot00000000000000#!/usr/bin/env python3 ''' Deploys Python package onto [[https://pypi.org][PyPi]] or [[https://test.pypi.org][test PyPi]]. - running manually You'll need =UV_PUBLISH_TOKEN= env variable - running on Github Actions Instead of env variable, relies on configuring github as Trusted publisher (https://docs.pypi.org/trusted-publishers/) -- both for test and regular pypi It's running as =pypi= job in [[file:.github/workflows/main.yml][Github Actions config]]. 
Packages are deployed on: - every master commit, onto test pypi - every new tag, onto production pypi ''' UV_PUBLISH_TOKEN = 'UV_PUBLISH_TOKEN' import argparse import os import shutil from pathlib import Path from subprocess import check_call is_ci = os.environ.get('CI') is not None def main() -> None: p = argparse.ArgumentParser() p.add_argument('--use-test-pypi', action='store_true') args = p.parse_args() publish_url = ['--publish-url', 'https://test.pypi.org/legacy/'] if args.use_test_pypi else [] root = Path(__file__).absolute().parent.parent os.chdir(root) # just in case # TODO ok, for now uv won't remove dist dir if it already exists # https://github.com/astral-sh/uv/issues/10293 dist = root / 'dist' if dist.exists(): shutil.rmtree(dist) check_call(['uv', 'build']) if not is_ci: # CI relies on trusted publishers so doesn't need env variable assert UV_PUBLISH_TOKEN in os.environ, f'no {UV_PUBLISH_TOKEN} passed' check_call(['uv', 'publish', *publish_url]) if __name__ == '__main__': main() orgparse-0.4.20251020/.ci/run000077500000000000000000000014141510076213700152760ustar00rootroot00000000000000#!/bin/bash set -eu cd "$(dirname "$0")" cd .. # git root if ! command -v sudo; then # CI or Docker sometimes doesn't have it, so useful to have a dummy function sudo { "$@" } fi # --parallel-live to show outputs while it's running tox_cmd='run-parallel --parallel-live' if [ -n "${CI-}" ]; then # install OS specific stuff here case "$OSTYPE" in darwin*) # macos : ;; cygwin* | msys* | win*) # windows # ugh. parallel stuff seems super flaky under windows, some random failures, "file used by other process" and crap like that tox_cmd='run' ;; *) # must be linux? 
: ;; esac fi # NOTE: expects uv installed uv tool run --with tox-uv tox $tox_cmd "$@" orgparse-0.4.20251020/.github/000077500000000000000000000000001510076213700154335ustar00rootroot00000000000000orgparse-0.4.20251020/.github/workflows/000077500000000000000000000000001510076213700174705ustar00rootroot00000000000000orgparse-0.4.20251020/.github/workflows/main.yml000066400000000000000000000101271510076213700211400ustar00rootroot00000000000000# see https://github.com/karlicoss/pymplate for up-to-date reference name: CI on: push: branches: '*' tags: 'v[0-9]+.*' # only trigger on 'release' tags for PyPi # Ideally I would put this in the pypi job... but github syntax doesn't allow for regexes there :shrug: # Needed to trigger on others' PRs. # Note that people who fork it need to go to "Actions" tab on their fork and click "I understand my workflows, go ahead and enable them". pull_request: # Needed to trigger workflows manually. workflow_dispatch: inputs: debug_enabled: type: boolean description: 'Run the build with tmate debugging enabled (https://github.com/marketplace/actions/debugging-with-tmate)' required: false default: false jobs: build: strategy: fail-fast: false matrix: platform: [ubuntu-latest, macos-latest, windows-latest] python-version: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14'] # vvv just an example of excluding stuff from matrix # exclude: [{platform: macos-latest, python-version: '3.6'}] runs-on: ${{ matrix.platform }} # useful for 'optional' pipelines # continue-on-error: ${{ matrix.platform == 'windows-latest' }} steps: # ugh https://github.com/actions/toolkit/blob/main/docs/commands.md#path-manipulation - run: echo "$HOME/.local/bin" >> $GITHUB_PATH - uses: actions/checkout@v5 with: submodules: recursive fetch-depth: 0 # nicer to have all git history when debugging/for tests - uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - uses: astral-sh/setup-uv@v7 with: enable-cache: false # we don't have lock files, so 
can't use them as cache key - uses: mxschmitt/action-tmate@v3 if: ${{ github.event_name == 'workflow_dispatch' && inputs.debug_enabled }} # explicit bash command is necessary for Windows CI runner, otherwise it thinks it's cmd... - run: bash .ci/run env: # only compute lxml coverage on ubuntu; it crashes on windows CI_MYPY_COVERAGE: ${{ matrix.platform == 'ubuntu-latest' && '--cobertura-xml-report .coverage.mypy' || '' }} - if: matrix.platform == 'ubuntu-latest' # no need to compute coverage for other platforms uses: codecov/codecov-action@v5 with: fail_ci_if_error: true # default false token: ${{ secrets.CODECOV_TOKEN }} flags: mypy-${{ matrix.python-version }} files: .coverage.mypy/cobertura.xml pypi: # Do not run it for PRs/cron schedule etc. # NOTE: release tags are guarded by on: push: tags on the top. if: github.event_name == 'push' && (startsWith(github.event.ref, 'refs/tags/') || (github.event.ref == format('refs/heads/{0}', github.event.repository.master_branch))) # Ugh, I tried using matrix or something to explicitly generate only test pypi or prod pypi pipelines. # But github actions is so shit, it's impossible to do any logic at all, e.g. doesn't support conditional matrix, if/else statements for variables etc. 
needs: [build] # add all other jobs here runs-on: ubuntu-latest permissions: # necessary for Trusted Publishing id-token: write steps: # ugh https://github.com/actions/toolkit/blob/main/docs/commands.md#path-manipulation - run: echo "$HOME/.local/bin" >> $GITHUB_PATH - uses: actions/checkout@v5 with: submodules: recursive fetch-depth: 0 # pull all commits to correctly infer vcs version - uses: actions/setup-python@v6 with: python-version: '3.10' - uses: astral-sh/setup-uv@v7 with: enable-cache: false # we don't have lock files, so can't use them as cache key - name: 'release to test pypi' # always deploy merged master to test pypi if: github.event.ref == format('refs/heads/{0}', github.event.repository.master_branch) run: .ci/release-uv --use-test-pypi - name: 'release to prod pypi' # always deploy tags to release pypi if: startsWith(github.event.ref, 'refs/tags/') run: .ci/release-uv orgparse-0.4.20251020/.gitignore000066400000000000000000000051371510076213700160710ustar00rootroot00000000000000 # Created by https://www.toptal.com/developers/gitignore/api/python,emacs # Edit at https://www.toptal.com/developers/gitignore?templates=python,emacs ### Emacs ### # -*- mode: gitignore; -*- *~ \#*\# /.emacs.desktop /.emacs.desktop.lock *.elc auto-save-list tramp .\#* # Org-mode .org-id-locations *_archive # flymake-mode *_flymake.* # eshell files /eshell/history /eshell/lastdir # elpa packages /elpa/ # reftex files *.rel # AUCTeX auto folder /auto/ # cask packages .cask/ dist/ # Flycheck flycheck_*.el # server auth directory /server/ # projectiles files .projectile # directory configuration .dir-locals.el # network security /network-security.data ### Python ### # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # 
Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover *.py,cover .hypothesis/ .pytest_cache/ pytestdebug.log # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 db.sqlite3-journal # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ doc/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # pipenv # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. # However, in case of collaboration, if having platform-specific dependencies or dependencies # having no cross-platform support, pipenv may install dependencies that don't work, or not # install all needed dependencies. #Pipfile.lock # PEP 582; used by e.g. github.com/David-OConnor/pyflow __pypackages__/ # Celery stuff celerybeat-schedule celerybeat.pid # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ pythonenv* # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # pytype static type analyzer .pytype/ # profiling data .prof # End of https://www.toptal.com/developers/gitignore/api/python,emacs orgparse-0.4.20251020/LICENSE000066400000000000000000000024561510076213700151070ustar00rootroot00000000000000BSD 2-Clause License Copyright (c) 2012, Takafumi Arakaki All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. 
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. orgparse-0.4.20251020/Makefile000066400000000000000000000003161510076213700155330ustar00rootroot00000000000000## Run test test: cog tox ## Build document doc: cog make -C doc html ## Update files using cog.py cog: orgparse/__init__.py orgparse/__init__.py: README.rst cd src/orgparse && cog.py -r __init__.py orgparse-0.4.20251020/README.rst000066400000000000000000000052141510076213700155640ustar00rootroot00000000000000=========================================================== orgparse - Python module for reading Emacs org-mode files =========================================================== * `Documentation (Read the Docs) `_ * `Repository (at GitHub) `_ * `PyPI `_ * `conda-forge `_ Install ------- You can install ``orgpase`` via PyPI .. code-block:: console pip install orgparse or via conda-forge .. 
code-block:: console conda install orgparse -c conda-forge Usage ----- There are pretty extensive doctests if you're interested in some specific method. Otherwise here are some example snippets: Load org node ^^^^^^^^^^^^^ :: from orgparse import load, loads load('PATH/TO/FILE.org') load(file_like_object) loads(''' * This is org-mode contents You can load org object from string. ** Second header ''') Traverse org tree ^^^^^^^^^^^^^^^^^ >>> root = loads(''' ... * Heading 1 ... ** Heading 2 ... *** Heading 3 ... ''') >>> for node in root[1:]: # [1:] for skipping root itself ... print(node) * Heading 1 ** Heading 2 *** Heading 3 >>> h1 = root.children[0] >>> h2 = h1.children[0] >>> h3 = h2.children[0] >>> print(h1) * Heading 1 >>> print(h2) ** Heading 2 >>> print(h3) *** Heading 3 >>> print(h2.get_parent()) * Heading 1 >>> print(h3.get_parent(max_level=1)) * Heading 1 Accessing node attributes ^^^^^^^^^^^^^^^^^^^^^^^^^ >>> root = loads(''' ... * DONE Heading :TAG: ... CLOSED: [2012-02-26 Sun 21:15] SCHEDULED: <2012-02-26 Sun> ... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05 ... :PROPERTIES: ... :Effort: 1:00 ... :OtherProperty: some text ... :END: ... Body texts... ... ''') >>> node = root.children[0] >>> node.heading 'Heading' >>> node.scheduled OrgDateScheduled((2012, 2, 26)) >>> node.closed OrgDateClosed((2012, 2, 26, 21, 15, 0)) >>> node.clock [OrgDateClock((2012, 2, 26, 21, 10, 0), (2012, 2, 26, 21, 15, 0))] >>> bool(node.deadline) # it is not specified False >>> node.tags == set(['TAG']) True >>> node.get_property('Effort') 60 >>> node.get_property('UndefinedProperty') # returns None >>> node.get_property('OtherProperty') 'some text' >>> node.body ' Body texts...' Project status -------------- Project is maintained by @karlicoss (myself). For my personal use, orgparse mostly has all features I need, so there hasn't been much active development lately. However, contributions are always welcome! 
Please provide tests along with your contribution if you're fixing bugs or adding new functionality. orgparse-0.4.20251020/conftest.py000066400000000000000000000044251510076213700162770ustar00rootroot00000000000000# this is a hack to monkey patch pytest so it handles tests inside namespace packages without __init__.py properly # without it, pytest can't discover the package root for some reason # also see https://github.com/karlicoss/pytest_namespace_pkgs for more import os import pathlib from typing import Optional import _pytest.main import _pytest.pathlib # we consider all dirs in repo/ to be namespace packages root_dir = pathlib.Path(__file__).absolute().parent.resolve() / 'src' assert root_dir.exists(), root_dir # TODO assert it contains package name?? maybe get it via setuptools.. namespace_pkg_dirs = [str(d) for d in root_dir.iterdir() if d.is_dir()] # resolve_package_path is called from _pytest.pathlib.import_path # takes a full abs path to the test file and needs to return the path to the 'root' package on the filesystem resolve_pkg_path_orig = _pytest.pathlib.resolve_package_path def resolve_package_path(path: pathlib.Path) -> Optional[pathlib.Path]: result = path # search from the test file upwards for parent in result.parents: if str(parent) in namespace_pkg_dirs: return parent if os.name == 'nt': # ??? for some reason on windows it is trying to call this against conftest? but not on linux/osx if path.name == 'conftest.py': return resolve_pkg_path_orig(path) raise RuntimeError("Couldn't determine path for ", path) # NOTE: seems like it's not necessary anymore? 
# keeping it for now just in case # after https://github.com/pytest-dev/pytest/pull/13426 we should be able to remove the whole conftest # _pytest.pathlib.resolve_package_path = resolve_package_path # without patching, the orig function returns just a package name for some reason # (I think it's used as a sort of fallback) # so we need to point it at the absolute path properly # not sure what are the consequences.. maybe it wouldn't be able to run against installed packages? not sure.. search_pypath_orig = _pytest.main.search_pypath def search_pypath(module_name: str) -> str: mpath = root_dir / module_name.replace('.', os.sep) if not mpath.is_dir(): mpath = mpath.with_suffix('.py') assert mpath.exists(), mpath # just in case return str(mpath) _pytest.main.search_pypath = search_pypath # ty: ignore[invalid-assignment] orgparse-0.4.20251020/doc/000077500000000000000000000000001510076213700146405ustar00rootroot00000000000000orgparse-0.4.20251020/doc/Makefile000066400000000000000000000127151510076213700163060ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = build # Internal variables. 
PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 
pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/orgparse.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/orgparse.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/orgparse" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/orgparse" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 
texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." orgparse-0.4.20251020/doc/make.bat000066400000000000000000000117651510076213700162570ustar00rootroot00000000000000@ECHO OFF REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set BUILDDIR=build set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source set I18NSPHINXOPTS=%SPHINXOPTS% source if NOT "%PAPER%" == "" ( set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% ) if "%1" == "" goto help if "%1" == "help" ( :help echo.Please use `make ^` where ^ is one of echo. html to make standalone HTML files echo. dirhtml to make HTML files named index.html in directories echo. singlehtml to make a single large HTML file echo. pickle to make pickle files echo. json to make JSON files echo. 
htmlhelp to make HTML files and a HTML help project echo. qthelp to make HTML files and a qthelp project echo. devhelp to make HTML files and a Devhelp project echo. epub to make an epub echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter echo. text to make text files echo. man to make manual pages echo. texinfo to make Texinfo files echo. gettext to make PO message catalogs echo. changes to make an overview over all changed/added/deprecated items echo. linkcheck to check all external links for integrity echo. doctest to run all doctests embedded in the documentation if enabled goto end ) if "%1" == "clean" ( for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i del /q /s %BUILDDIR%\* goto end ) if "%1" == "html" ( %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/html. goto end ) if "%1" == "dirhtml" ( %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. goto end ) if "%1" == "singlehtml" ( %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml if errorlevel 1 exit /b 1 echo. echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. goto end ) if "%1" == "pickle" ( %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the pickle files. goto end ) if "%1" == "json" ( %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can process the JSON files. goto end ) if "%1" == "htmlhelp" ( %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp if errorlevel 1 exit /b 1 echo. echo.Build finished; now you can run HTML Help Workshop with the ^ .hhp project file in %BUILDDIR%/htmlhelp. goto end ) if "%1" == "qthelp" ( %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp if errorlevel 1 exit /b 1 echo. 
echo.Build finished; now you can run "qcollectiongenerator" with the ^ .qhcp project file in %BUILDDIR%/qthelp, like this: echo.^> qcollectiongenerator %BUILDDIR%\qthelp\orgparse.qhcp echo.To view the help file: echo.^> assistant -collectionFile %BUILDDIR%\qthelp\orgparse.ghc goto end ) if "%1" == "devhelp" ( %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp if errorlevel 1 exit /b 1 echo. echo.Build finished. goto end ) if "%1" == "epub" ( %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub if errorlevel 1 exit /b 1 echo. echo.Build finished. The epub file is in %BUILDDIR%/epub. goto end ) if "%1" == "latex" ( %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex if errorlevel 1 exit /b 1 echo. echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. goto end ) if "%1" == "text" ( %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text if errorlevel 1 exit /b 1 echo. echo.Build finished. The text files are in %BUILDDIR%/text. goto end ) if "%1" == "man" ( %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man if errorlevel 1 exit /b 1 echo. echo.Build finished. The manual pages are in %BUILDDIR%/man. goto end ) if "%1" == "texinfo" ( %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo if errorlevel 1 exit /b 1 echo. echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. goto end ) if "%1" == "gettext" ( %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale if errorlevel 1 exit /b 1 echo. echo.Build finished. The message catalogs are in %BUILDDIR%/locale. goto end ) if "%1" == "changes" ( %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes if errorlevel 1 exit /b 1 echo. echo.The overview file is in %BUILDDIR%/changes. goto end ) if "%1" == "linkcheck" ( %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck if errorlevel 1 exit /b 1 echo. echo.Link check complete; look for any errors in the above output ^ or in %BUILDDIR%/linkcheck/output.txt. 
goto end ) if "%1" == "doctest" ( %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest if errorlevel 1 exit /b 1 echo. echo.Testing of doctests in the sources finished, look at the ^ results in %BUILDDIR%/doctest/output.txt. goto end ) :end orgparse-0.4.20251020/doc/source/000077500000000000000000000000001510076213700161405ustar00rootroot00000000000000orgparse-0.4.20251020/doc/source/conf.py000066400000000000000000000053631510076213700174460ustar00rootroot00000000000000import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent.parent)) # -- General configuration ------------------------------------------------ extensions = [ 'sphinx.ext.todo', 'sphinx.ext.autodoc', 'sphinx.ext.intersphinx', 'sphinx.ext.inheritance_diagram', ] templates_path = [] # ['_templates'] source_suffix = '.rst' master_doc = 'index' # TODO not sure I'm doing that right.. import orgparse # General information about the project. project = 'orgparse' copyright = '2012, Takafumi Arakaki' # noqa: A001 # The short X.Y version. # TODO use setup.py for version version = orgparse.__version__ # ty: ignore[unresolved-attribute] # The full version, including alpha/beta/rc tags. release = orgparse.__version__ # ty: ignore[unresolved-attribute] exclude_patterns = [] pygments_style = 'sphinx' # -- Options for HTML output ---------------------------------------------- html_theme = 'default' html_static_path = [] # ['_static'] # Output file base name for HTML help builder. htmlhelp_basename = 'orgparsedoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto/manual]). 
latex_documents = [ ('index', 'orgparse.tex', 'orgparse Documentation', 'Takafumi Arakaki', 'manual'), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ('index', 'orgparse', 'orgparse Documentation', ['Takafumi Arakaki'], 1), ] # If true, show URL addresses after external links. # man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( 'index', 'orgparse', 'orgparse Documentation', 'Takafumi Arakaki', 'orgparse', 'One line description of project.', 'Miscellaneous', ), ] # -- Options for extensions ----------------------------------------------- # Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = {'http://docs.python.org/': None} autodoc_member_order = 'bysource' autodoc_default_flags = ['members'] inheritance_graph_attrs = {'rankdir': "TB"} orgparse-0.4.20251020/doc/source/dev.rst000066400000000000000000000006521510076213700174530ustar00rootroot00000000000000Internals ========= orgparse.node ------------- .. py:currentmodule:: orgparse.node .. automodule:: orgparse.node :members: :private-members: :noindex: orgparse.date ------------- .. py:currentmodule:: orgparse.date .. automodule:: orgparse.date :members: :private-members: :noindex: .. autoclass:: OrgDate :members: :private-members: :noindex: .. autoattribute:: OrgDate._active_default orgparse-0.4.20251020/doc/source/index.rst000066400000000000000000000024531510076213700200050ustar00rootroot00000000000000.. orgparse documentation master file, created by sphinx-quickstart on Sun Mar 4 22:50:33 2012. 
You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. .. automodule:: orgparse Tree structure interface ======================== .. py:module:: orgparse.node .. inheritance-diagram:: orgparse.node.OrgBaseNode orgparse.node.OrgRootNode orgparse.node.OrgNode :parts: 1 .. autoclass:: OrgBaseNode .. automethod:: __init__ .. autoclass:: OrgRootNode .. autoclass:: OrgNode .. autoclass:: OrgEnv Date interface ============== .. py:module:: orgparse.date .. inheritance-diagram:: orgparse.date.OrgDate orgparse.date.OrgDateSDCBase orgparse.date.OrgDateScheduled orgparse.date.OrgDateDeadline orgparse.date.OrgDateClosed orgparse.date.OrgDateClock orgparse.date.OrgDateRepeatedTask :parts: 1 .. autoclass:: OrgDate .. automethod:: __init__ .. autoclass:: OrgDateScheduled .. autoclass:: OrgDateDeadline .. autoclass:: OrgDateClosed .. autoclass:: OrgDateClock .. autoclass:: OrgDateRepeatedTask Further resources ================= .. toctree:: dev - `GitHub repository `_ Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` orgparse-0.4.20251020/mypy.ini000066400000000000000000000007601510076213700155750ustar00rootroot00000000000000[mypy] pretty = True show_error_context = True show_column_numbers = True show_error_end = True check_untyped_defs = True # see https://mypy.readthedocs.io/en/stable/error_code_list2.html warn_redundant_casts = True strict_equality = True warn_unused_ignores = True enable_error_code = deprecated,redundant-expr,possibly-undefined,truthy-bool,truthy-iterable,ignore-without-code,unused-awaitable # an example of suppressing # [mypy-my.config.repos.pdfannots.pdfannots] # ignore_errors = True orgparse-0.4.20251020/pyproject.toml000066400000000000000000000026431510076213700170140ustar00rootroot00000000000000[project] dynamic = ["version"] # version is managed by build backend name = "orgparse" dependencies = [ ] requires-python = ">=3.9" description = "orgparse - Emacs 
org-mode parser in Python" license = {file = "LICENSE"} authors = [ {name = "Takafumi Arakaki (@tkf)", email = "aka.tkf@gmail.com"}, {name = "Dmitrii Gerasimov (@karlicoss)", email = "karlicoss@gmail.com"}, ] maintainers = [ {name = "Dmitrii Gerasimov (@karlicoss)", email = "karlicoss@gmail.com"}, ] keywords = ["org", "org-mode", "emacs"] # see: http://pypi.python.org/pypi?%3Aaction=list_classifiers classifiers = [ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: BSD License", "Topic :: Text Processing :: Markup", ] # TODO add it back later, perhaps via ast? # long_description=orgparse.__doc__, [project.urls] Homepage = "https://github.com/karlicoss/orgparse" [project.optional-dependencies] [dependency-groups] testing = [ "pytest", "ruff", "mypy", "lxml", # for mypy html coverage "ty>=0.0.1a25", ] [build-system] requires = ["hatchling", "hatch-vcs"] build-backend = "hatchling.build" # unfortunately have to duplicate project name here atm, see https://github.com/pypa/hatch/issues/1894 [tool.hatch.build.targets.wheel] packages = ["src/orgparse"] [tool.hatch.version] source = "vcs" [tool.hatch.version.raw-options] version_scheme = "python-simplified-semver" local_scheme = "dirty-tag" orgparse-0.4.20251020/pytest.ini000066400000000000000000000011211510076213700161170ustar00rootroot00000000000000[pytest] # discover files that don't follow test_ naming. Useful to keep tests along with the source code python_files = *.py # this setting only impacts package/module naming under pytest, not the discovery consider_namespace_packages = true addopts = # prevent pytest cache from being created... 
it craps into project dir and I never use it anyway -p no:cacheprovider # -rap to print tests summary even when they are successful -rap --verbose # otherwise it won't discover doctests --doctest-modules # show all test durations (unless they are too short) --durations=0 orgparse-0.4.20251020/ruff.toml000066400000000000000000000112651510076213700157370ustar00rootroot00000000000000line-length = 120 # impacts import sorting lint.extend-select = [ "ALL", ] # Preserve types, even if a file imports `from __future__ import annotations` # we need this for cachew to work with HPI types on 3.9 # can probably remove after 3.10? lint.pyupgrade.keep-runtime-typing = true lint.ignore = [ "D", # annoying nags about docstrings "N", # pep naming "TCH", # type checking rules, mostly just suggests moving imports under TYPE_CHECKING "S", # bandit (security checks) -- tends to be not very useful, lots of nitpicks "DTZ", # datetimes checks -- complaining about missing tz and mostly false positives "FIX", # complains about fixmes/todos -- annoying "TD", # complains about todo formatting -- too annoying "ANN", # missing type annotations? seems way to strict though "EM" , # suggests assigning all exception messages into a variable first... pretty annoying ### too opinionated style checks "E501", # too long lines "E731", # assigning lambda instead of using def "E741", # Ambiguous variable name: `l` "E742", # Ambiguous class name: `O "E401", # Multiple imports on one line "F403", # import *` used; unable to detect undefined names ### ### "E722", # Do not use bare `except` ## Sometimes it's useful for defensive imports and that sort of thing.. "F811", # Redefinition of unused # this gets in the way of pytest fixtures (e.g. in cachew) ## might be nice .. 
but later and I don't wanna make it strict "E402", # Module level import not at top of file ### these are just nitpicky, we usually know better "PLR0911", # too many return statements "PLR0912", # too many branches "PLR0913", # too many function arguments "PLR0915", # too many statements "PLR1714", # consider merging multiple comparisons "PLR2044", # line with empty comment "PLR5501", # use elif instead of else if "PLR2004", # magic value in comparison -- super annoying in tests ### "PLR0402", # import X.Y as Y -- TODO maybe consider enabling it, but double check "B009", # calling getattr with constant attribute -- this is useful to convince mypy "B010", # same as above, but setattr "B017", # pytest.raises(Exception) "B023", # seems to result in false positives? # complains about useless pass, but has sort of a false positive if the function has a docstring? # this is common for click entrypoints (e.g. in __main__), so disable "PIE790", # a bit too annoying, offers to convert for loops to list comprehension # , which may hurt readability "PERF401", # suggests not using exceptions in for loops # we do use this technique a lot, plus in 3.11 happy path exception handling is "zero-cost" "PERF203", "RET504", # unnecessary assignment before returning -- that can be useful for readability "RET505", # unnecessary else after return -- can hurt readability "PLW0603", # global variable update.. we usually know why we are doing this "PLW2901", # for loop variable overwritten, usually this is intentional "PT011", # pytest raises is too broad "COM812", # trailing comma missing -- mostly just being annoying with long multiline strings "TRY003", # suggests defining exception messages in exception class -- kinda annoying "TRY201", # raise without specifying exception name -- sometimes hurts readability "TRY400", # a bit dumb, and results in false positives (see https://github.com/astral-sh/ruff/issues/18070) "TRY401", # redundant exception in logging.exception call?
TODO double check, might result in excessive logging "TID252", # Prefer absolute imports over relative imports from parent modules ## too annoying "T20", # just complains about prints and pprints (TODO maybe consider later?) "Q", # flake quotes, too annoying "C90", # some complexity checking "G004", # logging statement uses f string "ERA001", # commented out code "SLF001", # private member accessed "BLE001", # do not catch 'blind' Exception "INP001", # complains about implicit namespace packages "SIM102", # if statements collapsing, often hurts readability "SIM103", # multiple conditions collapsing, often hurts readability "SIM105", # suggests using contextlib.suppress instad of try/except -- this wouldn't be mypy friendly "SIM108", # suggests using ternary operation instead of if -- hurts readability "SIM110", # suggests using any(...) instead of for look/return -- hurts readability "SIM117", # suggests using single with statement instead of nested -- doesn't work in tests "RSE102", # complains about missing parens in exceptions ## ] orgparse-0.4.20251020/src/000077500000000000000000000000001510076213700146625ustar00rootroot00000000000000orgparse-0.4.20251020/src/orgparse/000077500000000000000000000000001510076213700165045ustar00rootroot00000000000000orgparse-0.4.20251020/src/orgparse/__init__.py000066400000000000000000000075141510076213700206240ustar00rootroot00000000000000# Import README.rst using cog # [[[cog # from cog import out # out('"""\n{0}\n"""'.format(open('../README.rst').read())) # ]]] """ =========================================================== orgparse - Python module for reading Emacs org-mode files =========================================================== * `Documentation (Read the Docs) `_ * `Repository (at GitHub) `_ * `PyPI `_ Install ------- pip install orgparse Usage ----- There are pretty extensive doctests if you're interested in some specific method. 
Otherwise here are some example snippets: Load org node ^^^^^^^^^^^^^ :: from orgparse import load, loads load('PATH/TO/FILE.org') load(file_like_object) loads(''' * This is org-mode contents You can load org object from string. ** Second header ''') Traverse org tree ^^^^^^^^^^^^^^^^^ >>> root = loads(''' ... * Heading 1 ... ** Heading 2 ... *** Heading 3 ... ''') >>> for node in root[1:]: # [1:] for skipping root itself ... print(node) * Heading 1 ** Heading 2 *** Heading 3 >>> h1 = root.children[0] >>> h2 = h1.children[0] >>> h3 = h2.children[0] >>> print(h1) * Heading 1 >>> print(h2) ** Heading 2 >>> print(h3) *** Heading 3 >>> print(h2.get_parent()) * Heading 1 >>> print(h3.get_parent(max_level=1)) * Heading 1 Accessing node attributes ^^^^^^^^^^^^^^^^^^^^^^^^^ >>> root = loads(''' ... * DONE Heading :TAG: ... CLOSED: [2012-02-26 Sun 21:15] SCHEDULED: <2012-02-26 Sun> ... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05 ... :PROPERTIES: ... :Effort: 1:00 ... :OtherProperty: some text ... :END: ... Body texts... ... ''') >>> node = root.children[0] >>> node.heading 'Heading' >>> node.scheduled OrgDateScheduled((2012, 2, 26)) >>> node.closed OrgDateClosed((2012, 2, 26, 21, 15, 0)) >>> node.clock [OrgDateClock((2012, 2, 26, 21, 10, 0), (2012, 2, 26, 21, 15, 0))] >>> bool(node.deadline) # it is not specified False >>> node.tags == set(['TAG']) True >>> node.get_property('Effort') 60 >>> node.get_property('UndefinedProperty') # returns None >>> node.get_property('OtherProperty') 'some text' >>> node.body ' Body texts...' """ # [[[end]]] from collections.abc import Iterable from pathlib import Path from typing import Optional, TextIO, Union from .node import OrgEnv, OrgNode, parse_lines # todo basenode?? __all__ = ["load", "loadi", "loads"] def load(path: Union[str, Path, TextIO], env: Optional[OrgEnv] = None) -> OrgNode: """ Load org-mode document from a file. 
def gene_timestamp_regex(brtype: str, prefix: str | None = None, *, nocookie: bool = False) -> str:
    """
    Generate timestamp regex for active/inactive/nobrace brace type

    The returned pattern is written in verbose syntax and must be compiled
    with ``re.VERBOSE``.

    :type brtype: {'active', 'inactive', 'nobrace'}
    :arg  brtype:
        It specifies a type of brace.
        active: <>-type; inactive: []-type; nobrace: no braces.

    :type prefix: str or None
    :arg  prefix:
        It will be prepended to the keys of the "groupdict".
        For example, if prefix is ``'active_'`` the groupdict has
        keys such as ``'active_year'``, ``'active_month'``, and so on.
        If it is None it will be set to ``brtype`` + ``'_'``.

    :type nocookie: bool
    :arg  nocookie:
        Despite its name, this flag never removes the cookie part
        (e.g., ``'-3d'`` or ``'+6m'``) from the pattern.  The cookie
        groups are always generated for ``'active'`` and ``'inactive'``;
        for ``'nobrace'`` they are generated only when this flag is
        ``True``.  Default value is ``False``.

    :rtype: str
    :raises ValueError: if ``brtype`` is not one of the three known types.

    >>> timestamp_re = re.compile(
    ...     gene_timestamp_regex('active', prefix=''),
    ...     re.VERBOSE)
    >>> timestamp_re.match('no match')  # returns None
    >>> m = timestamp_re.match('<2010-06-21 Mon>')
    >>> m.group()
    '<2010-06-21 Mon>'
    >>> '{year}-{month}-{day}'.format(**m.groupdict())
    '2010-06-21'
    >>> m = timestamp_re.match('<2005-10-01 Sat 12:30 +7m -3d>')
    >>> sorted(m.groupdict().items())
    ... # doctest: +NORMALIZE_WHITESPACE
    [('day', '01'),
     ('end_hour', None), ('end_min', None),
     ('hour', '12'), ('min', '30'),
     ('month', '10'),
     ('repeatdwmy', 'm'), ('repeatnum', '7'), ('repeatpre', '+'),
     ('warndwmy', 'd'), ('warnnum', '3'), ('warnpre', '-'),
     ('year', '2005')]

    When ``brtype = 'nobrace'``, cookie part cannot be retrieved.

    >>> timestamp_re = re.compile(
    ...     gene_timestamp_regex('nobrace', prefix=''),
    ...     re.VERBOSE)
    >>> timestamp_re.match('no match')  # returns None
    >>> m = timestamp_re.match('2010-06-21 Mon')
    >>> m.group()
    '2010-06-21'
    >>> '{year}-{month}-{day}'.format(**m.groupdict())
    '2010-06-21'
    >>> m = timestamp_re.match('2005-10-01 Sat 12:30 +7m -3d')
    >>> sorted(m.groupdict().items())
    ... # doctest: +NORMALIZE_WHITESPACE
    [('day', '01'),
     ('end_hour', None), ('end_min', None),
     ('hour', '12'), ('min', '30'),
     ('month', '10'),
     ('year', '2005')]
    """

    # Opening/closing brace, already escaped for use inside the regex.
    if brtype == 'active':
        (bo, bc) = ('<', '>')
    elif brtype == 'inactive':
        (bo, bc) = (r'\[', r'\]')
    elif brtype == 'nobrace':
        (bo, bc) = ('', '')
    else:
        # ``!r`` already adds the quotes, so none are written around it.
        raise ValueError(f"brtype={brtype!r} is invalid")

    # Characters that may separate the fields of a timestamp.  With braces,
    # anything up to the closing brace is skipped; without braces only
    # whitespace and word characters are, so the match cannot run away.
    if brtype == 'nobrace':
        ignore = r'[\s\w]'
    else:
        ignore = f'[^{bc}]'

    if prefix is None:
        prefix = f'{brtype}_'

    # NOTE: these templates go through ``str.format`` below, hence the
    # doubled braces around the regex repetition counts.
    regex_date_time = r"""
        (?P<{prefix}year>\d{{4}}) -
        (?P<{prefix}month>\d{{2}}) -
        (?P<{prefix}day>\d{{2}})
        (  # optional time field
           ({ignore}+?)
           (?P<{prefix}hour>\d{{2}}) :
           (?P<{prefix}min>\d{{2}})
           (  # optional end time range
               --?
               (?P<{prefix}end_hour>\d{{2}}) :
               (?P<{prefix}end_min>\d{{2}})
           )?
        )?
        """
    regex_cookie = r"""
        (  # optional repeater
           ({ignore}+?)
           (?P<{prefix}repeatpre>  [\.\+]{{1,2}})
           (?P<{prefix}repeatnum>  \d+)
           (?P<{prefix}repeatdwmy> [hdwmy])
        )?
        (  # optional warning
           ({ignore}+?)
           (?P<{prefix}warnpre>  \-)
           (?P<{prefix}warnnum>  \d+)
           (?P<{prefix}warndwmy> [hdwmy])
        )?
        """

    # The cookie groups are dropped only for a plain 'nobrace' pattern; the
    # SCHEDULED/DEADLINE/CLOSED parsers pass ``nocookie=True`` and still read
    # the repeater/warning groups, so this condition must stay as it is.
    regex = ''.join([
        bo,
        regex_date_time,
        regex_cookie if nocookie or brtype != 'nobrace' else '',
        '({ignore}*?)',
        bc,
    ])  # fmt: skip
    return regex.format(prefix=prefix, ignore=ignore)
:type repeater: tuple or None :arg repeater: Repeater interval. :type warning: tuple or None :arg warning: Deadline warning interval. >>> OrgDate(datetime.date(2012, 2, 10)) OrgDate((2012, 2, 10)) >>> OrgDate((2012, 2, 10)) OrgDate((2012, 2, 10)) >>> OrgDate((2012, 2)) #doctest: +NORMALIZE_WHITESPACE Traceback (most recent call last): ... ValueError: Automatic conversion to the datetime object requires at least 3 elements in the tuple. Only 2 elements are in the given tuple '(2012, 2)'. >>> OrgDate((2012, 2, 10, 12, 20, 30)) OrgDate((2012, 2, 10, 12, 20, 30)) >>> OrgDate((2012, 2, 10), (2012, 2, 15), active=False) OrgDate((2012, 2, 10), (2012, 2, 15), False) OrgDate can be created using unix timestamp: >>> OrgDate(datetime.datetime.fromtimestamp(0)) == OrgDate(0) True """ self._start = self._to_date(start) self._end = self._to_date(end) self._active = self._active_default if active is None else active # repeater and warning are tuples of (prefix, number, interval) self._repeater = repeater self._warning = warning @staticmethod def _to_date(date) -> DateIsh: if isinstance(date, (tuple, list)): if len(date) == 3: return datetime.date(*date) elif len(date) > 3: return datetime.datetime(*date) else: raise ValueError( "Automatic conversion to the datetime object " "requires at least 3 elements in the tuple. " f"Only {len(date)} elements are in the given tuple '{date}'." 
) elif isinstance(date, (int, float)): return datetime.datetime.fromtimestamp(date) else: return date @staticmethod def _date_to_tuple(date: DateIsh) -> tuple[int, ...]: if isinstance(date, datetime.datetime): return tuple(date.timetuple()[:6]) elif isinstance(date, datetime.date): return tuple(date.timetuple()[:3]) else: raise TypeError(f"can't happen: {date} {type(date)}") def __repr__(self) -> str: args = [ self.__class__.__name__, self._date_to_tuple(self.start), self._date_to_tuple(self.end) if self.has_end() else None, None if self._active is self._active_default else self._active, self._repeater, self._warning, ] while args[-1] is None: args.pop() if len(args) > 3 and args[3] is None: args[3] = self._active_default return '{}({})'.format(args[0], ', '.join(map(repr, args[1:]))) def __str__(self) -> str: fence = ("<", ">") if self.is_active() else ("[", "]") start = date_time_format(self.start) end = None if self.has_end(): if self._allow_short_range and is_same_day(self.start, self.end): start += "--{}".format(self.end.strftime("%H:%M")) else: end = date_time_format(self.end) if self._repeater is not None: (x, y, z) = self._repeater start += f" {x}{y}{z}" if self._warning is not None: (x, y, z) = self._warning start += f" {x}{y}{z}" ret = f"{fence[0]}{start}{fence[1]}" if end: ret += f"--{fence[0]}{end}{fence[1]}" return ret def __bool__(self) -> bool: return bool(self._start) def __hash__(self) -> int: return hash((self._start, self._end, self._active, self._repeater, self._warning)) def __eq__(self, other) -> bool: if isinstance(other, OrgDate) and self._start is None and other._start is None: return True return ( isinstance(other, self.__class__) and self._start == other._start and self._end == other._end and self._active == other._active ) @property def start(self) -> DateIsh: """ Get date or datetime object >>> OrgDate((2012, 2, 10)).start datetime.date(2012, 2, 10) >>> OrgDate((2012, 2, 10, 12, 10)).start datetime.datetime(2012, 2, 10, 12, 10) """ 
return self._start @property def end(self) -> DateIsh: """ Get date or datetime object >>> OrgDate((2012, 2, 10), (2012, 2, 15)).end datetime.date(2012, 2, 15) >>> OrgDate((2012, 2, 10, 12, 10), (2012, 2, 15, 12, 10)).end datetime.datetime(2012, 2, 15, 12, 10) """ return self._end def is_active(self) -> bool: """Return true if the date is active""" return self._active def has_end(self) -> bool: """Return true if it has the end date""" return bool(self._end) def has_time(self) -> bool: """ Return true if the start date has time field >>> OrgDate((2012, 2, 10)).has_time() False >>> OrgDate((2012, 2, 10, 12, 10)).has_time() True """ return isinstance(self._start, datetime.datetime) def has_overlap(self, other) -> bool: """ Test if it has overlap with other :class:`OrgDate` instance If the argument is not an instance of :class:`OrgDate`, it is converted to :class:`OrgDate` instance by ``OrgDate(other)`` first. >>> od = OrgDate((2012, 2, 10), (2012, 2, 15)) >>> od.has_overlap(OrgDate((2012, 2, 11))) True >>> od.has_overlap(OrgDate((2012, 2, 20))) False >>> od.has_overlap(OrgDate((2012, 2, 11), (2012, 2, 20))) True >>> od.has_overlap((2012, 2, 11)) True """ if not isinstance(other, OrgDate): other = OrgDate(other) if self.has_end(): return self._datetime_in_range(other.start) or self._datetime_in_range(other.end) elif other.has_end(): return other._datetime_in_range(self.start) elif self.start == other.start: return True else: return False def _datetime_in_range(self, date): if not isinstance(date, (datetime.datetime, datetime.date)): return False asdt = self._as_datetime if asdt(self.start) <= asdt(date) <= asdt(self.end): return True return False @staticmethod def _as_datetime(date) -> datetime.datetime: """ Convert the given date into datetime (if it already is, return it unmodified """ if not isinstance(date, datetime.datetime): return datetime.datetime(*date.timetuple()[:3]) return date @staticmethod def _daterange_from_groupdict(dct, prefix='') -> tuple[list, 
@classmethod
def list_from_str(cls, string: str) -> list[OrgDate]:
    """
    Parse string and return a list of :class:`OrgDate` objects

    Timestamps are collected left to right.  Two timestamps of the same
    brace type joined directly by ``--`` are merged into a single range;
    the second timestamp must start right after the dashes.

    >>> OrgDate.list_from_str("... <2012-02-10 Fri> and <2012-02-12 Sun>")
    [OrgDate((2012, 2, 10)), OrgDate((2012, 2, 12))]
    >>> OrgDate.list_from_str("<2012-02-10 Fri>--<2012-02-12 Sun>")
    [OrgDate((2012, 2, 10), (2012, 2, 12))]
    >>> OrgDate.list_from_str("<2012-02-10 Fri>--[2012-02-12 Sun]")
    [OrgDate((2012, 2, 10)), OrgDate((2012, 2, 12), None, False)]
    >>> OrgDate.list_from_str("<2012-02-10 Fri>--<x> [2012-02-12 Sun]")
    [OrgDate((2012, 2, 10)), OrgDate((2012, 2, 12), None, False)]
    >>> OrgDate.list_from_str("this is not timestamp")
    []
    >>> OrgDate.list_from_str("<2012-02-11 Sat 10:11--11:20>")
    [OrgDate((2012, 2, 11, 10, 11, 0), (2012, 2, 11, 11, 20, 0))]
    """
    cookie_suffix = ('pre', 'num', 'dwmy')

    def cookie(mdict: dict, key: str) -> Optional[tuple[str, int, str]]:
        # ``key`` is the group-name stem, e.g. 'active_repeat'.
        if mdict[key + 'pre'] is None:
            return None
        (pre, num, dwmy) = (mdict[key + suffix] for suffix in cookie_suffix)
        return (pre, int(num), dwmy)

    dates: list[OrgDate] = []
    pos = 0
    # Iterate instead of recursing so that a text with very many
    # timestamps cannot exhaust the recursion limit.
    while True:
        match = TIMESTAMP_RE.search(string, pos)
        if match is None:
            return dates
        pos = match.end()
        mdict = match.groupdict()
        if mdict['active_year']:
            (prefix, active, rangedash) = ('active_', True, '--<')
        else:
            (prefix, active, rangedash) = ('inactive_', False, '--[')
        repeater = cookie(mdict, prefix + 'repeat')
        warning = cookie(mdict, prefix + 'warn')

        # A range end only counts when it begins immediately after the
        # dashes.  Searching further ahead could pair this date with an
        # unrelated timestamp, or with one of the other brace type whose
        # groups under ``prefix`` are all empty.
        match2 = None
        if string.startswith(rangedash, pos):
            match2 = TIMESTAMP_RE.match(string, pos + len('--'))

        if match2 is not None:
            pos = match2.end()
            # no need for check activeness here because of the rangedash
            mdict2 = match2.groupdict()
            odate = cls(
                cls._datetuple_from_groupdict(mdict, prefix),
                cls._datetuple_from_groupdict(mdict2, prefix),
                active=active,
                repeater=repeater,
                warning=warning,
            )
        else:
            odate = cls(
                *cls._daterange_from_groupdict(mdict, prefix),
                active=active,
                repeater=repeater,
                warning=warning,
            )
        dates.append(odate)
# FIXME: use OrgDate.from_str
@classmethod
def from_str(cls, string):
    """
    Build an instance from the first match of the class regex ``_re``.

    The start comes from the date/time groups; when both end-time groups
    are present, the end is the same date with that time.  Repeater and
    warning cookies are passed on as ``(prefix, number, unit)`` tuples.
    Without a match an empty instance ``cls(None)`` is returned.
    """
    pattern = cls._re
    assert pattern is not None
    found = pattern.search(string)
    if not found:
        return cls(None)
    groups = found.groupdict()

    def cookie(kind: str) -> Optional[tuple[str, int, str]]:
        # ``kind`` is 'repeat' or 'warn'; the groups carry no prefix here.
        if groups[kind + 'pre'] is None:
            return None
        return (groups[kind + 'pre'], int(groups[kind + 'num']), groups[kind + 'dwmy'])

    begin = cls._datetuple_from_groupdict(groups)
    finish = None
    (end_hour, end_min) = (groups['end_hour'], groups['end_min'])
    if end_hour is not None and end_min is not None:
        # Same day as the start, with the end time swapped in.
        finish = cls._datetuple_from_groupdict({**groups, 'hour': end_hour, 'min': end_min})
    return cls(
        begin,
        finish,
        active=cls._active_default,
        repeater=cookie('repeat'),
        warning=cookie('warn'),
    )
'CLOCK: [2010-08-08 Sun 17:00]--[2010-08-08 Sun 17:30] => 0:30') OrgDateClock((2010, 8, 8, 17, 0, 0), (2010, 8, 8, 17, 30, 0)) """ _active_default = False _allow_short_range = False def __init__(self, start, end=None, duration=None, active=None): """ Create OrgDateClock object """ super().__init__(start, end, active=active) self._duration = duration @property def duration(self): """ Get duration of CLOCK. >>> duration = OrgDateClock.from_str( ... 'CLOCK: [2010-08-08 Sun 17:00]--[2010-08-08 Sun 17:30] => 0:30' ... ).duration >>> duration.seconds 1800 >>> total_minutes(duration) 30.0 """ return self.end - self.start def is_duration_consistent(self): """ Check duration value of CLOCK line. >>> OrgDateClock.from_str( ... 'CLOCK: [2010-08-08 Sun 17:00]--[2010-08-08 Sun 17:30] => 0:30' ... ).is_duration_consistent() True >>> OrgDateClock.from_str( ... 'CLOCK: [2010-08-08 Sun 17:00]--[2010-08-08 Sun 17:30] => 0:15' ... ).is_duration_consistent() False """ return self._duration is None or self._duration == total_minutes(self.duration) @classmethod def from_str(cls, line: str) -> OrgDateClock: """ Get CLOCK from given string. Return three tuple (start, stop, length) which is datetime object of start time, datetime object of stop time and length in minute. 
""" match = cls._re.search(line) if not match: return cls(None, None) ymdhm1 = [int(d) for d in match.groups()[:5]] # second part starting with "--", does not exist for open clock dates has_end = bool(match.group(6)) ymdhm2_dt: Optional[datetime.datetime] len_min: Optional[int] if has_end: ymdhm2 = [int(d) for d in match.groups()[6:11]] hm3 = [int(d) for d in match.groups()[11:]] ymdhm2_dt = datetime.datetime(*ymdhm2) # type: ignore[arg-type] len_min = hm3[0] * 60 + hm3[1] else: ymdhm2_dt = None len_min = None return cls( datetime.datetime(*ymdhm1), # type: ignore[arg-type] ymdhm2_dt, len_min, ) _re = re.compile( r'^(?!#).*CLOCK:\s+' r'\[(\d+)\-(\d+)\-(\d+)[^\]\d]*(\d+)\:(\d+)\]' r'(--\[(\d+)\-(\d+)\-(\d+)[^\]\d]*(\d+)\:(\d+)\]\s+=>\s+(\d+)\:(\d+))?' ) class OrgDateRepeatedTask(OrgDate): """ Date object to represent repeated tasks. """ _active_default = False def __init__(self, start, before: str, after: str, active=None) -> None: super().__init__(start, active=active) self._before = before self._after = after def __repr__(self) -> str: args: list = [self._date_to_tuple(self.start), self.before, self.after] if self._active is not self._active_default: args.append(self._active) return '{}({})'.format(self.__class__.__name__, ', '.join(map(repr, args))) def __hash__(self) -> int: return hash((self._before, self._after)) def __eq__(self, other) -> bool: return ( super().__eq__(other) and isinstance(other, self.__class__) and self._before == other._before and self._after == other._after ) @property def before(self) -> str: """ The state of task before marked as done. >>> od = OrgDateRepeatedTask((2005, 9, 1, 16, 10, 0), 'TODO', 'DONE') >>> od.before 'TODO' """ return self._before @property def after(self) -> str: """ The state of task after marked as done. 
# Horizontal rule of an org table, e.g. ``|---+---|``.
RE_TABLE_SEPARATOR = re.compile(r'\s*\|(\-+\+)*\-+\|')
# Start of a table row: optional indent, a pipe, a non-empty cell, a pipe.
# The cell is matched with a single quantifier: the nested ``([^|]+)+`` form
# accepts the same strings but backtracks exponentially on a line that opens
# with a pipe and never closes it.
RE_TABLE_ROW = re.compile(r'\s*\|([^|]+)\|')
# When true, surrounding whitespace is stripped from every parsed cell.
STRIP_CELL_WHITESPACE = True

# A parsed table row: one string per cell.
Row = Sequence[str]
def to_plain_text(org_text):
    """
    Convert an org-mode text into a plain text.

    A bare ``[[link]]`` is replaced by the link text itself, and a
    described ``[[link][description]]`` by its description.  Everything
    outside of link markup is returned unchanged.

    >>> to_plain_text('there is a [[link]] in text')
    'there is a link in text'
    >>> to_plain_text('some [[link][more complex link]] here')
    'some more complex link here'
    >>> print(to_plain_text('''It can handle
    ... [[link][multi
    ... line
    ... link]].
    ... See also: [[info:org#Link%20format][info:org#Link format]]'''))
    It can handle
    multi
    line
    link.
    See also: info:org#Link format

    """
    # Exactly one of the two alternatives matches, so the other group is None.
    return RE_LINK.sub(lambda m: m.group('desc0') or m.group('desc1'), org_text)


# The group names are required: to_plain_text() looks the replacement text up
# by name ('desc0' for bare links, 'desc1' for described links).
RE_LINK = re.compile(
    r"""
    (?:
        \[ \[
            (?P<desc0> [^\]]+)
        \] \]
    ) |
    (?:
        \[ \[
            (?P<link1> [^\]]+)
        \] \[
            (?P<desc1> [^\]]+)
        \] \]
    )
    """,
    re.VERBOSE,
)
def parse_heading_todos(heading: str, todo_candidates: list[str]) -> tuple[str, Optional[str]]:
    """
    Split a leading TODO keyword off a heading.

    The candidates are tried in order; the first one that either equals the
    whole heading or is followed by a space wins.  Returns
    ``(heading_without_keyword, keyword)``, or ``(heading, None)`` when no
    candidate applies.

    >>> todos = ['TODO', 'DONE']
    >>> parse_heading_todos('Normal heading', todos)
    ('Normal heading', None)
    >>> parse_heading_todos('TODO Heading', todos)
    ('Heading', 'TODO')

    """
    for keyword in todo_candidates:
        if heading == keyword:
            # The heading consists of the keyword only.
            return ('', keyword)
        prefix = keyword + ' '
        if heading.startswith(prefix):
            return (heading[len(prefix):], keyword)
    return (heading, None)


def parse_heading_priority(heading):
    """
    Split a leading priority cookie (``[#X]``) off a heading.

    Returns ``(heading_without_priority, priority)``; the priority is
    ``None`` when the heading does not start with a cookie.

    >>> parse_heading_priority('HEADING')
    ('HEADING', None)
    >>> parse_heading_priority('[#A] HEADING')
    ('HEADING', 'A')
    >>> parse_heading_priority('[#0] HEADING')
    ('HEADING', '0')
    >>> parse_heading_priority('[#A]')
    ('', 'A')

    """
    found = RE_HEADING_PRIORITY.search(heading)
    if found is None:
        return (heading, None)
    priority, remainder = found.groups()
    return (remainder, priority)


# Group 1: the priority character; group 2: the rest of the heading.
RE_HEADING_PRIORITY = re.compile(r'^\s*\[#([A-Z0-9])\] ?(.*)$')
def parse_duration_to_minutes(duration: str) -> Union[float, int]:
    """
    Parse duration minutes from given string.
    Convert to integer if number has no decimal points

    >>> parse_duration_to_minutes('3:12')
    192
    >>> parse_duration_to_minutes('1:23:45')
    83.75
    >>> parse_duration_to_minutes('1y 3d 3h 4min')
    530464
    >>> parse_duration_to_minutes('1d3h5min')
    1625
    >>> parse_duration_to_minutes('3d 13:35')
    5135
    >>> parse_duration_to_minutes('2.35h')
    141
    >>> parse_duration_to_minutes('10')
    10
    >>> parse_duration_to_minutes('10.')
    10
    >>> parse_duration_to_minutes('1 h')
    60
    >>> parse_duration_to_minutes('')
    0
    """
    minutes = parse_duration_to_minutes_float(duration)
    return int(minutes) if minutes.is_integer() else minutes


def parse_duration_to_minutes_float(duration: str) -> float:
    """
    Parse duration minutes from given string.

    The accepted formats are tried in this order: ``H:MM`` / ``H:MM:SS``,
    numbers with units (``1d 3h``), units followed by ``H:MM[:SS]``
    (``3d 13:35``) and finally a bare number, which is taken as minutes.
    An empty string gives ``0.0``.

    The following code is fully compatible with the 'org-duration-to-minutes'
    function in org mode:
    https://github.com/emacs-mirror/emacs/blob/master/lisp/org/org-duration.el

    :raises ValueError: if ``duration`` matches none of the formats.

    >>> parse_duration_to_minutes_float('3:12')
    192.0
    >>> parse_duration_to_minutes_float('1:23:45')
    83.75
    >>> parse_duration_to_minutes_float('1y 3d 3h 4min')
    530464.0
    >>> parse_duration_to_minutes_float('1d3h5min')
    1625.0
    >>> parse_duration_to_minutes_float('3d 13:35')
    5135.0
    >>> parse_duration_to_minutes_float('2.35h')
    141.0
    >>> parse_duration_to_minutes_float('10')
    10.0
    >>> parse_duration_to_minutes_float('10.')
    10.0
    >>> parse_duration_to_minutes_float('1 h')
    60.0
    >>> parse_duration_to_minutes_float('')
    0.0
    """
    if duration == "":
        return 0.0
    if isinstance(duration, float):
        return float(duration)
    if RE_ORG_DURATION_H_MM.fullmatch(duration):
        hours, minutes, *seconds_ = map(float, duration.split(":"))
        seconds = seconds_[0] if seconds_ else 0
        return seconds / 60.0 + minutes + 60 * hours
    if RE_ORG_DURATION_FULL.fullmatch(duration):
        total = 0.0
        for unit_match in RE_ORG_DURATION_UNIT.finditer(duration):
            value = float(unit_match.group(1))
            unit = unit_match.group(2)
            total += value * ORG_DURATION_UNITS[unit]
        return float(total)
    mixed: Optional[Any] = RE_ORG_DURATION_MIXED.fullmatch(duration)
    if mixed:
        # Both halves are valid durations on their own, so recurse on each.
        units_part = mixed.group('A')
        hms_part = mixed.group('B')
        return parse_duration_to_minutes_float(units_part) + parse_duration_to_minutes_float(hms_part)
    if RE_FLOAT.fullmatch(duration):
        return float(duration)
    raise ValueError(f"Invalid duration format {duration}")


# Conversion factor to minutes for a duration.
ORG_DURATION_UNITS = {
    "min": 1,
    "h": 60,
    "d": 60 * 24,
    "w": 60 * 24 * 7,
    "m": 60 * 24 * 30,
    "y": 60 * 24 * 365.25,
}
# Regexp matching for all units.
# The alternation keeps the dict order, so "min" is tried before "m".
ORG_DURATION_UNITS_RE = r'({})'.format(r'|'.join(ORG_DURATION_UNITS.keys()))
# Regexp matching a duration expressed with H:MM or H:MM:SS format.
# Hours can use any number of digits.
ORG_DURATION_H_MM_RE = r'[ \t]*[0-9]+(?::[0-9]{2}){1,2}[ \t]*'
RE_ORG_DURATION_H_MM = re.compile(ORG_DURATION_H_MM_RE)
# Regexp matching a duration with an unit.
# Allowed units are defined in ORG_DURATION_UNITS.
# Match group 1 contains the bare number.
# Match group 2 contains the unit.
ORG_DURATION_UNIT_RE = r'([0-9]+(?:[.][0-9]*)?)[ \t]*' + ORG_DURATION_UNITS_RE
RE_ORG_DURATION_UNIT = re.compile(ORG_DURATION_UNIT_RE)
# Regexp matching a duration expressed with units.
# Allowed units are defined in ORG_DURATION_UNITS.
ORG_DURATION_FULL_RE = rf'(?:[ \t]*{ORG_DURATION_UNIT_RE})+[ \t]*'
RE_ORG_DURATION_FULL = re.compile(ORG_DURATION_FULL_RE)
# Regexp matching a duration expressed with units and H:MM or H:MM:SS format.
# Allowed units are defined in ORG_DURATION_UNITS.
# Match group A contains units part.
# Match group B contains H:MM or H:MM:SS part.
ORG_DURATION_MIXED_RE = (
    rf'(?P<A>([ \t]*{ORG_DURATION_UNIT_RE})+)[ \t]*(?P<B>[0-9]+(?::[0-9][0-9]){{1,2}})[ \t]*'
)
RE_ORG_DURATION_MIXED = re.compile(ORG_DURATION_MIXED_RE)
# Regexp matching float numbers.
RE_FLOAT = re.compile(r'[0-9]+([.][0-9]*)?')
def parse_comment(line: str):
    """
    Parse special comment such as ``#+SEQ_TODO``

    Returns ``(key, values)`` for a ``#+KEY: value`` line, or ``None`` if
    the line does not start with ``#+`` or has no colon after the key.

    >>> parse_comment('#+SEQ_TODO: TODO | DONE')
    ('SEQ_TODO', ['TODO | DONE'])
    >>> parse_comment('# not a special comment')  # None
    >>> parse_comment('#+FILETAGS: :tag1:tag2:')
    ('FILETAGS', ['tag1', 'tag2'])
    """
    marker = re.match(r'\s*#\+', line)
    if not marker:
        return None
    key, colon, rest = line[marker.end(0):].partition(':')
    if not colon:
        # "#+SOMETHING" without a colon carries no value.
        return None
    value = rest.strip()
    if key.upper() != 'FILETAGS':
        return (key, [value])
    # just legacy behaviour; it seems like filetags is the only one that separated by ':'
    # see https://orgmode.org/org.html#In_002dbuffer-Settings
    pieces = (piece.strip() for piece in value.split(':'))
    return (key, [piece for piece in pieces if piece])


def parse_seq_todo(line):
    """
    Parse value part of SEQ_TODO/TODO/TYP_TODO comment.

    Keywords before the first ``|`` are TODO states, those after it are
    DONE states; a trailing fast access key such as ``(r)`` is dropped.

    >>> parse_seq_todo('TODO | DONE')
    (['TODO'], ['DONE'])
    >>> parse_seq_todo(' Fred  Sara   Lucy Mike  |  DONE  ')
    (['Fred', 'Sara', 'Lucy', 'Mike'], ['DONE'])
    >>> parse_seq_todo('| CANCELED')
    ([], ['CANCELED'])
    >>> parse_seq_todo('REPORT(r) BUG(b) KNOWNCAUSE(k) | FIXED(f)')
    (['REPORT', 'BUG', 'KNOWNCAUSE'], ['FIXED'])

    See also:

    * (info "(org) Per-file keywords")
    * (info "(org) Fast access to TODO states")

    """
    # Without a '|' the whole line is the TODO part and the DONE part is empty.
    todo_part, _, done_part = line.partition('|')
    return (
        [word.split('(', 1)[0] for word in todo_part.split()],
        [word.split('(', 1)[0] for word in done_part.split()],
    )
# parser
def from_chunks(self, chunks):
    """
    Yield one node per chunk of lines.

    The first chunk is turned into the root node
    (``OrgRootNode.from_chunk``), every following chunk into a regular
    node (``OrgNode.from_chunk``).

    :arg chunks: Any iterable of line lists.  An empty iterable yields
        nothing.
    """
    chunk_iter = iter(chunks)
    try:
        first = next(chunk_iter)
    except StopIteration:
        # A StopIteration escaping a generator body is converted into a
        # RuntimeError (PEP 479); end the generator cleanly instead.
        return
    yield OrgRootNode.from_chunk(self, first)
    for chunk in chunk_iter:
        yield OrgNode.from_chunk(self, chunk)
:class:`OrgBaseNode` is an iterable object: >>> from orgparse import loads >>> root = loads(''' ... * Heading 1 ... ** Heading 2 ... *** Heading 3 ... * Heading 4 ... ''') >>> for node in root: ... print(node) * Heading 1 ** Heading 2 *** Heading 3 * Heading 4 Note that the first blank line is due to the root node, as iteration contains the object itself. To skip that, use slice access ``[1:]``: >>> for node in root[1:]: ... print(node) * Heading 1 ** Heading 2 *** Heading 3 * Heading 4 It also supports sequence protocol. >>> print(root[1]) * Heading 1 >>> root[0] is root # index 0 means itself True >>> len(root) # remember, sequence contains itself 5 Note the difference between ``root[1:]`` and ``root[1]``: >>> for node in root[1]: ... print(node) * Heading 1 ** Heading 2 *** Heading 3 Nodes remember the line number information (1-indexed): >>> print(root.children[1].linenumber) 5 """ _body_lines: list[str] # set by the child classes def __init__(self, env: OrgEnv, index: int | None = None) -> None: self.env = env self.linenumber = cast(int, None) # set in parse_lines # content self._lines: list[str] = [] self._properties: dict[str, PropertyValue] = {} self._timestamps: list[OrgDate] = [] # FIXME: use `index` argument to set index. (Currently it is # done externally in `parse_lines`.) if index is not None: self._index = index """ Index of `self` in `self.env.nodes`. It must satisfy an equality:: self.env.nodes[self._index] is self This value is used for quick access for iterator and tree-like traversing. """ def __iter__(self): yield self level = self.level for node in self.env._nodes[self._index + 1 :]: if node.level > level: yield node else: break def __len__(self) -> int: return sum(1 for _ in self) def __bool__(self) -> bool: # As self.__len__ returns non-zero value always this is not # needed. This function is only for performance. 
def get_parent(self, max_level: int | None = None):
    """
    Return a parent node.

    :arg int max_level:
        In the normally structured org file, it is a level
        of the ancestor node to return.  For example,
        ``get_parent(max_level=0)`` returns a root node.

        In the general case, it specifies a maximum level of the
        desired ancestor node.  If there is no ancestor node
        whose level is equal to ``max_level``, this function
        tries to find an ancestor node whose level is smaller
        than ``max_level``.

    Returns ``None`` when no ancestor satisfies ``max_level``.

    >>> from orgparse import loads
    >>> root = loads('''
    ... * Node 1
    ... ** Node 2
    ... ** Node 3
    ... ''')
    >>> (n1, n2, n3) = list(root[1:])
    >>> n1.get_parent() is root
    True
    >>> n2.get_parent() is n1
    True
    >>> n3.get_parent() is n1
    True

    For simplicity, accessing :attr:`parent` is alias of calling
    :meth:`get_parent` without argument.

    >>> n1.get_parent() is n1.parent
    True
    >>> root.parent is None
    True

    This is a little bit pathological situation -- but works.

    >>> root = loads('''
    ... * Node 1
    ... *** Node 2
    ... ** Node 3
    ... ''')
    >>> (n1, n2, n3) = list(root[1:])
    >>> n1.get_parent() is root
    True
    >>> n2.get_parent() is n1
    True
    >>> n3.get_parent() is n1
    True

    Now let's play with `max_level`.

    >>> root = loads('''
    ... * Node 1 (level 1)
    ... ** Node 2 (level 2)
    ... *** Node 3 (level 3)
    ... ''')
    >>> (n1, n2, n3) = list(root[1:])
    >>> n3.get_parent() is n2
    True
    >>> n3.get_parent(max_level=2) is n2  # same as default
    True
    >>> n3.get_parent(max_level=1) is n1
    True
    >>> n3.get_parent(max_level=0) is root
    True

    """
    if max_level is None:
        max_level = self.level - 1
    parent = self._find_parent()
    # Climb one ancestor at a time.  Stop when the chain runs out (parent is
    # None) instead of reading ``.level`` from None.
    while parent is not None and parent.level > max_level:
        parent = parent.get_parent()
    return parent
def get_property(self, key, val=None) -> Optional[PropertyValue]:
    """
    Look up a single node property.

    :arg str key:
        Key of property.

    :arg val:
        Value returned when the node has no property named ``key``.

    """
    known = self._properties
    return known[key] if key in known else val
def get_timestamps(self, active=False, inactive=False, range=False, point=False):  # noqa: FBT002,A002
    # will fix later
    """
    Return a list of timestamps in the body text.

    A timestamp is returned only if both its activity (active/inactive)
    and its shape (range/point) are switched on by the flags below.

    :type   active: bool
    :arg    active: Include active type timestamps.
    :type inactive: bool
    :arg  inactive: Include inactive type timestamps.
    :type    range: bool
    :arg     range: Include timestamps which has end date.
    :type    point: bool
    :arg     point: Include timestamps which has no end date.

    :rtype: list of :class:`orgparse.date.OrgDate` subclasses


    Consider the following org node:

    >>> from orgparse import loads
    >>> node = loads('''
    ... * Node
    ... CLOSED: [2012-02-26 Sun 21:15] SCHEDULED: <2012-02-26 Sun>
    ... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05
    ... Some inactive timestamp [2012-02-23 Thu] in body text.
    ... Some active timestamp <2012-02-24 Fri> in body text.
    ... Some inactive time range [2012-02-25 Sat]--[2012-02-27 Mon].
    ... Some active time range <2012-02-26 Sun>--<2012-02-28 Tue>.
    ... ''').children[0]

    The default flags are all off, so it does not return anything.

    >>> node.get_timestamps()
    []

    You can fetch appropriate timestamps using keyword arguments.

    >>> node.get_timestamps(inactive=True, point=True)
    [OrgDate((2012, 2, 23), None, False)]
    >>> node.get_timestamps(active=True, point=True)
    [OrgDate((2012, 2, 24))]
    >>> node.get_timestamps(inactive=True, range=True)
    [OrgDate((2012, 2, 25), (2012, 2, 27), False)]
    >>> node.get_timestamps(active=True, range=True)
    [OrgDate((2012, 2, 26), (2012, 2, 28))]

    This is more complex example.  Only active timestamps,
    regardless of range/point type.

    >>> node.get_timestamps(active=True, point=True, range=True)
    [OrgDate((2012, 2, 24)), OrgDate((2012, 2, 26), (2012, 2, 28))]

    """
    selected = []
    for stamp in self._timestamps:
        # The flag that applies depends on which kind the timestamp is.
        activity_wanted = active if stamp.is_active() else inactive
        if not activity_wanted:
            continue
        shape_wanted = range if stamp.has_end() else point
        if shape_wanted:
            selected.append(stamp)
    return selected
def get_file_property(self, property: str):  # noqa: A002
    """
    Return the only value recorded for a special comment, or None.

    The name is looked up in upper case.  ``None`` is returned when
    nothing is recorded under it.

    :raises RuntimeError: if the recorded list does not hold exactly one
        value.
    """
    vals = self._special_comments.get(property.upper())
    if vals is None:
        return None
    if len(vals) != 1:
        raise RuntimeError(f'Multiple values for property {property}: {vals}')
    return vals[0]
""" @property def heading(self) -> str: return '' def _get_tags(self, *, inher: bool = False) -> set[str]: # noqa: ARG002 filetags = self.get_file_property_list('FILETAGS') return set(filetags) @property def level(self) -> int: return 0 def get_parent(self, max_level=None): # noqa: ARG002 return None def is_root(self) -> bool: return True # parsers def _parse_pre(self): """Call parsers which must be called before tree structuring""" ilines: Iterator[str] = iter(self._lines) ilines = self._iparse_properties(ilines) ilines = self._iparse_timestamps(ilines) self._body_lines = list(ilines) def _iparse_timestamps(self, ilines: Iterator[str]) -> Iterator[str]: self._timestamps = [] for line in ilines: self._timestamps.extend(OrgDate.list_from_str(line)) yield line class OrgNode(OrgBaseNode): """ Node to represent normal org node See :class:`OrgBaseNode` for other available functions. """ def __init__(self, *args, **kwds) -> None: super().__init__(*args, **kwds) # fixme instead of casts, should organize code in such a way that they aren't necessary self._heading = cast(str, None) self._level: int | None = None self._tags = cast(list[str], None) self._todo: Optional[str] = None self._priority = None self._scheduled = OrgDateScheduled(None) self._deadline = OrgDateDeadline(None) self._closed = OrgDateClosed(None) self._clocklist: list[OrgDateClock] = [] self._body_lines: list[str] = [] self._repeated_tasks: list[OrgDateRepeatedTask] = [] # parser def _parse_pre(self): """Call parsers which must be called before tree structuring""" self._parse_heading() # FIXME: make the following parsers "lazy" ilines: Iterator[str] = iter(self._lines) try: next(ilines) # skip heading except StopIteration: return ilines = self._iparse_sdc(ilines) ilines = self._iparse_clock(ilines) ilines = self._iparse_properties(ilines) ilines = self._iparse_repeated_tasks(ilines) ilines = self._iparse_timestamps(ilines) self._body_lines = list(ilines) def _parse_heading(self) -> None: heading = 
def _iparse_repeated_tasks(self, ilines: Iterator[str]) -> Iterator[str]:
    """
    Collect repeated-task state changes and pass every other line through.

    A line such as ``- State "DONE" from "TODO" [2005-09-01 Thu 16:10]``
    is consumed and recorded in ``self._repeated_tasks``; lines that do not
    match are yielded unchanged.
    """
    self._repeated_tasks = []
    for line in ilines:
        match = self._repeated_tasks_re.search(line)
        if match:
            # FIXME: move this parsing to OrgDateRepeatedTask.from_str
            mdict = match.groupdict()
            done_state = mdict['done']
            todo_state = mdict['todo']
            date = OrgDate.from_str(mdict['date'])
            # The constructor takes the state before the change first.
            self._repeated_tasks.append(OrgDateRepeatedTask(date.start, todo_state, done_state))
        else:
            yield line

# The group names are read by _iparse_repeated_tasks via groupdict():
# 'done' is the new state, 'todo' the previous one, 'date' the bracketed
# timestamp text.
_repeated_tasks_re = re.compile(
    r'''
    \s*- \s+
    State \s+ "(?P<done> [^"]+)" \s+
    from  \s+ "(?P<todo> [^"]+)" \s+
    \[ (?P<date> [^\]]+) \]''',
    re.VERBOSE,
)
''') >>> root.children[0].scheduled OrgDateScheduled((2012, 2, 26)) """ return self._scheduled @property def deadline(self): """ Return deadline timestamp. :rtype: a subclass of :class:`orgparse.date.OrgDate` >>> from orgparse import loads >>> root = loads(''' ... * Node ... DEADLINE: <2012-02-26 Sun> ... ''') >>> root.children[0].deadline OrgDateDeadline((2012, 2, 26)) """ return self._deadline @property def closed(self): """ Return timestamp of closed time. :rtype: a subclass of :class:`orgparse.date.OrgDate` >>> from orgparse import loads >>> root = loads(''' ... * Node ... CLOSED: [2012-02-26 Sun 21:15] ... ''') >>> root.children[0].closed OrgDateClosed((2012, 2, 26, 21, 15, 0)) """ return self._closed @property def clock(self): """ Return a list of clocked timestamps :rtype: a list of a subclass of :class:`orgparse.date.OrgDate` >>> from orgparse import loads >>> root = loads(''' ... * Node ... CLOCK: [2012-02-26 Sun 21:10]--[2012-02-26 Sun 21:15] => 0:05 ... ''') >>> root.children[0].clock [OrgDateClock((2012, 2, 26, 21, 10, 0), (2012, 2, 26, 21, 15, 0))] """ return self._clocklist def has_date(self): """ Return ``True`` if it has any kind of timestamp """ return self.scheduled or self.deadline or self.datelist or self.rangelist @property def repeated_tasks(self): """ Get repeated tasks marked DONE in an entry having repeater. :rtype: list of :class:`orgparse.date.OrgDateRepeatedTask` >>> from orgparse import loads >>> node = loads(''' ... * TODO Pay the rent ... DEADLINE: <2005-10-01 Sat +1m> ... - State "DONE" from "TODO" [2005-09-01 Thu 16:10] ... - State "DONE" from "TODO" [2005-08-01 Mon 19:44] ... - State "DONE" from "TODO" [2005-07-01 Fri 17:27] ... 
def parse_lines(lines: Iterable[str], filename, env=None) -> OrgNode:
    """
    Turn ``lines`` into a tree of nodes and return the root node.

    A fresh ``OrgEnv`` bound to ``filename`` is created when ``env`` is
    falsy.  A supplied ``env`` must carry the same ``filename``,
    otherwise ``ValueError`` is raised.  Every node receives its 1-based
    ``linenumber`` and its ``_index`` before ``_parse_pre()`` is run on it,
    root first.
    """
    if not env:
        env = OrgEnv(filename=filename)
    elif env.filename != filename:
        raise ValueError('If env is specified, filename must match')

    # Two views of the same chunk stream: one to measure, one to build nodes.
    sized_chunks, node_chunks = itertools.tee(lines_to_chunks(lines))
    # In text editors lines are 1-indexed, hence the running total starts at 1.
    first_lines = itertools.accumulate((len(chunk) for chunk in sized_chunks), initial=1)

    nodelist = []
    for first_line, node in zip(first_lines, env.from_chunks(node_chunks)):
        node.linenumber = first_line
        nodelist.append(node)

    root = nodelist[0]
    # parse headings (level, TODO, TAGs, and heading); index 0 is the root node
    for position, node in enumerate(nodelist):
        node._index = position
        node._parse_pre()

    env._nodes = nodelist
    return root
def nodedict(i, level, todo=None, shallow_tags=None, tags=None) -> dict[str, Any]:
    """
    Build the expected attributes of the ``i``-th heading.

    ``shallow_tags`` and ``tags`` default to a fresh empty set each call.
    """
    return {
        "heading": f"Heading {i}",
        "level": level,
        "todo": todo,
        "shallow_tags": set() if shallow_tags is None else shallow_tags,
        "tags": set() if tags is None else tags,
    }


def tags(nums) -> set[str]:
    """Return the set of ``TAG<n>`` names for every ``n`` in ``nums``."""
    return {'TAG{0}'.format(num) for num in nums}
def nodedict(parent, children=None, previous=None, next_=None) -> dict[str, Any]:
    """
    Build the expected tree relations of one heading.

    All arguments are heading strings (or ``None``); ``children`` is a
    list of them and defaults to a fresh empty list each call.
    """
    return {
        'parent_heading': parent,
        'children_heading': [] if children is None else children,
        'previous_same_level_heading': previous,
        'next_same_level_heading': next_,
    }
data = [nodedict(*args) for args in [ # G0 (None, [], None, 'G1-H1'), # G1 (None, ['G1-H2'], 'G0-H1', 'G2-H1'), ('G1-H1', ['G1-H3']), ('G1-H2',), # G2 (None, ['G2-H2', 'G2-H3'], 'G1-H1', 'G3-H1'), ('G2-H1',), ('G2-H1',), # G3 (None, ['G3-H2', 'G3-H3'], 'G2-H1', 'G4-H1'), ('G3-H1',), ('G3-H1',), # G4 (None, ['G4-H2', 'G4-H3', 'G4-H4'], 'G3-H1', 'G5-H1'), ('G4-H1',), ('G4-H1',), ('G4-H1',), # G5 (None, ['G5-H2', 'G5-H4'], 'G4-H1', 'G6-H1'), ('G5-H1', ['G5-H3'], None, 'G5-H4'), ('G5-H2',), ('G5-H1', [], 'G5-H2'), # G6 (None, ['G6-H2', 'G6-H5'], 'G5-H1', 'G7-H1'), ('G6-H1', ['G6-H3', 'G6-H4'], None, 'G6-H5'), ('G6-H2',), ('G6-H2',), ('G6-H1', [], 'G6-H2'), # G7 (None, [], 'G6-H1'), ]] # fmt: skip orgparse-0.4.20251020/src/orgparse/tests/data/03_repeated_tasks.org000066400000000000000000000003271510076213700245720ustar00rootroot00000000000000* TODO Pay the rent DEADLINE: <2005-10-01 Sat +1m> - State "DONE" from "TODO" [2005-09-01 Thu 16:10] - State "DONE" from "TODO" [2005-08-01 Mon 19:44] - State "DONE" from "TODO" [2005-07-01 Fri 17:27] orgparse-0.4.20251020/src/orgparse/tests/data/03_repeated_tasks.py000066400000000000000000000006551510076213700244370ustar00rootroot00000000000000from orgparse.date import OrgDateDeadline, OrgDateRepeatedTask data = [{ 'heading': 'Pay the rent', 'todo': 'TODO', 'deadline': OrgDateDeadline((2005, 10, 1)), 'repeated_tasks': [ OrgDateRepeatedTask((2005, 9, 1, 16, 10, 0), 'TODO', 'DONE'), OrgDateRepeatedTask((2005, 8, 1, 19, 44, 0), 'TODO', 'DONE'), OrgDateRepeatedTask((2005, 7, 1, 17, 27, 0), 'TODO', 'DONE'), ] }] # fmt: skip orgparse-0.4.20251020/src/orgparse/tests/data/04_logbook.org000066400000000000000000000004151510076213700232270ustar00rootroot00000000000000* LOGBOOK drawer test :LOGBOOK: CLOCK: [2012-10-26 Fri 16:01] CLOCK: [2012-10-26 Fri 14:50]--[2012-10-26 Fri 15:00] => 0:10 CLOCK: [2012-10-26 Fri 14:30]--[2012-10-26 Fri 14:40] => 0:10 CLOCK: [2012-10-26 Fri 14:10]--[2012-10-26 Fri 14:20] => 0:10 :END: 
def nodedict(i, tags):
    """Build the expected heading and tag set of the ``i``-th node."""
    expected_heading = f"Node {i}"
    expected_tags = set(tags)
    return {"heading": expected_heading, "tags": expected_tags}
def value_from_data_key(node, key):
    """
    Look up on ``node`` the value that the expected-data key ``key`` describes.

    ``tags_inher`` maps to ``node.tags`` and ``children_heading`` to the
    headings of ``node.children``.  The ``*_heading`` keys for parent and
    siblings give that node's heading, or ``None`` when the related node
    is missing or is the root.  Any other key is a plain attribute name.
    """
    related_keys = (
        'parent_heading',
        'previous_same_level_heading',
        'next_same_level_heading',
    )
    if key == 'tags_inher':
        return node.tags
    if key == 'children_heading':
        return [child.heading for child in node.children]
    if key not in related_keys:
        return getattr(node, key)

    # drop the trailing ``_heading`` to get the attribute holding the node
    attribute, _suffix = key.rsplit('_', 1)
    related = getattr(node, attribute)
    if related and not related.is_root():
        return related.heading
    return None


def data_path(dataname: str, ext: str) -> Path:
    """Return the path of ``<dataname>.<ext>`` inside the data directory."""
    return DATADIR.joinpath(f'{dataname}.{ext}')


def get_datanames() -> Iterator[str]:
    """Yield the stem of every ``*.org`` file in the data directory, sorted."""
    org_files = sorted(DATADIR.glob('*.org'))
    yield from (org_file.stem for org_file in org_files)
h1 = top_level[0] assert h1.heading == "H1" assert h1.get_body() == "#** H2" assert h1.linenumber == 1 [h3] = h1.children assert h3.heading == "H3" assert h3.get_body() == "#* H4\n#** H5" assert h3.linenumber == 3 h6 = top_level[1] assert h6.heading == "H6" assert len(h6.children) == 0 assert h6.linenumber == 6 def test_commented_clock_entries_are_ignored_by_node_clock(): root = loads("""\ * Heading # * Floss # SCHEDULED: <2019-06-22 Sat 08:30 .+1w> # :LOGBOOK: # CLOCK: [2019-06-04 Tue 16:00]--[2019-06-04 Tue 17:00] => 1:00 # :END: """) [node] = root.children[0] assert node.heading == "Heading" assert node.clock == [] def test_commented_scheduled_marker_is_ignored_by_node_scheduled(): root = loads("""\ * Heading # SCHEDULED: <2019-06-22 Sat 08:30 .+1w> """) [node] = root.children[0] assert node.heading == "Heading" assert node.scheduled.start is None def test_commented_property_is_ignored_by_node_get_property(): root = loads("""\ * Heading # :PROPERTIES: # :PROPER-TEA: backup # :END: """) [node] = root.children[0] assert node.heading == "Heading" assert node.get_property("PROPER-TEA") is None orgparse-0.4.20251020/src/orgparse/tests/test_date.py000066400000000000000000000044001510076213700221720ustar00rootroot00000000000000import datetime from orgparse.date import ( OrgDate, OrgDateClock, OrgDateClosed, OrgDateDeadline, OrgDateScheduled, ) def test_date_as_string() -> None: testdate = datetime.date(2021, 9, 3) testdate2 = datetime.date(2021, 9, 5) testdatetime = datetime.datetime(2021, 9, 3, 16, 19, 13) testdatetime2 = datetime.datetime(2021, 9, 3, 17, 0, 1) testdatetime_nextday = datetime.datetime(2021, 9, 4, 0, 2, 1) assert str(OrgDate(testdate)) == "<2021-09-03 Fri>" assert str(OrgDate(testdatetime)) == "<2021-09-03 Fri 16:19>" assert str(OrgDate(testdate, active=False)) == "[2021-09-03 Fri]" assert str(OrgDate(testdatetime, active=False)) == "[2021-09-03 Fri 16:19]" assert str(OrgDate(testdate, testdate2)) == "<2021-09-03 Fri>--<2021-09-05 Sun>" assert 
str(OrgDate(testdate, testdate2)) == "<2021-09-03 Fri>--<2021-09-05 Sun>" assert str(OrgDate(testdatetime, testdatetime2)) == "<2021-09-03 Fri 16:19--17:00>" assert str(OrgDate(testdate, testdate2, active=False)) == "[2021-09-03 Fri]--[2021-09-05 Sun]" assert str(OrgDate(testdate, testdate2, active=False)) == "[2021-09-03 Fri]--[2021-09-05 Sun]" assert str(OrgDate(testdatetime, testdatetime2, active=False)) == "[2021-09-03 Fri 16:19--17:00]" assert str(OrgDateScheduled(testdate)) == "<2021-09-03 Fri>" assert str(OrgDateScheduled(testdatetime)) == "<2021-09-03 Fri 16:19>" assert str(OrgDateDeadline(testdate)) == "<2021-09-03 Fri>" assert str(OrgDateDeadline(testdatetime)) == "<2021-09-03 Fri 16:19>" assert str(OrgDateClosed(testdate)) == "[2021-09-03 Fri]" assert str(OrgDateClosed(testdatetime)) == "[2021-09-03 Fri 16:19]" assert str(OrgDateClock(testdatetime, testdatetime2)) == "[2021-09-03 Fri 16:19]--[2021-09-03 Fri 17:00]" assert str(OrgDateClock(testdatetime, testdatetime_nextday)) == "[2021-09-03 Fri 16:19]--[2021-09-04 Sat 00:02]" assert str(OrgDateClock(testdatetime)) == "[2021-09-03 Fri 16:19]" def test_date_as_datetime() -> None: testdate = (2021, 9, 3) testdatetime = (2021, 9, 3, 16, 19, 13) assert OrgDate._as_datetime(datetime.date(*testdate)) == datetime.datetime(*testdate, 0, 0, 0) assert OrgDate._as_datetime(datetime.datetime(*testdatetime)) == datetime.datetime(*testdatetime) orgparse-0.4.20251020/src/orgparse/tests/test_hugedata.py000066400000000000000000000015111510076213700230370ustar00rootroot00000000000000import pickle from .. 
def generate_org_lines(num_top_nodes, depth=3, nodes_per_level=1, _level=1):
    """
    Yield the heading lines of a synthetic org tree, depth first.

    ``num_top_nodes`` headings are produced at ``_level``; below each of
    them ``nodes_per_level`` headings follow, until ``depth`` levels exist.
    """
    if depth != 0:
        for i in range(num_top_nodes):
            stars = "*" * _level
            yield stars + f' {i}-th heading of level {_level}'
            yield from generate_org_lines(
                nodes_per_level,
                depth - 1,
                nodes_per_level,
                _level + 1,
            )


def num_generate_org_lines(num_top_nodes, depth=3, nodes_per_level=1):
    """Return how many lines ``generate_org_lines`` yields for the same arguments."""
    if depth == 0:
        return 0
    lines_below_each = num_generate_org_lines(nodes_per_level, depth - 1, nodes_per_level)
    return num_top_nodes * (lines_below_each + 1)
def test_parse_custom_todo_keys():
    """Headings are split on the TODO/DONE keywords supplied through a custom ``OrgEnv``."""
    todo_keys = ['TODO', 'CUSTOM1', 'ANOTHER_KEYWORD']
    done_keys = ['DONE', 'A']
    # Must equal the filename ``loads`` passes on by default, because
    # ``parse_lines`` rejects an env whose filename differs.
    filename = '<string>'  # default for loads
    content = """
* TODO Heading with a default todo keyword

* DONE Heading with a default done keyword

* CUSTOM1 Heading with a custom todo keyword

* ANOTHER_KEYWORD Heading with a long custom todo keyword

* A Heading with a short custom done keyword
"""

    env = OrgEnv(todos=todo_keys, dones=done_keys, filename=filename)
    root = loads(content, env=env)

    assert root.env.all_todo_keys == ['TODO', 'CUSTOM1', 'ANOTHER_KEYWORD', 'DONE', 'A']
    assert len(root.children) == 5
    assert root.children[0].todo == 'TODO'
    assert root.children[1].todo == 'DONE'
    assert root.children[2].todo == 'CUSTOM1'
    assert root.children[3].todo == 'ANOTHER_KEYWORD'
    assert root.children[4].todo == 'A'


def test_add_custom_todo_keys():
    """Keys added on the env are merged with those declared by ``#+TODO:`` in the file."""
    todo_keys = ['CUSTOM_TODO']
    done_keys = ['CUSTOM_DONE']
    # Must equal the filename handed to ``loads`` below (its default value).
    filename = '<string>'  # default for loads
    content = """#+TODO: COMMENT_TODO | COMMENT_DONE
"""

    env = OrgEnv(filename=filename)
    env.add_todo_keys(todos=todo_keys, dones=done_keys)

    # check that only the custom keys are known before parsing
    assert env.all_todo_keys == ['CUSTOM_TODO', 'CUSTOM_DONE']

    # after parsing, all keys are set
    root = loads(content, filename, env)
    assert root.env.all_todo_keys == ['CUSTOM_TODO', 'COMMENT_TODO', 'CUSTOM_DONE', 'COMMENT_DONE']
def test_load_filelike() -> None:
    """``load`` accepts a file-like object and falls back to a placeholder filename."""
    stream = io.StringIO('''
* heading1
* heading 2
''')
    root = load(stream)
    assert len(root.children) == 2
    # ``io.StringIO`` has no ``name`` attribute, so the env gets the placeholder.
    assert root.env.filename == '<file-like>'
10, 17, 30, 0)), ] def test_date_with_cookies() -> None: testcases = [ ('<2010-06-21 Mon +1y>', "OrgDate((2010, 6, 21), None, True, ('+', 1, 'y'))"), ('<2005-10-01 Sat +1m>', "OrgDate((2005, 10, 1), None, True, ('+', 1, 'm'))"), ('<2005-10-01 Sat +1m -3d>', "OrgDate((2005, 10, 1), None, True, ('+', 1, 'm'), ('-', 3, 'd'))"), ('<2005-10-01 Sat -3d>', "OrgDate((2005, 10, 1), None, True, None, ('-', 3, 'd'))"), ('<2008-02-10 Sun ++1w>', "OrgDate((2008, 2, 10), None, True, ('++', 1, 'w'))"), ('<2008-02-08 Fri 20:00 ++1d>', "OrgDate((2008, 2, 8, 20, 0, 0), None, True, ('++', 1, 'd'))"), ('<2019-04-05 Fri 08:00 .+1h>', "OrgDate((2019, 4, 5, 8, 0, 0), None, True, ('.+', 1, 'h'))"), ('[2019-04-05 Fri 08:00 .+1h]', "OrgDate((2019, 4, 5, 8, 0, 0), None, False, ('.+', 1, 'h'))"), ('<2007-05-16 Wed 12:30 +1w>', "OrgDate((2007, 5, 16, 12, 30, 0), None, True, ('+', 1, 'w'))"), ] # fmt: skip for inp, expected in testcases: root = loads(inp) output = root[0].datelist[0] assert str(output) == inp assert repr(output) == expected testcases = [ ('<2006-11-02 Thu 20:00-22:00 +1w>', "OrgDate((2006, 11, 2, 20, 0, 0), (2006, 11, 2, 22, 0, 0), True, ('+', 1, 'w'))"), ('<2006-11-02 Thu 20:00--22:00 +1w>', "OrgDate((2006, 11, 2, 20, 0, 0), (2006, 11, 2, 22, 0, 0), True, ('+', 1, 'w'))"), ] # fmt: skip for inp, expected in testcases: root = loads(inp) output = root[0].rangelist[0] assert str(output) == "<2006-11-02 Thu 20:00--22:00 +1w>" assert repr(output) == expected # DEADLINE and SCHEDULED testcases2 = [ ('* TODO Pay the rent\nDEADLINE: <2005-10-01 Sat +1m>', "<2005-10-01 Sat +1m>", "OrgDateDeadline((2005, 10, 1), None, True, ('+', 1, 'm'))"), ('* TODO Pay the rent\nDEADLINE: <2005-10-01 Sat +1m -3d>', "<2005-10-01 Sat +1m -3d>", "OrgDateDeadline((2005, 10, 1), None, True, ('+', 1, 'm'), ('-', 3, 'd'))"), ('* TODO Pay the rent\nDEADLINE: <2005-10-01 Sat -3d>', "<2005-10-01 Sat -3d>", "OrgDateDeadline((2005, 10, 1), None, True, None, ('-', 3, 'd'))"), ('* TODO Pay the rent\nDEADLINE: 
<2005-10-01 Sat ++1m>', "<2005-10-01 Sat ++1m>", "OrgDateDeadline((2005, 10, 1), None, True, ('++', 1, 'm'))"), ('* TODO Pay the rent\nDEADLINE: <2005-10-01 Sat .+1m>', "<2005-10-01 Sat .+1m>", "OrgDateDeadline((2005, 10, 1), None, True, ('.+', 1, 'm'))"), ] # fmt: skip for inp, expected_str, expected_repr in testcases2: root = loads(inp) output = root[1].deadline assert str(output) == expected_str assert repr(output) == expected_repr testcases2 = [ ('* TODO Pay the rent\nSCHEDULED: <2005-10-01 Sat +1m>', "<2005-10-01 Sat +1m>", "OrgDateScheduled((2005, 10, 1), None, True, ('+', 1, 'm'))"), ('* TODO Pay the rent\nSCHEDULED: <2005-10-01 Sat +1m -3d>', "<2005-10-01 Sat +1m -3d>", "OrgDateScheduled((2005, 10, 1), None, True, ('+', 1, 'm'), ('-', 3, 'd'))"), ('* TODO Pay the rent\nSCHEDULED: <2005-10-01 Sat -3d>', "<2005-10-01 Sat -3d>", "OrgDateScheduled((2005, 10, 1), None, True, None, ('-', 3, 'd'))"), ('* TODO Pay the rent\nSCHEDULED: <2005-10-01 Sat ++1m>', "<2005-10-01 Sat ++1m>", "OrgDateScheduled((2005, 10, 1), None, True, ('++', 1, 'm'))"), ('* TODO Pay the rent\nSCHEDULED: <2005-10-01 Sat .+1m>', "<2005-10-01 Sat .+1m>", "OrgDateScheduled((2005, 10, 1), None, True, ('.+', 1, 'm'))"), ] # fmt: skip for inp, expected_str, expected_repr in testcases2: root = loads(inp) output = root[1].scheduled assert str(output) == expected_str assert repr(output) == expected_repr orgparse-0.4.20251020/src/orgparse/tests/test_rich.py000066400000000000000000000042361510076213700222110ustar00rootroot00000000000000''' Tests for rich formatting: tables etc. ''' import pytest from .. 
import loads from ..extra import Table def test_table() -> None: root = loads(''' | | | | | | "heading" | | | | | | |-------+-----------+-----| | reiwf | fef | | |-------+-----------+-----| |-------+-----------+-----| | aba | caba | 123 | | yeah | | X | |------------------------+-------| | when | count | | datetime | int | |------------------------+-------| | | -1 | | [2020-11-05 Thu 23:44] | | | [2020-11-06 Fri 01:00] | 1 | |------------------------+-------| some irrelevant text | simple | |--------| | value1 | | value2 | ''') [_gap1, t1, _gap2, t2, _gap3, t3, _gap4] = root.body_rich t1 = Table(root._lines[1:10]) t2 = Table(root._lines[11:19]) t3 = Table(root._lines[22:26]) assert ilen(t1.blocks) == 4 assert list(t1.blocks)[2] == [] assert ilen(t1.rows) == 6 with pytest.raises(RuntimeError): list(t1.as_dicts) # not sure what should it be assert ilen(t2.blocks) == 2 assert ilen(t2.rows) == 5 assert list(t2.rows)[3] == ['[2020-11-05 Thu 23:44]', ''] assert ilen(t3.blocks) == 2 assert list(t3.rows) == [['simple'], ['value1'], ['value2']] assert t3.as_dicts.columns == ['simple'] assert list(t3.as_dicts) == [{'simple': 'value1'}, {'simple': 'value2'}] def test_table_2() -> None: root = loads(''' * item #+tblname: something | date | value | comment | |----------------------+-------+-------------------------------| | 14.04.17 | 11 | aaaa | | May 26 2017 08:00 | 12 | what + about + pluses? 
def ilen(x) -> int:
    """Return the number of items in the iterable ``x``, consuming it."""
    return sum(1 for _item in x)
so we need to disable it {env:CI_MYPY_COVERAGE} \ {posargs} [testenv:ty] dependency_groups = testing extras = optional deps = # any other dependencies (if needed) commands = {envpython} -m ty \ check \ {posargs}