pax_global_header00006660000000000000000000000064147777746270014546gustar00rootroot0000000000000052 comment=8209a147fb61deed38ca376d063bbff0343ca234 drgn-0.0.31/000077500000000000000000000000001477777462700125615ustar00rootroot00000000000000drgn-0.0.31/.editorconfig000066400000000000000000000004211477777462700152330ustar00rootroot00000000000000[*] end_of_line = lf insert_final_newline = true trim_trailing_whitespace = true charset = utf-8 indent_style = tab indent_size = 8 [*.{py,pyi}] indent_style = space indent_size = 4 [*.rst] indent_style = space indent_size = 4 [{makefile, Makefile}*] indent_style = tab drgn-0.0.31/.flake8000066400000000000000000000006771477777462700137460ustar00rootroot00000000000000[flake8] extend-ignore = # "undefined name": leave this to mypy. F821, # These get confused by the C code we have embedded in docstrings in # various places. # "indentation contains mixed spaces and tabs" E101, # "indentation contains tabs" W191, # For the following, we live by Black. # "whitespace before ':'" E203, # "line too long" E501, # "line break before binary operator" W503 drgn-0.0.31/.git-blame-ignore-revs000066400000000000000000000000511477777462700166550ustar00rootroot00000000000000660276a0b84fc5b8a7287d5b7a3b49d784115077 drgn-0.0.31/.github/000077500000000000000000000000001477777462700141215ustar00rootroot00000000000000drgn-0.0.31/.github/workflows/000077500000000000000000000000001477777462700161565ustar00rootroot00000000000000drgn-0.0.31/.github/workflows/ci.yml000066400000000000000000000053511477777462700173000ustar00rootroot00000000000000name: CI on: push: branches: - main workflow_dispatch: inputs: test_all_python_versions: description: "Run tests on all Python versions" type: boolean default: false required: true test_all_kernel_flavors: description: "Run tests on all kernel flavors" type: boolean default: false required: true workflow_call: inputs: test_all_python_versions: description: "Run tests on all Python versions" type: boolean default: 
false required: true test_all_kernel_flavors: description: "Run tests on all kernel flavors" type: boolean default: false required: true concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: test: runs-on: ubuntu-22.04 strategy: matrix: python-version: ${{ (github.event_name == 'push' || inputs.test_all_python_versions) && fromJSON('["3.13", "3.12", "3.11", "3.10", "3.9", "3.8"]') || fromJSON('["3.13", "3.8"]')}} cc: [gcc, clang] fail-fast: false env: CC: ${{ matrix.cc }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} allow-prereleases: true - name: Install dependencies run: | sudo apt-get update -y sudo apt-get install -y btrfs-progs check dwarves libelf-dev libdw-dev qemu-kvm zstd ${{ matrix.cc == 'clang' && 'libomp-$(clang --version | sed -rn "s/.*clang version ([0-9]+).*/\\1/p")-dev' || '' }} # pyroute2 0.9.1 dropped support for Python < 3.9. 
if [[ "${{ matrix.python-version }}" =~ ^3\.[678]$ ]]; then pyroute2_version="<0.9.1" fi pip install "pyroute2$pyroute2_version" setuptools pre-commit - name: Generate version.py run: python setup.py --version - name: Check with mypy run: pre-commit run --all-files mypy - name: Build and test with ${{ matrix.cc }} run: CONFIGURE_FLAGS="--enable-compiler-warnings=error" python setup.py test -K ${{ inputs.test_all_kernel_flavors && '-F' || '' }} lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies run: pip install pre-commit - name: Run pre-commit hooks run: SKIP=mypy pre-commit run --all-files --show-diff-on-failure drgn-0.0.31/.github/workflows/dco-check.yml000066400000000000000000000026651477777462700205320ustar00rootroot00000000000000name: DCO Check on: pull_request: types: [opened, synchronize, reopened, ready_for_review] concurrency: group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} cancel-in-progress: true jobs: check: if: ${{ !github.event.pull_request.draft }} runs-on: ubuntu-latest steps: - name: Checkout commit logs run: | git init git fetch --filter=blob:none "$GITHUB_SERVER_URL/$GITHUB_REPOSITORY" "$GITHUB_BASE_REF" "$GITHUB_REF" - name: Check for DCO sign-offs shell: bash run: | status=0 while read -r commit; do author="$(git show --no-patch --pretty='format:%an <%ae>' "$commit")" if ! 
git show --no-patch --pretty='format:%(trailers:key=Signed-off-by,valueonly)' "$commit" | grep -Fxq "$author"; then if [ $status -eq 0 ]; then echo "The following commits are missing a Developer Certificate of Origin sign-off;" echo "see https://github.com/osandov/drgn/blob/main/CONTRIBUTING.rst#signing-off" echo fi status=1 git show --no-patch "$commit" fi done < <(git rev-list --no-merges "FETCH_HEAD..$GITHUB_SHA") if [ $status -eq 0 ]; then echo "All commits have a Developer Certificate of Origin sign-off" fi exit $status drgn-0.0.31/.github/workflows/pull_request.yml000066400000000000000000000010111477777462700214160ustar00rootroot00000000000000name: Pull Request CI on: pull_request: types: - opened - synchronize - reopened - labeled jobs: test: uses: ./.github/workflows/ci.yml if: ${{ github.event.action != 'labeled' || github.event.label.name == 'test-all-python-versions' }} with: test_all_python_versions: ${{ contains(github.event.pull_request.labels.*.name, 'test-all-python-versions') }} test_all_kernel_flavors: ${{ contains(github.event.pull_request.labels.*.name, 'test-all-kernel-flavors') }} drgn-0.0.31/.github/workflows/vmtest-build.yml000066400000000000000000000023601477777462700213210ustar00rootroot00000000000000name: vmtest Build on: schedule: - cron: '16 6 * * MON' workflow_dispatch: jobs: build: strategy: matrix: flavor: [default, alternative, tiny] arch: [x86_64, aarch64, ppc64, s390x, arm] fail-fast: false max-parallel: 5 runs-on: ubuntu-22.04 permissions: contents: write env: GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} steps: - uses: actions/checkout@v4 - name: Install dependencies run: | sudo apt-get update sudo apt-get install dwarves libelf-dev pip install aiohttp uritemplate - name: Build and upload ${{ matrix.arch }} ${{ matrix.flavor }} kernels run: python3 -m vmtest.manage --kernel-directory build/vmtest/linux.git --build-directory build/vmtest/kbuild -K -a ${{ matrix.arch }} -f ${{ matrix.flavor }} - name: Upload kernel build logs if: 
always() uses: actions/upload-artifact@v4 with: name: kernel-build-logs-${{ matrix.arch }}-${{ matrix.flavor }} path: build/vmtest/kbuild/*.log if-no-files-found: ignore test: needs: build uses: ./.github/workflows/ci.yml with: test_all_python_versions: true test_all_kernel_flavors: true drgn-0.0.31/.gitignore000066400000000000000000000003141477777462700145470ustar00rootroot00000000000000*.pyc *.so /.coverage /.mypy_cache /build /coverage.info /cscope.* /dist /docs/_build /drgn-*.tar.gz /drgn.egg-info /drgn/internal/version.py /htmlcov /python-drgn-*.src.rpm /python-drgn.spec __pycache__ drgn-0.0.31/.packit.yaml000066400000000000000000000051131477777462700147760ustar00rootroot00000000000000# See the documentation for more information: # https://packit.dev/docs/configuration/ specfile_path: python-drgn.spec files_to_sync: - python-drgn.spec - .packit.yaml upstream_package_name: drgn downstream_package_name: python-drgn actions: get-current-version: "python3 setup.py --version" # Fetch the specfile from Rawhide, drop any patches and disable rpmautospec post-upstream-clone: "bash -c \"curl -s https://src.fedoraproject.org/rpms/python-drgn/raw/main/f/python-drgn.spec | sed -e '/^Patch[0-9]/d' -e '/^%autochangelog$/d' > python-drgn.spec\"" srpm_build_deps: - bash - curl - python3-setuptools - sed jobs: - job: copr_build trigger: commit owner: "@meta" project: drgn targets: fedora-all-aarch64: {} fedora-all-i386: {} fedora-all-ppc64le: {} fedora-all-s390x: {} fedora-all-x86_64: {} fedora-eln-aarch64: {} fedora-eln-ppc64le: {} fedora-eln-s390x: {} fedora-eln-x86_64: {} epel-8-aarch64: {} epel-8-ppc64le: {} epel-8-s390x: {} epel-8-x86_64: {} centos-stream+epel-next-9-aarch64: additional_repos: - https://kojihub.stream.centos.org/kojifiles/repos/c9s-build/latest/aarch64/ centos-stream+epel-next-9-ppc64le: additional_repos: - https://kojihub.stream.centos.org/kojifiles/repos/c9s-build/latest/ppc64le/ centos-stream+epel-next-9-s390x: additional_repos: - 
https://kojihub.stream.centos.org/kojifiles/repos/c9s-build/latest/s390x/ centos-stream+epel-next-9-x86_64: additional_repos: - https://kojihub.stream.centos.org/kojifiles/repos/c9s-build/latest/x86_64/ - job: copr_build trigger: pull_request owner: "@meta" project: drgn targets: fedora-all-aarch64: {} fedora-all-i386: {} fedora-all-ppc64le: {} fedora-all-s390x: {} fedora-all-x86_64: {} fedora-eln-aarch64: {} fedora-eln-ppc64le: {} fedora-eln-s390x: {} fedora-eln-x86_64: {} epel-8-aarch64: {} epel-8-ppc64le: {} epel-8-s390x: {} epel-8-x86_64: {} centos-stream+epel-next-9-aarch64: additional_repos: - https://kojihub.stream.centos.org/kojifiles/repos/c9s-build/latest/aarch64/ centos-stream+epel-next-9-ppc64le: additional_repos: - https://kojihub.stream.centos.org/kojifiles/repos/c9s-build/latest/ppc64le/ centos-stream+epel-next-9-s390x: additional_repos: - https://kojihub.stream.centos.org/kojifiles/repos/c9s-build/latest/s390x/ centos-stream+epel-next-9-x86_64: additional_repos: - https://kojihub.stream.centos.org/kojifiles/repos/c9s-build/latest/x86_64/ drgn-0.0.31/.pre-commit-config.yaml000066400000000000000000000025611477777462700170460ustar00rootroot00000000000000exclude: ^contrib/ repos: - repo: https://github.com/pycqa/isort rev: 5.13.2 hooks: - id: isort name: isort (python) - repo: https://github.com/psf/black rev: 24.8.0 hooks: - id: black exclude: ^docs/exts/details\.py$ - repo: https://github.com/pycqa/flake8 rev: 7.1.1 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.11.2 hooks: - id: mypy args: [--show-error-codes, --strict, --no-warn-return-any, --no-warn-unused-ignores] files: ^(drgn/.*\.py|_drgn.pyi|_drgn_util/.*\.py|tools/.*\.py)$ - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.6.0 hooks: - id: trailing-whitespace exclude_types: [diff] - id: end-of-file-fixer exclude_types: [diff] - id: check-yaml - id: check-added-large-files - id: debug-statements - id: check-merge-conflict - repo: 
https://github.com/netromdk/vermin rev: v1.6.0 hooks: - id: vermin # The vmtest package in general should adhere to the same version # requirements as drgn, with the following exceptions: The manage & # kbuild scripts are used by Github Actions and need not be broadly # compatible. exclude: "^vmtest/(manage|kbuild).py$" args: ['-t=3.6-', '--violations', '--eval-annotations'] drgn-0.0.31/.readthedocs.yaml000066400000000000000000000003001477777462700160010ustar00rootroot00000000000000version: 2 build: os: ubuntu-22.04 tools: python: "3" apt_packages: - graphviz sphinx: configuration: docs/conf.py python: install: - requirements: docs/requirements.txt drgn-0.0.31/CONTRIBUTING.rst000066400000000000000000000236711477777462700152330ustar00rootroot00000000000000Contributing ============ Thanks for your interest in drgn! See below for how to build, test, code, and submit changes for drgn. Building -------- The easiest way to develop drgn is by building and running it locally. Please build with warnings enabled. Install the dependencies from the `installation instructions `_, then run: .. code-block:: console $ git clone https://github.com/osandov/drgn.git $ cd drgn $ CONFIGURE_FLAGS="--enable-compiler-warnings=error" python3 setup.py build_ext -i $ python3 -m drgn --help Drgn can build, run, and pass its test suite on Python 3.6 or later. However, many of the tools used as part of the development workflow do not support Python versions once they have reached their end-of-life. Thus, your main drgn development environment should use a Python version which is actively supported upstream. In particular, the drgn development workflow no longer supported on Python 3.6. Testing ------- Tests should be added for all features and bug fixes. drgn's test suite can be run with: .. code-block:: console $ python3 setup.py test To run Linux kernel helper tests in a virtual machine on all supported kernels, add ``-K``. See `vmtest `_ for more details. 
Tests can also be run manually with `unittest `_ after building locally: .. code-block:: console $ python3 -m unittest discover -v To run Linux kernel helper tests on the running kernel, this must be run as root, and debug information for the running kernel must be available. pre-commit ---------- Several linters and checks are run on every pull request. If you'd like to run them locally prior to submission, you can install `pre-commit `_: .. code-block:: console $ pip install pre-commit Then, you can either install the checks as Git hooks so that they're run when creating a commit: .. code-block:: console $ pre-commit install --install-hooks Or you can run them manually: .. code-block:: console $ pre-commit run --all-files Please remember that these pre-commit hooks do not support Python 3.6; they require a Python major version which is actively supported upstream. Coding Guidelines ----------------- * Core functionality should be implemented in ``libdrgn`` and exposed to Python via the `C extension `_. Only the CLI and helpers should be in pure Python. C ^ drgn is written in GNU C11. C code in drgn mostly follows the `Linux kernel coding style `_ with some slightly more modern preferences: * Variables should be declared as close as possible to where they are used (as opposed to the C89 style of declaring everything at the top of a function). * As an exception, if a function has a local ``struct drgn_error *err``, it should usually be declared at the top of the function. (This is because must functions have such a variable, and it adds noise to have it in the middle of the function.) * Scope guards and the `cleanup attribute `_ should be used liberally. * ``//``-style comments are preferred over ``/* */``. * As an exception, Doxygen comments should use ``/** */``. For example: .. code-block:: c /** Good example. 
*/ struct drgn_error *my_func(struct drgn_program *prog, size_t n) { struct drgn_error *err; _cleanup_free_ void *buf = malloc(n); if (!buf) return &drgn_enomem; // 0xffff0000 is a nice address. err = drgn_program_read_memory(prog, buf, 0xffff0000, n, false); if (err) return err; ... return NULL; } NOT: .. code-block:: c /* BAD example. */ struct drgn_error *my_func(struct drgn_program *prog, size_t n) { struct drgn_error *err; void *buf; buf = malloc(n); if (!buf) { return &drgn_enomem; } /* 0xffff0000 is a nice address. */ err = drgn_program_read_memory(prog, buf, 0xffff0000, n, false); if (err) goto out; ... err = NULL; out: free(buf); return err; } A few other guidelines/conventions: * Constants should be defined as enums or ``static const`` variables rather than macros. * Functions that can fail should return a ``struct drgn_error *`` (and return their result via an out parameter if necessary). * Out parameters should be named ``ret`` (or suffixed with ``_ret`` if there are multiple) and be the last parameter(s) of the function. * Functions that initialize an already allocated structure should be suffixed with ``_init`` and take the structure to initialize as the first argument, e.g., ``struct drgn_error *foo_init(struct foo *foo, int foo_flags)``. * The matching function to deinitialize a structure should be suffixed with ``_deinit``, e.g., ``void foo_deinit(struct foo *foo)``. If possible, the definition should be placed directly after the definition of ``_init`` so that it is easier to visually verify that everything is cleaned up. * Functions that allocate and initialize a structure should be suffixed with ``_create`` and either return the structure as an out parameter (e.g., ``struct drgn_error *foo_create(int foo_flags, struct foo **ret)``) or as the return value if they can only fail with an out-of-memory error (e.g., ``struct foo *foo_create(int foo_flags)``). 
* The matching function to free an allocated structure should be suffixed with ``_destroy``, e.g., ``void foo_destroy(struct foo *foo)``. If possible, the definition should be placed directly after the definition of ``_create``. ``_destroy`` should usually allow a ``NULL`` argument, just like ``free()``. * Functions that return a result in a ``struct drgn_object *`` parameter should only modify the object if the function succeeds. drgn assumes some `implementation-defined behavior `_ for sanity: * Signed integers are represented with two's complement. * Bitwise operators on signed integers operate on the two's complement representation. * Right shift of a signed integer type is arithmetic. * Conversion to a signed integer type is modular. * Casting between pointers and integers does not change the bit representation. Python ^^^^^^ Python code in drgn should be compatible with Python 3.6 and newer. Python code is formatted with `Black `_ and `isort `_. Type hints are required everywhere (including helpers and the C extension), except in tests. Linux Kernel Helpers ^^^^^^^^^^^^^^^^^^^^ Linux kernel helpers should work on all `supported kernels `_ if possible. This may require handling changes between kernel releases. * Do NOT check the kernel version number to do this; Linux distributions often backport changes without updating the version number. Instead, use the presence or absence of variables, types, structure members, etc. * Optimize for the latest kernel release, and follow "easier to ask for forgiveness than permission" (`EAFP `_). For example, assume that a structure member from the latest release exists and catch the exception if it doesn't. * Reference the diverging commit and version number in the format ``Linux kernel commit $abbreviated_commit_hash "$commit_subject" (in v$kernel_version)``. For example: .. code-block:: python3 # Since Linux kernel commit 2f064a59a11f ("sched: Change # task_struct::state") (in v5.14), the task state is named "__state". 
# Before that, it is named "state". try: return task.__state except AttributeError: return task.state NOT: .. code-block:: python3 # BAD if hasattr(task, "state"): return task.state else: return task.__state * Document the expected C types of arguments and return values. For example: .. code-block:: python3 def cgroup_parent(cgrp: Object) -> Object: """ Return the parent cgroup of the given cgroup if it exists, ``NULL`` otherwise. :param cgrp: ``struct cgroup *`` :return: ``struct cgroup *`` """ ... Submitting PRs -------------- Pull requests and issues are always welcome. Feel free to start a discussion with a prototype. Signing Off ^^^^^^^^^^^ All commits must be signed off (i.e., ``Signed-off-by: Jane Doe ``) as per the `Developer Certificate of Origin `_. ``git commit -s`` can do this for you. Separating Changes ^^^^^^^^^^^^^^^^^^ Each logical change should be a separate commit. For example, if a PR adds new functionality to the core library and a new helper that uses the new functionality, the core change and the helper should be separate commits. This makes code review much easier. Each commit should build, pass tests, follow coding guidelines, and run correctly. (In other words, within a PR, later commits often build on top of earlier commits, but later commits shouldn't need to "fix" earlier commits.) This makes it easier to track down problems with tools like ``git bisect`` which may check out any commit in the middle of a PR. Commit Messages ^^^^^^^^^^^^^^^ The template for a good commit message is: .. code-block:: none One line summary Longer explanation including more details, background, and/or motivation. Signed-off-by: Jane Doe See `this post `_ for more information about writing good commit messages. drgn-0.0.31/COPYING000066400000000000000000000003241477777462700136130ustar00rootroot00000000000000drgn is provided under: SPDX-License-Identifier: LGPL-2.1-or-later Some source files are provided under different licenses as noted in each file. 
See the LICENSES directory for the full list of licenses used. drgn-0.0.31/LICENSES/000077500000000000000000000000001477777462700137665ustar00rootroot00000000000000drgn-0.0.31/LICENSES/GPL-2.0-or-later.txt000066400000000000000000000416711477777462700172020ustar00rootroot00000000000000GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. 
You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. 
The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. 
(Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. one line to give the program's name and an idea of what it does. Copyright (C) yyyy name of author This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. signature of Ty Coon, 1 April 1989 Ty Coon, President of Vice drgn-0.0.31/LICENSES/LGPL-2.1-or-later.txt000066400000000000000000000625571477777462700173250ustar00rootroot00000000000000GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. 
This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. 
Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. 
For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. 
The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. 
You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. 
Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. 
A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. 
As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. 
c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. 
b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. 
The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. one line to give the library's name and an idea of what it does. Copyright (C) year name of author This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. signature of Ty Coon, 1 April 1990 Ty Coon, President of Vice That's all there is to it! drgn-0.0.31/LICENSES/MIT.txt000066400000000000000000000020661477777462700151640ustar00rootroot00000000000000MIT License Copyright (c) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
drgn-0.0.31/MANIFEST.in000066400000000000000000000006741477777462700143260ustar00rootroot00000000000000recursive-include docs *.css *.ico *.png *.py *.rst recursive-include tests *.py recursive-include tests/resources *.zst include tests/linux_kernel/kmod/Makefile tests/linux_kernel/kmod/drgn_test.c recursive-include contrib *.py *.rst recursive-include tools *.py *.rst recursive-include vmtest *.c *.py *.rst recursive-include vmtest/patches *.patch recursive-include LICENSES *.txt include .flake8 CONTRIBUTING.rst COPYING pytest.ini util.py drgn-0.0.31/README.rst000066400000000000000000000220031477777462700142450ustar00rootroot00000000000000drgn ==== |pypi badge| |ci badge| |docs badge| |black badge| .. |pypi badge| image:: https://img.shields.io/pypi/v/drgn :target: https://pypi.org/project/drgn/ :alt: PyPI .. |ci badge| image:: https://github.com/osandov/drgn/workflows/CI/badge.svg :target: https://github.com/osandov/drgn/actions :alt: CI Status .. |docs badge| image:: https://readthedocs.org/projects/drgn/badge/?version=latest :target: https://drgn.readthedocs.io/en/latest/?badge=latest :alt: Documentation Status .. |black badge| image:: https://img.shields.io/badge/code%20style-black-000000.svg :target: https://github.com/psf/black .. start-introduction drgn (pronounced "dragon") is a debugger with an emphasis on programmability. drgn exposes the types and variables in a program for easy, expressive scripting in Python. For example, you can debug the Linux kernel: .. code-block:: pycon >>> from drgn.helpers.linux import list_for_each_entry >>> for mod in list_for_each_entry('struct module', ... prog['modules'].address_of_(), ... 'list'): ... if mod.refcnt.counter > 10: ... print(mod.name) ... (char [56])"snd" (char [56])"evdev" (char [56])"i915" Although other debuggers like `GDB `_ have scripting support, drgn aims to make scripting as natural as possible so that debugging feels like coding. 
This makes it well-suited for introspecting the complex, inter-connected state in large programs. Additionally, drgn is designed as a library that can be used to build debugging and introspection tools; see the official `tools `_. drgn was developed at `Meta `_ for debugging the Linux kernel (as an alternative to the `crash `_ utility), but it can also debug userspace programs written in C. C++ support is in progress. .. end-introduction Documentation can be found at `drgn.readthedocs.io `_. .. start-installation Installation ------------ Package Manager ^^^^^^^^^^^^^^^ drgn can be installed using the package manager on some Linux distributions. .. image:: https://repology.org/badge/vertical-allrepos/drgn.svg?exclude_unsupported=1 :target: https://repology.org/project/drgn/versions :alt: Packaging Status * Fedora, RHEL/CentOS Stream >= 9 .. code-block:: console $ sudo dnf install drgn * RHEL/CentOS < 9 `Enable EPEL `_. Then: .. code-block:: console $ sudo dnf install drgn * Oracle Linux >= 8 Enable the ``ol8_addons`` or ``ol9_addons`` repository. Then: .. code-block:: console $ sudo dnf config-manager --enable ol8_addons # OR: ol9_addons $ sudo dnf install drgn drgn is also available for Python versions in application streams. For example, use ``dnf install python3.12-drgn`` to install drgn for Python 3.12. See the documentation for drgn in `Oracle Linux 9 `_ and `Oracle Linux 8 `_ for more information. * Debian >= 12 (Bookworm)/Ubuntu >= 24.04 (Noble Numbat) .. code-block:: console $ sudo apt install python3-drgn To get the latest version on Ubuntu, enable the `michel-slm/kernel-utils PPA `_ first. * Arch Linux .. code-block:: console $ sudo pacman -S drgn * Gentoo .. code-block:: console $ sudo emerge dev-debug/drgn * openSUSE .. code-block:: console $ sudo zypper install python3-drgn pip ^^^ If your Linux distribution doesn't package the latest release of drgn, you can install it with `pip `_. First, `install pip `_. Then, run: .. 
code-block:: console $ sudo pip3 install drgn This will install a binary wheel by default. If you get a build error, then pip wasn't able to use the binary wheel. Install the dependencies listed `below <#from-source>`_ and try again. Note that RHEL/CentOS 6, Debian Stretch, Ubuntu Trusty, and Ubuntu Xenial (and older) ship Python versions which are too old. Python 3.6 or newer must be installed. .. _installation-from-source: From Source ^^^^^^^^^^^ To get the development version of drgn, you will need to build it from source. First, install dependencies: * Fedora, RHEL/CentOS Stream >= 9 .. code-block:: console $ sudo dnf install autoconf automake check-devel elfutils-debuginfod-client-devel elfutils-devel gcc git libkdumpfile-devel libtool make pkgconf python3 python3-devel python3-pip python3-setuptools * RHEL/CentOS < 9, Oracle Linux .. code-block:: console $ sudo dnf install autoconf automake check-devel elfutils-devel gcc git libtool make pkgconf python3 python3-devel python3-pip python3-setuptools Optionally, install ``libkdumpfile-devel`` from EPEL on RHEL/CentOS >= 8 or install `libkdumpfile `_ from source if you want support for the makedumpfile format. For Oracle Linux >= 7, ``libkdumpfile-devel`` can be installed directly from the corresponding addons repository (e.g. ``ol9_addons``). Replace ``dnf`` with ``yum`` for RHEL/CentOS/Oracle Linux < 8. When building on RHEL/CentOS/Oracle Linux < 8, you may need to use a newer version of GCC, for example, using the ``devtoolset-12`` developer toolset. Check your distribution's documentation for information on installing and using these newer toolchains. * Debian/Ubuntu .. 
code-block:: console $ sudo apt install autoconf automake check gcc git libdebuginfod-dev libkdumpfile-dev liblzma-dev libelf-dev libdw-dev libtool make pkgconf python3 python3-dev python3-pip python3-setuptools zlib1g-dev On Debian <= 11 (Bullseye) and Ubuntu <= 22.04 (Jammy Jellyfish), ``libkdumpfile-dev`` is not available, so you must install libkdumpfile from source if you want support for the makedumpfile format. * Arch Linux .. code-block:: console $ sudo pacman -S --needed autoconf automake check gcc git libelf libkdumpfile libtool make pkgconf python python-pip python-setuptools * Gentoo .. code-block:: console $ sudo emerge --noreplace --oneshot dev-build/autoconf dev-build/automake dev-libs/check dev-libs/elfutils sys-devel/gcc dev-vcs/git dev-libs/libkdumpfile dev-build/libtool dev-build/make dev-python/pip virtual/pkgconfig dev-lang/python dev-python/setuptools * openSUSE .. code-block:: console $ sudo zypper install autoconf automake check-devel gcc git libdebuginfod-devel libdw-devel libelf-devel libkdumpfile-devel libtool make pkgconf python3 python3-devel python3-pip python3-setuptools Then, run: .. code-block:: console $ git clone https://github.com/osandov/drgn.git $ cd drgn $ python3 setup.py build $ sudo python3 setup.py install .. end-installation See the `installation documentation `_ for more options. Quick Start ----------- .. start-quick-start drgn debugs the running kernel by default; simply run ``drgn``. To debug a running program, run ``drgn -p $PID``. To debug a core dump (either a kernel vmcore or a userspace core dump), run ``drgn -c $PATH``. Make sure to `install debugging symbols `_ for whatever you are debugging. Then, you can access variables in the program with ``prog["name"]`` and access structure members with ``.``: .. code-block:: pycon $ drgn >>> prog["init_task"].comm (char [16])"swapper/0" You can use various predefined helpers: .. 
code-block:: pycon >>> len(list(bpf_prog_for_each())) 11 >>> task = find_task(115) >>> cmdline(task) [b'findmnt', b'-p'] You can get stack traces with ``stack_trace()`` and access parameters or local variables with ``trace["name"]``: .. code-block:: pycon >>> trace = stack_trace(task) >>> trace[5] #5 at 0xffffffff8a5a32d0 (do_sys_poll+0x400/0x578) in do_poll at ./fs/select.c:961:8 (inlined) >>> poll_list = trace[5]["list"] >>> file = fget(task, poll_list.entries[0].fd) >>> d_path(file.f_path.address_of_()) b'/proc/115/mountinfo' .. end-quick-start See the `user guide `_ for more details and features. .. start-for-index Getting Help ------------ * The `GitHub issue tracker `_ is the preferred method to report issues. * There is also a `Linux Kernel Debuggers Matrix room `_. License ------- Copyright (c) Meta Platforms, Inc. and affiliates. drgn is licensed under the `LGPLv2.1 `_ or later. .. end-for-index drgn-0.0.31/_drgn.pyi000066400000000000000000003620371477777462700144100ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ libdrgn bindings Don't use this module directly. Instead, use the drgn package. """ import collections.abc import enum import os import sys from typing import ( Any, Callable, ClassVar, Dict, Iterable, Iterator, List, Mapping, MutableMapping, NamedTuple, Optional, Sequence, Set, Tuple, Union, overload, ) if sys.version_info < (3, 8): from typing_extensions import Final, Protocol else: from typing import Final, Protocol if sys.version_info < (3, 10): from typing_extensions import TypeAlias else: from typing import TypeAlias if sys.version_info < (3, 12): from typing_extensions import Buffer else: from collections.abc import Buffer # This is effectively typing.SupportsIndex without @typing.runtime_checkable # (both of which are only available since Python 3.8), with a more # self-explanatory name. 
class IntegerLike(Protocol): """ An :class:`int` or integer-like object. Parameters annotated with this type expect an integer which may be given as a Python :class:`int` or an :class:`Object` with integer type. .. note:: This is equivalent to :class:`typing.SupportsIndex` except that it is not runtime-checkable. """ def __index__(self) -> int: ... Path: TypeAlias = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]] """ Filesystem path. Parameters annotated with this type accept a filesystem path as :class:`str`, :class:`bytes`, or :class:`os.PathLike`. """ class Program: """ A ``Program`` represents a crashed or running program. It can be used to lookup type definitions, access variables, and read arbitrary memory. The main functionality of a ``Program`` is looking up objects (i.e., variables, constants, or functions). This is usually done with the :meth:`[] <.__getitem__>` operator. """ def __init__( self, platform: Optional[Platform] = None, *, vmcoreinfo: Union[bytes, str, None] = None, ) -> None: """ Create a ``Program`` with no target program. It is usually more convenient to use one of the :ref:`api-program-constructors`. :param platform: The platform of the program, or ``None`` if it should be determined automatically when a core dump or symbol file is added. :param vmcoreinfo: Optionally provide the ``VMCOREINFO`` note data for Linux kernel core dumps, which will override any detected data. When not provided or ``None``, automatically detect the info. """ ... flags: ProgramFlags """Flags which apply to this program.""" platform: Optional[Platform] """ Platform that this program runs on, or ``None`` if it has not been determined yet. """ language: Language """ Default programming language of the program. This is used for interpreting the type name given to :meth:`type()` and when creating an :class:`Object` without an explicit type. For the Linux kernel, this defaults to :attr:`Language.C`. 
For userspace programs, this defaults to the language of ``main`` in the program, falling back to :attr:`Language.C`. This heuristic may change in the future. This can be explicitly set to a different language (e.g., if the heuristic was incorrect). """ def __getitem__(self, name: str) -> Object: """ Implement ``self[name]``. Get the object (variable, constant, or function) with the given name. This is equivalent to ``prog.object(name)`` except that this raises :exc:`KeyError` instead of :exc:`LookupError` if no objects with the given name are found. If there are multiple objects with the same name, one is returned arbitrarily. In this case, the :meth:`variable()`, :meth:`constant()`, :meth:`function()`, or :meth:`object()` methods can be used instead. >>> prog['jiffies'] Object(prog, 'volatile unsigned long', address=0xffffffff94c05000) :param name: Object name. """ ... def __contains__(self, name: str) -> bool: """ Implement ``name in self``. Return whether an object (variable, constant, or function) with the given name exists in the program. :param name: Object name. """ ... def variable(self, name: str, filename: Optional[str] = None) -> Object: """ Get the variable with the given name. >>> prog.variable('jiffies') Object(prog, 'volatile unsigned long', address=0xffffffff94c05000) This is equivalent to ``prog.object(name, FindObjectFlags.VARIABLE, filename)``. :param name: The variable name. :param filename: The source code file that contains the definition. See :ref:`api-filenames`. :raises LookupError: if no variables with the given name are found in the given file """ ... def constant(self, name: str, filename: Optional[str] = None) -> Object: """ Get the constant (e.g., enumeration constant) with the given name. Note that support for macro constants is not yet implemented for DWARF files, and most compilers don't generate macro debugging information by default anyways. 
>>> prog.constant('PIDTYPE_MAX') Object(prog, 'enum pid_type', value=4) This is equivalent to ``prog.object(name, FindObjectFlags.CONSTANT, filename)``. :param name: The constant name. :param filename: The source code file that contains the definition. See :ref:`api-filenames`. :raises LookupError: if no constants with the given name are found in the given file """ ... def function(self, name: str, filename: Optional[str] = None) -> Object: """ Get the function with the given name. >>> prog.function('schedule') Object(prog, 'void (void)', address=0xffffffff94392370) This is equivalent to ``prog.object(name, FindObjectFlags.FUNCTION, filename)``. :param name: The function name. :param filename: The source code file that contains the definition. See :ref:`api-filenames`. :raises LookupError: if no functions with the given name are found in the given file """ ... def object( self, name: str, flags: FindObjectFlags = FindObjectFlags.ANY, filename: Optional[str] = None, ) -> Object: """ Get the object (variable, constant, or function) with the given name. When debugging the Linux kernel, this can look up certain special objects documented in :ref:`kernel-special-objects`, sometimes without any debugging information loaded. :param name: The object name. :param flags: Flags indicating what kind of object to look for. :param filename: The source code file that contains the definition. See :ref:`api-filenames`. :raises LookupError: if no objects with the given name are found in the given file """ ... def symbol(self, __address_or_name: Union[IntegerLike, str]) -> Symbol: """ Get a symbol containing the given address, or a symbol with the given name. Global symbols are preferred over weak symbols, and weak symbols are preferred over other symbols. In other words: if a matching :attr:`SymbolBinding.GLOBAL` or :attr:`SymbolBinding.UNIQUE` symbol is found, it is returned. Otherwise, if a matching :attr:`SymbolBinding.WEAK` symbol is found, it is returned. 
Otherwise, any matching symbol (e.g., :attr:`SymbolBinding.LOCAL`) is returned. If there are multiple matching symbols with the same binding, one is returned arbitrarily. To retrieve all matching symbols, use :meth:`symbols()`. :param address_or_name: Address or name to search for. :raises LookupError: if no symbol contains the given address or matches the given name """ ... def symbols( self, __address_or_name: Union[None, IntegerLike, str] = None, ) -> List[Symbol]: """ Get a list of global and local symbols, optionally matching a name or address. If a string argument is given, this returns all symbols matching that name. If an integer-like argument given, this returns a list of all symbols containing that address. If no argument is given, all symbols in the program are returned. In all cases, the symbols are returned in an unspecified order. :param address_or_name: Address or name to search for. """ ... def stack_trace( self, # Object is already IntegerLike, but this explicitly documents that it # can take non-integer Objects. thread: Union[Object, IntegerLike], ) -> StackTrace: """ Get the stack trace for the given thread in the program. ``thread`` may be a thread ID (as defined by :manpage:`gettid(2)`), in which case this will unwind the stack for the thread with that ID. The ID may be a Python ``int`` or an integer :class:`Object` ``thread`` may also be a ``struct pt_regs`` or ``struct pt_regs *`` object, in which case the initial register values will be fetched from that object. Finally, if debugging the Linux kernel, ``thread`` may be a ``struct task_struct *`` object, in which case this will unwind the stack for that task. See :func:`drgn.helpers.linux.pid.find_task()`. This is implemented for the Linux kernel (both live and core dumps) as well as userspace core dumps; it is not yet implemented for live userspace processes. :param thread: Thread ID, ``struct pt_regs`` object, or ``struct task_struct *`` object. """ ... 
def stack_trace_from_pcs(self, pcs: Sequence[IntegerLike]) -> StackTrace: """ Get a stack trace with the supplied list of program counters. :param pcs: List of program counters. """ ... @overload def type(self, name: str, filename: Optional[str] = None) -> Type: """ Get the type with the given name. >>> prog.type('long') prog.int_type(name='long', size=8, is_signed=True) :param name: The type name. :param filename: The source code file that contains the definition. See :ref:`api-filenames`. :raises LookupError: if no types with the given name are found in the given file """ ... @overload def type(self, __type: Type) -> Type: """ Return the given type. This is mainly useful so that helpers can use ``prog.type()`` to get a :class:`Type` regardless of whether they were given a :class:`str` or a :class:`Type`. For example: .. code-block:: python3 def my_helper(obj: Object, type: Union[str, Type]) -> bool: # type may be str or Type. type = obj.prog_.type(type) # type is now always Type. return sizeof(obj) > sizeof(type) :param type: Type. :return: The exact same type. """ ... def threads(self) -> Iterator[Thread]: """Get an iterator over all of the threads in the program.""" ... def thread(self, tid: IntegerLike) -> Thread: """ Get the thread with the given thread ID. :param tid: Thread ID (as defined by :manpage:`gettid(2)`). :raises LookupError: if no thread has the given thread ID """ ... def main_thread(self) -> Thread: """ Get the main thread of the program. This is only defined for userspace programs. :raises ValueError: if the program is the Linux kernel """ ... def crashed_thread(self) -> Thread: """ Get the thread that caused the program to crash. For userspace programs, this is the thread that received the fatal signal (e.g., ``SIGSEGV`` or ``SIGQUIT``). For the kernel, this is the thread that panicked (either directly or as a result of an oops, ``BUG_ON()``, etc.). :raises ValueError: if the program is live (i.e., not a core dump) """ ... 
def read( self, address: IntegerLike, size: IntegerLike, physical: bool = False ) -> bytes: """ Read *size* bytes of memory starting at *address* in the program. The address may be virtual (the default) or physical if the program supports it. >>> prog.read(0xffffffffbe012b40, 16) b'swapper/0\x00\x00\x00\x00\x00\x00\x00' :param address: The starting address. :param size: The number of bytes to read. :param physical: Whether *address* is a physical memory address. If ``False``, then it is a virtual memory address. Physical memory can usually only be read when the program is an operating system kernel. :raises FaultError: if the address range is invalid or the type of address (physical or virtual) is not supported by the program :raises ValueError: if *size* is negative """ ... def read_u8(self, address: IntegerLike, physical: bool = False) -> int: """ """ ... def read_u16(self, address: IntegerLike, physical: bool = False) -> int: """ """ ... def read_u32(self, address: IntegerLike, physical: bool = False) -> int: """ """ ... def read_u64(self, address: IntegerLike, physical: bool = False) -> int: """ """ ... def read_word(self, address: IntegerLike, physical: bool = False) -> int: """ Read an unsigned integer from the program's memory in the program's byte order. :meth:`read_u8()`, :meth:`read_u16()`, :meth:`read_u32()`, and :meth:`read_u64()` read an 8-, 16-, 32-, or 64-bit unsigned integer, respectively. :meth:`read_word()` reads a program word-sized unsigned integer. For signed integers, alternate byte order, or other formats, you can use :meth:`read()` and :meth:`int.from_bytes()` or the :mod:`struct` module. :param address: Address of the integer. :param physical: Whether *address* is a physical memory address; see :meth:`read()`. :raises FaultError: if the address is invalid; see :meth:`read()` """ ... 
def add_memory_segment( self, address: IntegerLike, size: IntegerLike, read_fn: Callable[[int, int, int, bool], bytes], physical: bool = False, ) -> None: """ Define a region of memory in the program. If it overlaps a previously registered segment, the new segment takes precedence. :param address: Address of the segment. :param size: Size of the segment in bytes. :param physical: Whether to add a physical memory segment. If ``False``, then this adds a virtual memory segment. :param read_fn: Callable to call to read memory from the segment. It is passed the address being read from, the number of bytes to read, the offset in bytes from the beginning of the segment, and whether the address is physical: ``(address, count, offset, physical)``. It should return the requested number of bytes as :class:`bytes` or another :ref:`buffer ` type. """ ... def register_type_finder( self, name: str, fn: Callable[[Program, TypeKindSet, str, Optional[str]], Optional[Type]], *, enable_index: Optional[int] = None, ) -> None: """ Register a callback for finding types in the program. This does not enable the finder unless *enable_index* is given. :param name: Finder name. :param fn: Callable taking the program, a :class:`TypeKindSet`, name, and filename: ``(prog, kinds, name, filename)``. The filename should be matched with :func:`filename_matches()`. This should return a :class:`Type` or ``None`` if not found. :param enable_index: Insert the finder into the list of enabled type finders at the given index. If -1 or greater than the number of enabled finders, insert it at the end. If ``None`` or not given, don't enable the finder. :raises ValueError: if there is already a finder with the given name """ ... def registered_type_finders(self) -> Set[str]: """Return the names of all registered type finders.""" ... def set_enabled_type_finders(self, names: Sequence[str]) -> None: """ Set the list of enabled type finders. Finders are called in the same order as the list until a type is found. 
Finders that are not in the list are not called. :param names: Names of finders to enable, in order. :raises ValueError: if no finder has a given name or the same name is given more than once """ ... def enabled_type_finders(self) -> List[str]: """Return the names of enabled type finders, in order.""" ... def register_object_finder( self, name: str, fn: Callable[[Program, str, FindObjectFlags, Optional[str]], Optional[Object]], *, enable_index: Optional[int] = None, ) -> None: """ Register a callback for finding objects in the program. This does not enable the finder unless *enable_index* is given. :param name: Finder name. :param fn: Callable taking the program, name, :class:`FindObjectFlags`, and filename: ``(prog, name, flags, filename)``. The filename should be matched with :func:`filename_matches()`. This should return an :class:`Object` or ``None`` if not found. :param enable_index: Insert the finder into the list of enabled object finders at the given index. If -1 or greater than the number of enabled finders, insert it at the end. If ``None`` or not given, don't enable the finder. :raises ValueError: if there is already a finder with the given name """ ... def registered_object_finders(self) -> Set[str]: """Return the names of all registered object finders.""" ... def set_enabled_object_finders(self, names: Sequence[str]) -> None: """ Set the list of enabled object finders. Finders are called in the same order as the list until an object is found. Finders that are not in the list are not called. :param names: Names of finders to enable, in order. :raises ValueError: if no finder has a given name or the same name is given more than once """ ... def enabled_object_finders(self) -> List[str]: """Return the names of enabled object finders, in order.""" ... 
def register_symbol_finder( self, name: str, fn: Callable[[Program, Optional[str], Optional[int], bool], Sequence[Symbol]], *, enable_index: Optional[int] = None, ) -> None: """ Register a callback for finding symbols in the program. This does not enable the finder unless *enable_index* is given. The callback should take four arguments: the program, a *name*, an *address*, and a boolean flag *one*. It should return a list of symbols or an empty list if no matches are found. If *name* is not ``None``, then only symbols with that name should be returned. If *address* is not ``None``, then only symbols containing that address should be returned. If neither is ``None``, then the returned symbols must match both. If both are ``None``, then all symbols should be considered matching. When the *one* flag is ``False``, the callback should return a list of all matching symbols. When it is ``True``, it should return a list with at most one symbol which is the best match. :param name: Finder name. :param fn: Callable taking ``(prog, name, address, one)`` and returning a sequence of :class:`Symbol`\\ s. :param enable_index: Insert the finder into the list of enabled finders at the given index. If -1 or greater than the number of enabled finders, insert it at the end. If ``None`` or not given, don't enable the finder. :raises ValueError: if there is already a finder with the given name """ ... def registered_symbol_finders(self) -> Set[str]: """Return the names of all registered symbol finders.""" ... def set_enabled_symbol_finders(self, names: Sequence[str]) -> None: """ Set the list of enabled symbol finders. Finders are called in the same order as the list. When the *one* flag is set, the search will short-circuit after the first finder which returns a result, and subsequent finders will not be called. Otherwise, all callbacks will be called, and all results will be returned. Finders that are not in the list are not called. :param names: Names of finders to enable, in order. 
:raises ValueError: if no finder has a given name or the same name is given more than once """ ... def enabled_symbol_finders(self) -> List[str]: """Return the names of enabled symbol finders, in order.""" ... def add_type_finder( self, fn: Callable[[TypeKind, str, Optional[str]], Optional[Type]] ) -> None: """ Deprecated method to register and enable a callback for finding types in the program. .. deprecated:: 0.0.27 Use :meth:`register_type_finder()` instead. The differences from :meth:`register_type_finder()` are: 1. *fn* is not passed *prog*. 2. *fn* is passed a :class:`TypeKind` instead of a :class:`TypeKindSet`. If multiple kinds are being searched for, *fn* will be called multiple times. 3. A name for the finder is generated from *fn*. 4. The finder is always enabled before any existing finders. """ ... def add_object_finder( self, fn: Callable[[Program, str, FindObjectFlags, Optional[str]], Optional[Object]], ) -> None: """ Deprecated method to register and enable a callback for finding objects in the program. .. deprecated:: 0.0.27 Use :meth:`register_object_finder()` instead. The differences from :meth:`register_object_finder()` are: 1. A name for the finder is generated from *fn*. 2. The finder is always enabled before any existing finders. """ ... def set_core_dump(self, path: Union[Path, int]) -> None: """ Set the program to a core dump. This loads the memory segments from the core dump and determines the mapped executable and libraries. It does not load any debugging symbols; see :meth:`load_default_debug_info()`. :param path: Core dump file path or open file descriptor. """ ... def set_kernel(self) -> None: """ Set the program to the running operating system kernel. This loads the memory of the running kernel and thus requires root privileges. It does not load any debugging symbols; see :meth:`load_default_debug_info()`. """ ... def set_pid(self, pid: int) -> None: """ Set the program to a running process. 
This loads the memory of the process and determines the mapped executable and libraries. It does not load any debugging symbols; see :meth:`load_default_debug_info()`. :param pid: Process ID. """ ... def modules(self) -> Iterator[Module]: """Get an iterator over all of the created modules in the program.""" def loaded_modules(self) -> Iterator[Tuple[Module, bool]]: """ Get an iterator over executables, libraries, etc. that are loaded in the program, creating modules to represent them. Modules are created lazily as items are consumed. This may automatically load some debugging information necessary to enumerate the modules. Other than that, it does not load debugging information. See :meth:`load_debug_info()` for a higher-level interface that does load debugging information. :return: Iterator of module and ``True`` if it was newly created or ``False`` if it was previously found. """ ... def create_loaded_modules(self) -> None: """ Determine what executables, libraries, etc. are loaded in the program and create modules to represent them. This is a shortcut for exhausting a :meth:`loaded_modules()` iterator. It is equivalent to: .. code-block:: python3 for _ in prog.loaded_modules(): pass """ @overload def main_module(self) -> MainModule: """ Find the main module. :raises LookupError: if the main module has not been created """ ... @overload def main_module(self, name: Path, *, create: bool = False) -> MainModule: """ Find the main module. :param name: :attr:`Module.name` :param create: Create the module if it doesn't exist. :raises LookupError: if the main module has not been created and *create* is ``False``, or if the main module has already been created with a different name """ ... def shared_library_module( self, name: Path, dynamic_address: IntegerLike, *, create: bool = False, ) -> SharedLibraryModule: """ Find a shared library module. 
:param name: :attr:`Module.name` :param dynamic_address: :attr:`SharedLibraryModule.dynamic_address` :param create: Create the module if it doesn't exist. :return: Shared library module with the given name and dynamic address. :raises LookupError: if no matching module has been created and *create* is ``False`` """ ... def vdso_module( self, name: Path, dynamic_address: IntegerLike, *, create: bool = False, ) -> VdsoModule: """ Find a vDSO module. :param name: :attr:`Module.name` :param dynamic_address: :attr:`VdsoModule.dynamic_address` :param create: Create the module if it doesn't exist. :return: vDSO module with the given name and dynamic address. :raises LookupError: if no matching module has been created and *create* is ``False`` """ ... def relocatable_module( self, name: Path, address: IntegerLike, *, create: bool = False ) -> RelocatableModule: """ Find a relocatable module. :param name: :attr:`Module.name` :param address: :attr:`RelocatableModule.address` :param create: Create the module if it doesn't exist. :return: Relocatable module with the given name and address. :raises LookupError: if no matching module has been created and *create* is ``False`` """ ... def linux_kernel_loadable_module( self, module_obj: Object, *, create: bool = False ) -> RelocatableModule: """ Find a Linux kernel loadable module from a ``struct module *`` object. Note that kernel modules are represented as relocatable modules. :param module_obj: ``struct module *`` object for the kernel module. :param create: Create the module if it doesn't exist. :return: Relocatable module with a name and address matching *module_obj*. :raises LookupError: if no matching module has been created and *create* is ``False`` """ ... def extra_module( self, name: Path, id: IntegerLike = 0, *, create: bool = False ) -> ExtraModule: """ Find an extra module. :param name: :attr:`Module.name` :param id: :attr:`ExtraModule.id` :param create: Create the module if it doesn't exist. 
:return: Extra module with the given name and ID number. :raises LookupError: if no matching module has been created and *create* is ``False`` """ ... def module(self, __address_or_name: Union[IntegerLike, str]) -> Module: """ Find the module containing the given address, or the module with the given name. Addresses are matched based on :attr:`Module.address_range`. If there are multiple modules with the given name, one is returned arbitrarily. :param address_or_name: Address or name to search for. :raises LookupError: if no module contains the given address or has the given name """ ... def register_debug_info_finder( self, name: str, fn: Callable[[Sequence[Module]], None], *, enable_index: Optional[int] = None, ) -> None: """ Register a callback for finding debugging information. This does not enable the finder unless *enable_index* is given. :param name: Finder name. :param fn: Callable taking a list of :class:`Module`\\ s that want debugging information. This should check :meth:`Module.wants_loaded_file()` and :meth:`Module.wants_debug_file()` and do one of the following for each module: * Obtain and/or locate a file wanted by the module and call :meth:`Module.try_file()`. * Install files for a later finder to use. * Set :attr:`Module.loaded_file_status` or :attr:`Module.debug_file_status` to :attr:`ModuleFileStatus.DONT_NEED` if the finder believes that the file is not needed. * Ignore it, for example if the finder doesn't know how to find the wanted files for the module. :param enable_index: Insert the finder into the list of enabled object finders at the given index. If -1 or greater than the number of enabled finders, insert it at the end. If ``None`` or not given, don't enable the finder. :raises ValueError: if there is already a finder with the given name """ ... def registered_debug_info_finders(self) -> Set[str]: """Return the names of all registered debugging information finders.""" ... 
def set_enabled_debug_info_finders(self, names: Sequence[str]) -> None: """ Set the list of enabled debugging information finders. Finders are called in the same order as the list until all wanted files have been found. Finders that are not in the list are not called. :param names: Names of finders to enable, in order. :raises ValueError: if no finder has a given name or the same name is given more than once """ ... def enabled_debug_info_finders(self) -> List[str]: """ Return the names of enabled debugging information finders, in order. """ ... debug_info_options: DebugInfoOptions """Default options for debugging information searches.""" def load_debug_info( self, paths: Optional[Iterable[Path]] = (), default: bool = False, main: bool = False, ) -> None: """ Load debugging information for the given set of files and/or modules. This determines what executables, libraries, etc. are loaded in the program (see :meth:`loaded_modules()`) and tries to load their debugging information from the given *paths*. .. note:: It is much more efficient to load multiple files at once rather than one by one when possible. :param paths: Paths of binary files to try. Files that don't correspond to any loaded modules are ignored. See :class:`ExtraModule` for a way to provide arbitrary debugging information. :param default: Try to load all debugging information for all loaded modules. The files in *paths* are tried first before falling back to the enabled debugging information finders. This implies ``main=True``. :param main: Try to load all debugging information for the main module. The files in *paths* are tried first before falling back to the enabled debugging information finders. :raises MissingDebugInfoError: if debugging information was not available for some files; other files with debugging information are still loaded """ ... def load_default_debug_info(self) -> None: """ Load all debugging information that can automatically be determined from the program. 
This is equivalent to ``load_debug_info(default=True)``. """ ... def load_module_debug_info(self, *modules: Module) -> None: """ Load debugging information for the given modules using the enabled debugging information finders. The files to search for are controlled by :attr:`Module.loaded_file_status` and :attr:`Module.debug_file_status`. """ ... def find_standard_debug_info( self, modules: Iterable[Module], options: Optional[DebugInfoOptions] = None ) -> None: """ Load debugging information for the given modules from the standard locations. This is equivalent to the ``standard`` debugging information finder that is registered by default. It is intended for use by other debugging information finders that need a variation of the standard finder (e.g., after installing something or setting specific options). :param modules: Modules to load debugging information for. :param options: Options to use when searching for debugging information. If ``None`` or not given, this uses :attr:`self.debug_info_options `. """ cache: Dict[Any, Any] """ Dictionary for caching program metadata. This isn't used by drgn itself. It is intended to be used by helpers to cache metadata about the program. For example, if a helper for a program depends on the program version or an optional feature, the helper can detect it and cache it for subsequent invocations: .. code-block:: python3 def my_helper(prog): try: have_foo = prog.cache['have_foo'] except KeyError: have_foo = detect_foo_feature(prog) prog.cache['have_foo'] = have_foo if have_foo: return prog['foo'] else: return prog['bar'] """ def void_type( self, *, qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ Create a new void type. It has kind :attr:`TypeKind.VOID`. :param qualifiers: :attr:`Type.qualifiers` :param lang: :attr:`Type.language` """ ... 
def int_type( self, name: str, size: IntegerLike, is_signed: bool, byteorder: Optional[str] = None, *, qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ Create a new integer type. It has kind :attr:`TypeKind.INT`. :param name: :attr:`Type.name` :param size: :attr:`Type.size` :param is_signed: :attr:`Type.is_signed` :param byteorder: :attr:`Type.byteorder`, or ``None`` to use the program's default byte order. :param qualifiers: :attr:`Type.qualifiers` :param lang: :attr:`Type.language` """ ... def bool_type( self, name: str, size: IntegerLike, byteorder: Optional[str] = None, *, qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ Create a new boolean type. It has kind :attr:`TypeKind.BOOL`. :param name: :attr:`Type.name` :param size: :attr:`Type.size` :param byteorder: :attr:`Type.byteorder`, or ``None`` to use the program's default byte order. :param qualifiers: :attr:`Type.qualifiers` :param lang: :attr:`Type.language` """ ... def float_type( self, name: str, size: IntegerLike, byteorder: Optional[str] = None, *, qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ Create a new floating-point type. It has kind :attr:`TypeKind.FLOAT`. :param name: :attr:`Type.name` :param size: :attr:`Type.size` :param byteorder: :attr:`Type.byteorder`, or ``None`` to use the program's default byte order. :param qualifiers: :attr:`Type.qualifiers` :param lang: :attr:`Type.language` """ ... @overload def struct_type( self, tag: Optional[str], size: IntegerLike, members: Sequence[TypeMember], *, template_parameters: Sequence[TypeTemplateParameter] = (), qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ Create a new structure type. It has kind :attr:`TypeKind.STRUCT`. 
:param tag: :attr:`Type.tag` :param size: :attr:`Type.size` :param members: :attr:`Type.members` :param template_parameters: :attr:`Type.template_parameters` :param qualifiers: :attr:`Type.qualifiers` :param lang: :attr:`Type.language` """ ... @overload def struct_type( self, tag: Optional[str], size: None = None, members: None = None, *, template_parameters: Sequence[TypeTemplateParameter] = (), qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """Create a new incomplete structure type.""" ... @overload def union_type( self, tag: Optional[str], size: IntegerLike, members: Sequence[TypeMember], *, template_parameters: Sequence[TypeTemplateParameter] = (), qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ Create a new union type. It has kind :attr:`TypeKind.UNION`. Otherwise, this is the same as as :meth:`struct_type()`. """ ... @overload def union_type( self, tag: Optional[str], size: None = None, members: None = None, *, template_parameters: Sequence[TypeTemplateParameter] = (), qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """Create a new incomplete union type.""" ... @overload def class_type( self, tag: Optional[str], size: IntegerLike, members: Sequence[TypeMember], *, template_parameters: Sequence[TypeTemplateParameter] = (), qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ Create a new class type. It has kind :attr:`TypeKind.CLASS`. Otherwise, this is the same as as :meth:`struct_type()`. """ ... @overload def class_type( self, tag: Optional[str], size: None = None, members: None = None, *, template_parameters: Sequence[TypeTemplateParameter] = (), qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """Create a new incomplete class type.""" ... 
@overload def enum_type( self, tag: Optional[str], type: Type, enumerators: Sequence[TypeEnumerator], *, qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ Create a new enumerated type. It has kind :attr:`TypeKind.ENUM`. :param tag: :attr:`Type.tag` :param type: The compatible integer type (:attr:`Type.type`) :param enumerators: :attr:`Type.enumerators` :param qualifiers: :attr:`Type.qualifiers` :param lang: :attr:`Type.language` """ ... @overload def enum_type( self, tag: Optional[str], type: None = None, enumerators: None = None, *, qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """Create a new incomplete enumerated type.""" ... def typedef_type( self, name: str, type: Type, *, qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ Create a new typedef type. It has kind :attr:`TypeKind.TYPEDEF`. :param name: :attr:`Type.name` :param type: The aliased type (:attr:`Type.type`) :param qualifiers: :attr:`Type.qualifiers` :param lang: :attr:`Type.language` """ ... def pointer_type( self, type: Type, size: Optional[int] = None, byteorder: Optional[str] = None, *, qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ Create a new pointer type. It has kind :attr:`TypeKind.POINTER`, :param type: The referenced type (:attr:`Type.type`) :param size: :attr:`Type.size`, or ``None`` to use the program's default pointer size. :param byteorder: :attr:`Type.byteorder`, or ``None`` to use the program's default byte order. :param qualifiers: :attr:`Type.qualifiers` :param lang: :attr:`Type.language` """ ... def array_type( self, type: Type, length: Optional[int] = None, *, qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ Create a new array type. It has kind :attr:`TypeKind.ARRAY`. 
:param type: The element type (:attr:`Type.type`) :param length: :attr:`Type.length` :param qualifiers: :attr:`Type.qualifiers` :param lang: :attr:`Type.language` """ ... def function_type( self, type: Type, parameters: Sequence[TypeParameter], is_variadic: bool = False, *, template_parameters: Sequence[TypeTemplateParameter] = (), qualifiers: Qualifiers = Qualifiers.NONE, language: Optional[Language] = None, ) -> Type: """ Create a new function type. It has kind :attr:`TypeKind.FUNCTION`. :param type: The return type (:attr:`Type.type`) :param parameters: :attr:`Type.parameters` :param is_variadic: :attr:`Type.is_variadic` :param template_parameters: :attr:`Type.template_parameters` :param qualifiers: :attr:`Type.qualifiers` :param lang: :attr:`Type.language` """ ... class ProgramFlags(enum.Flag): """ ``ProgramFlags`` are flags that can apply to a :class:`Program` (e.g., about what kind of program it is). """ IS_LINUX_KERNEL = ... """The program is the Linux kernel.""" IS_LIVE = ... """ The program is currently running (e.g., it is the running operating system kernel or a running process). """ IS_LOCAL = ... """ The program is running on the local machine. """ class FindObjectFlags(enum.Flag): """ ``FindObjectFlags`` are flags for :meth:`Program.object()`. These can be combined to search for multiple kinds of objects at once. """ CONSTANT = ... "" FUNCTION = ... "" VARIABLE = ... "" ANY = ... "" class DebugInfoOptions: """ Options for debugging information searches. All of these options can be reassigned. """ def __init__( self, __options: Optional[DebugInfoOptions] = None, *, directories: Iterable[Path] = ..., try_module_name: bool = ..., try_build_id: bool = ..., try_debug_link: bool = ..., try_procfs: bool = ..., try_embedded_vdso: bool = ..., try_reuse: bool = ..., try_supplementary: bool = ..., kernel_directories: Iterable[Path] = ..., try_kmod: KmodSearchMethod = ..., ) -> None: """ Create a ``DebugInfoOptions``. 
:param options: If given, create a copy of the given options. Otherwise, use the default options. Any remaining arguments override the copied/default options. """ ... directories: Tuple[str, ...] """ Directories to search for debugging information files. Defaults to ``("", ".debug", "/usr/lib/debug")``, which should work out of the box on most Linux distributions. This controls searches by build ID (see :attr:`try_build_id`) and debug link (see :attr:`try_debug_link`), and for kernel files (see :attr:`kernel_directories`). """ try_module_name: bool """ If the name of a module resembles a filesystem path, try the file at that path. Defaults to ``True``. """ try_build_id: bool """ Try finding files using build IDs. Defaults to ``True``. A *build ID* is a unique byte string present in a module's :ref:`loaded file ` and :ref:`debug file `. If configured correctly, it is also present in core dumps and provides a reliable way to identify the correct files for a module. Searches by build ID check under each absolute path in :attr:`directories` for a file named ``.build-id/xx/yyyy`` (for loaded files) or ``.build-id/xx/yyyy.debug`` (for debug files), where ``xxyyyy`` is the lowercase hexadecimal representation of the build ID. """ try_debug_link: bool """ Try finding files using debug links. Defaults to ``True``. A *debug link* is a pointer in a module's :ref:`loaded file ` to its :ref:`debug file `. It consists of a name and a checksum. Searches by debug link check every path in :attr:`directories` for a file with a matching name and checksum. Relative paths in :attr:`directories` are relative to the directory containing the loaded file. An empty path in :attr:`directories` means the directory containing the loaded file. """ try_procfs: bool """ For local processes, try getting files via the ``proc`` filesystem (e.g., :manpage:`proc_pid_exe(5)`, :manpage:`proc_pid_map_files(5)`). Defaults to ``True``. 
""" try_embedded_vdso: bool """ Try reading the vDSO embedded in a process's memory/core dump. Defaults to ``True``. The entire (stripped) vDSO is included in core dumps, so this is a reliable way to get it. """ try_reuse: bool """ Try reusing a module's loaded file as its debug file and vice versa. Defaults to ``True``. """ try_supplementary: bool """ Try finding :ref:`supplementary files `. Defaults to ``True``. """ kernel_directories: Tuple[str, ...] """ Directories to search for the kernel image and loadable kernel modules. Defaults to ``("",)``. An empty path means to check standard paths (e.g., :file:`/boot/vmlinux-{release}`, :file:`/lib/modules/{release}`) absolutely and under each absolute path in :attr:`directories`. """ try_kmod: KmodSearchMethod """ How to search for loadable kernel modules. Defaults to :attr:`KmodSearchMethod.DEPMOD_OR_WALK`. """ class KmodSearchMethod(enum.Enum): """ Methods of searching for loadable kernel module debugging information. In addition to searching by build ID, there are currently two methods of searching for debugging information specific to loadable kernel modules: 1. Using :manpage:`depmod(8)` metadata. This looks for :command:`depmod` metadata (specifically, :file:`modules.dep.bin`) at the top level of each directory in :attr:`DebugInfoOptions.kernel_directories` (an empty path means :file:`/lib/modules/{release}`). The metadata is used to quickly find the path of each module, which is then checked relative to each directory specified by :attr:`DebugInfoOptions.kernel_directories`. This method is faster but typically only applicable to installed kernels. 2. Walking kernel directories. This traverses each directory specified by :attr:`DebugInfoOptions.kernel_directories` looking for ``.ko`` files. Module names are matched to filenames before the ``.ko`` extension and with dashes (``-``) replaced with underscores (``_``). This method is slower but not limited to installed kernels. 
Debugging information searches can be configured to use one, both, or neither method. """ NONE = ... """Don't search using kernel module-specific methods.""" DEPMOD = ... """Search using :command:`depmod` metadata.""" WALK = ... """Search by walking kernel directories.""" DEPMOD_OR_WALK = ... """ Search using :command:`depmod` metadata, falling back to walking kernel directories only if no :command:`depmod` metadata is found. Since :command:`depmod` metadata is expected to be reliable if present, this is the default. """ DEPMOD_AND_WALK = ... """ Search using :command:`depmod` metadata and by walking kernel directories. Unlike :attr:`DEPMOD_OR_WALK`, if :command:`depmod` metadata is found but doesn't result in the desired debugging information, this will still walk kernel directories. """ def get_default_prog() -> Program: """ Get the default program for the current thread. :raises NoDefaultProgramError: if the default program is not set """ ... def set_default_prog(__prog: Optional[Program]) -> None: """ Set the default program for the current thread. :param prog: Program to set as the default, or ``None`` to unset it. """ ... class NoDefaultProgramError(Exception): """ Error raised when trying to use the default program when it is not set. """ ... class Module: """ A ``Module`` represents an executable, library, or other binary file used by a program. It has several subclasses representing specific types of modules. Modules are uniquely identified by their type, name, and a type-specific value. Modules have several attributes that are determined automatically whenever possible but may be overridden manually if needed. Modules can be assigned files that provide debugging and runtime information: * .. _module-loaded-file: The "loaded file" is the file containing the executable code, data, etc. used by the program at runtime. * .. _module-debug-file: The "debug file" is the file containing debugging information (e.g., `DWARF `_). 
The loaded file and debug file may be the same file, for example, an unstripped binary. They may be different files if the binary was stripped and its debugging information was split into a separate file. * .. _module-supplementary-debug-file: The debug file may depend on a "supplementary debug file" such as one generated by `dwz(1) `_. If so, then the supplementary debug file must be found before the debug file can be used. """ prog: Final[Program] """Program that this module is from.""" name: Final[str] """ Name of this module. Its exact meaning varies by module type. """ address_range: Optional[Tuple[int, int]] """ Address range where this module is loaded. This is a tuple of the start (inclusive) and end (exclusive) addresses. If the module is not loaded in memory, then both are 0. If not known yet, then this is ``None``. :meth:`Program.loaded_modules()` sets this automatically from the program state/core dump when possible. Otherwise, for :class:`MainModule`, :class:`SharedLibraryModule`, and :class:`VdsoModule`, it may be set automatically when a file is assigned to the module. It is never set automatically for :class:`ExtraModule`. It can also be set manually. """ build_id: Optional[bytes] """ Unique byte string (e.g., GNU build ID) identifying files used by this module. If not known, then this is ``None``. :meth:`Program.loaded_modules()` sets this automatically from the program state/core dump when possible. Otherwise, when a file is assigned to the module, it is set to the file's build ID if it is not already set. It can also be set manually. """ object: Object """ The object associated with this module. For Linux kernel loadable modules, this is the ``struct module *`` associated with the kernel module. For other kinds, this is currently an absent object. The object may be set manually. 
""" loaded_file_status: ModuleFileStatus """Status of the module's :ref:`loaded file `.""" loaded_file_path: Optional[str] """ Absolute path of the module's :ref:`loaded file `, or ``None`` if not known. """ loaded_file_bias: Optional[int] """ Difference between the load address in the program and addresses in the :ref:`loaded file ` itself. This is often non-zero due to address space layout randomization (ASLR). It is set automatically based on the module type when the loaded file is added: * For :class:`MainModule`, it is set based on metadata from the process or core dump (the `auxiliary vector `_ for userspace programs, the ``VMCOREINFO`` note for the Linux kernel). * For :class:`SharedLibraryModule` and :class:`VdsoModule`, it is set to :attr:`~SharedLibraryModule.dynamic_address` minus the address of the dynamic section in the file. * For :class:`RelocatableModule`, it is set to zero. Addresses are adjusted according to :attr:`~RelocatableModule.section_addresses` instead. * For :class:`ExtraModule`, if :attr:`~Module.address_range` is set before the file is added, then the bias is set to :attr:`address_range[0] ` (i.e., the module's start address) minus the file's start address. If :attr:`~Module.address_range` is not set when the file is added or is set to ``(0, 0)``, then the bias is set to zero. This cannot be set manually. """ debug_file_status: ModuleFileStatus """Status of the module's :ref:`debug file `.""" debug_file_path: Optional[str] """ Absolute path of the module's :ref:`debug file `, or ``None`` if not known. """ debug_file_bias: Optional[int] """ Difference between the load address in the program and addresses in the :ref:`debug file `. See :attr:`loaded_file_bias`. """ supplementary_debug_file_kind: Optional[SupplementaryFileKind] """ Kind of the module's :ref:`supplementary debug file `, or ``None`` if not known or not needed. 
""" supplementary_debug_file_path: Optional[str] """ Absolute path of the module's :ref:`supplementary debug file `, or ``None`` if not known or not needed. """ def wants_loaded_file(self) -> bool: """ Return whether this module wants a :ref:`loaded file `. This should be preferred over checking :attr:`loaded_file_status` directly since this is future-proof against new status types being added. It is currently equivalent to ``module.loaded_file_status == ModuleFileStatus.WANT``. """ ... def wants_debug_file(self) -> bool: """ Return whether this module wants a :ref:`debug file `. This should be preferred over checking :attr:`debug_file_status` directly since this is future-proof against new status types being added. It is currently equivalent to ``module.debug_file_status == ModuleFileStatus.WANT or module.debug_file_status == ModuleFileStatus.WANT_SUPPLEMENTARY``. """ ... def wanted_supplementary_debug_file(self) -> WantedSupplementaryFile: """ Return information about the :ref:`supplementary debug file ` that this module currently wants. :raises ValueError: if the module doesn't currently want a supplementary debug file (i.e., ``module.debug_file_status != ModuleFileStatus.WANT_SUPPLEMENTARY``) """ ... def try_file( self, path: Path, *, fd: int = -1, force: bool = False, ) -> None: """ Try to use the given file for this module. If the file does not appear to belong to this module, then it is ignored. This currently checks that the file and the module have the same build ID. If :attr:`loaded_file_status` is :attr:`~ModuleFileStatus.WANT` and the file is loadable, then it is used as the :ref:`loaded file ` and :attr:`loaded_file_status` is set to :attr:`~ModuleFileStatus.HAVE`. If :attr:`debug_file_status` is :attr:`~ModuleFileStatus.WANT` or :attr:`~ModuleFileStatus.WANT_SUPPLEMENTARY` and the file provides debugging information, then it is used as the :ref:`debug file ` and :attr:`debug_file_status` is set to :attr:`~ModuleFileStatus.HAVE`. 
However, if the file requires a supplementary debug file, then it is not used as the debug file yet and :attr:`debug_file_status` is set to :attr:`~ModuleFileStatus.WANT_SUPPLEMENTARY` instead. If :attr:`debug_file_status` is :attr:`~ModuleFileStatus.WANT_SUPPLEMENTARY` and the file matches :meth:`wanted_supplementary_debug_file()`, then the previously found file is used as the debug file, the given file is used as the :ref:`supplementary debug file `, and :attr:`debug_file_status` is set to :attr:`~ModuleFileStatus.HAVE`. The file may be used as both the loaded file and debug file if applicable. :param path: Path to file. :param fd: If nonnegative, an open file descriptor referring to the file. This always takes ownership of the file descriptor even if the file is not used or on error, so the caller must not close it. :param force: If ``True``, then don't check whether the file matches the module. """ ... class MainModule(Module): """ Main module. There is only one main module in a program. For userspace programs, it is the executable, and its name is usually the absolute path of the executable. For the Linux kernel, it is the kernel image, a.k.a. ``vmlinux``, and its name is "kernel". """ class SharedLibraryModule(Module): """ Shared library (a.k.a. dynamic library, dynamic shared object, or ``.so``) module. Shared libraries are uniquely identified by their name (usually the absolute path of the shared object file) and dynamic address. """ dynamic_address: Final[int] """Address of the shared object's dynamic section.""" class VdsoModule(Module): """ Virtual dynamic shared object (vDSO) module. The vDSO is a special shared library automatically loaded into a process by the kernel; see :manpage:`vdso(7)`. It is uniquely identified by its name (the ``SONAME`` field of the shared object file) and dynamic address. """ dynamic_address: Final[int] """Address of the shared object's dynamic section.""" class RelocatableModule(Module): """ Relocatable object module. 
A relocatable object is an object file requiring a linking step to assign section addresses and adjust the file to reference those addresses. Linux kernel loadable modules (``.ko`` files) are a special kind of relocatable object. For userspace programs, relocatable objects are usually intermediate products of the compilation process (``.o`` files). They are not typically loaded at runtime. However, drgn allows manually defining a relocatable module and assigning its section addresses if needed. Relocatable modules are uniquely identified by a name and address. """ address: Final[int] """ Address identifying the module. For Linux kernel loadable modules, this is the module base address. """ section_addresses: MutableMapping[str, int] """ Mapping from section names to assigned addresses. Once a file has been assigned to the module, this can no longer be modified. :meth:`Program.linux_kernel_loadable_module()` and :meth:`Program.loaded_modules()` prepopulate this for Linux kernel loadable modules. """ class ExtraModule(Module): """ Module with extra debugging information. For advanced use cases, it may be necessary to manually add debugging information that does not fit into any of the categories above. ``ExtraModule`` is intended for these use cases. For example, it can be used to add debugging information from a standalone file that is not in use by a particular program. Extra modules are uniquely identified by a name and ID number. Both the name and ID number are arbitrary. """ id: Final[int] """Arbitrary identification number.""" class ModuleFileStatus(enum.Enum): """ Status of a file in a :class:`Module`. This is usually used to communicate with debugging information finders; see :meth:`Program.register_debug_info_finder()`. """ WANT = ... """File has not been found and should be searched for.""" HAVE = ... """File has already been found and assigned.""" DONT_WANT = ... """ File has not been found, but it should not be searched for. 
:meth:`Module.try_file()` and debugging information finders are required to honor this and will never change it. However, other operations may reset this to :attr:`WANT` when they load debugging information automatically. """ DONT_NEED = ... """ File has not been found and is not needed (e.g., because its debugging information is not applicable or is provided through another mechanism). In contrast to :attr:`DONT_WANT`, drgn itself will never change this to :attr:`WANT`. """ WANT_SUPPLEMENTARY = ... """ File has been found, but it requires a supplementary file before it can be used. See :meth:`Module.wanted_supplementary_debug_file()`. """ class WantedSupplementaryFile(NamedTuple): """Information about a wanted supplementary file.""" kind: SupplementaryFileKind """Kind of supplementary file.""" path: str """Path of main file that wants the supplementary file.""" supplementary_path: str """ Path to the supplementary file. This may be absolute or relative to :attr:`path`. """ checksum: bytes """ Unique identifier of the supplementary file. The interpretation depends on :attr:`kind`. """ class SupplementaryFileKind(enum.Enum): """ Kind of supplementary file. .. note:: DWARF 5 supplementary files are not currently supported but may be in the future. DWARF package files are not considered supplementary files. They are considered part of the debug file and must have the same path as the debug file plus a ".dwp" extension. """ GNU_DEBUGALTLINK = ... """ GNU-style supplementary debug file referred to by a ``.gnu_debugaltlink`` section. Its :attr:`~WantedSupplementaryFile.checksum` is the file's GNU build ID. """ class Thread: """A thread in a program.""" tid: Final[int] """Thread ID (as defined by :manpage:`gettid(2)`).""" name: Optional[str] """ Thread name, or ``None`` if unknown. See `PR_SET_NAME `_ and `/proc/pid/comm `_. .. note:: Linux userspace core dumps only save the name of the main thread, so :attr:`name` will be ``None`` for other threads. 
""" object: Final[Object] """ If the program is the Linux kernel, the ``struct task_struct *`` object for this thread. Otherwise, not defined. """ def stack_trace(self) -> StackTrace: """ Get the stack trace for this thread. This is equivalent to ``prog.stack_trace(thread.tid)``. See :meth:`Program.stack_trace()`. """ ... def filename_matches(haystack: Optional[str], needle: Optional[str]) -> bool: """ Return whether a filename containing a definition (*haystack*) matches a filename being searched for (*needle*). The filename is matched from right to left, so ``'stdio.h'``, ``'include/stdio.h'``, ``'usr/include/stdio.h'``, and ``'/usr/include/stdio.h'`` would all match a definition in ``/usr/include/stdio.h``. If *needle* is ``None`` or empty, it matches any definition. If *haystack* is ``None`` or empty, it only matches if *needle* is also ``None`` or empty. :param haystack: Path of file containing definition. :param needle: Filename to match. """ ... def program_from_core_dump(path: Union[Path, int]) -> Program: """ Create a :class:`Program` from a core dump file. The type of program (e.g., userspace or kernel) is determined automatically. :param path: Core dump file path or open file descriptor. """ ... def program_from_kernel() -> Program: """ Create a :class:`Program` from the running operating system kernel. This requires root privileges. """ ... def program_from_pid(pid: int) -> Program: """ Create a :class:`Program` from a running program with the given PID. This requires appropriate permissions (on Linux, :manpage:`ptrace(2)` attach permissions). :param pid: Process ID of the program to debug. """ ... class Platform: """ A ``Platform`` represents the environment (i.e., architecture and ABI) that a program runs on. """ def __init__( self, arch: Architecture, flags: Optional[PlatformFlags] = None ) -> None: """ Create a ``Platform``. 
:param arch: :attr:`Platform.arch` :param flags: :attr:`Platform.flags`; if ``None``, default flags for the architecture are used. """ ... arch: Final[Architecture] """Instruction set architecture of this platform.""" flags: Final[PlatformFlags] """Flags which apply to this platform.""" registers: Final[Sequence[Register]] """Processor registers on this platform.""" class Architecture(enum.Enum): """An ``Architecture`` represents an instruction set architecture.""" X86_64 = ... """The x86-64 architecture, a.k.a. AMD64 or Intel 64.""" I386 = ... """The 32-bit x86 architecture, a.k.a. i386 or IA-32.""" AARCH64 = ... """The AArch64 architecture, a.k.a. ARM64.""" ARM = ... """The 32-bit Arm architecture.""" PPC64 = ... """The 64-bit PowerPC architecture.""" RISCV64 = ... """The 64-bit RISC-V architecture.""" RISCV32 = ... """The 32-bit RISC-V architecture.""" S390X = ... """The s390x architecture, a.k.a. IBM Z or z/Architecture.""" S390 = ... """The 32-bit s390 architecture, a.k.a. System/390.""" UNKNOWN = ... """ An architecture which is not known to drgn. Certain features are not available when the architecture is unknown, but most of drgn will still work. """ class PlatformFlags(enum.Flag): """``PlatformFlags`` are flags describing a :class:`Platform`.""" IS_64_BIT = ... """Platform is 64-bit.""" IS_LITTLE_ENDIAN = ... """Platform is little-endian.""" class Register: """A ``Register`` represents information about a processor register.""" names: Final[Sequence[str]] """Names of this register.""" host_platform: Platform """The platform of the host which is running drgn.""" class Language: """ A ``Language`` represents a programming language supported by drgn. This class cannot be constructed; there are singletons for the supported languages. 
""" name: Final[str] """Name of the programming language.""" C: ClassVar[Language] """The C programming language.""" CPP: ClassVar[Language] """The C++ programming language.""" class Object: """ An ``Object`` represents a symbol or value in a program. An object may exist in the memory of the program (a *reference*), it may be a constant or temporary computed value (a *value*), or it may be absent entirely (an *absent* object). All instances of this class have two attributes: :attr:`prog_`, the program that the object is from; and :attr:`type_`, the type of the object. Reference objects also have an :attr:`address_` and a :attr:`bit_offset_`. Objects may also have a :attr:`bit_field_size_`. :func:`repr()` of an object returns a Python representation of the object: >>> print(repr(prog['jiffies'])) Object(prog, 'volatile unsigned long', address=0xffffffffbf005000) :class:`str() ` returns a "pretty" representation of the object in programming language syntax: >>> print(prog['jiffies']) (volatile unsigned long)4326237045 The output format of ``str()`` can be modified by using the :meth:`format_()` method instead: >>> sysname = prog['init_uts_ns'].name.sysname >>> print(sysname) (char [65])"Linux" >>> print(sysname.format_(type_name=False)) "Linux" >>> print(sysname.format_(string=False)) (char [65]){ 76, 105, 110, 117, 120 } .. note:: The drgn CLI is set up so that objects are displayed in the "pretty" format instead of with ``repr()`` (the latter is the default behavior of Python's interactive mode). Therefore, it's usually not necessary to call ``print()`` in the drgn CLI. Objects support the following operators: * Arithmetic operators: ``+``, ``-``, ``*``, ``/``, ``%`` * Bitwise operators: ``<<``, ``>>``, ``&``, ``|``, ``^``, ``~`` * Relational operators: ``==``, ``!=``, ``<``, ``>``, ``<=``, ``>=`` * Subscripting: :meth:`[] <__getitem__>` (Python does not have a unary ``*`` operator, so pointers are dereferenced with ``ptr[0]``) * Member access: :meth:`. 
<__getattr__>` (Python does not have a ``->`` operator, so ``.`` is also used to access members of pointers to structures) * The address-of operator: :meth:`drgn.Object.address_of_()` (this is a method because Python does not have a ``&`` operator) * Array length: :meth:`len() <__len__>` These operators all have the semantics of the program's programming language. For example, adding two objects from a program written in C results in an object with a type and value according to the rules of C: >>> Object(prog, 'unsigned long', 2**64 - 1) + Object(prog, 'int', 1) Object(prog, 'unsigned long', value=0) If only one operand to a binary operator is an object, the other operand will be converted to an object according to the language's rules for literals: >>> Object(prog, 'char', 0) - 1 Object(prog, 'int', value=-1) The standard :class:`int() `, :class:`float() `, and :class:`bool() ` functions convert an object to that Python type. Conversion to ``bool`` uses the programming language's notion of "truthiness". Additionally, certain Python functions will automatically coerce an object to the appropriate Python type (e.g., :func:`hex()`, :func:`round()`, and :meth:`list subscripting `). Object attributes and methods are named with a trailing underscore to avoid conflicting with structure, union, or class members. The attributes and methods always take precedence; use :meth:`member_()` if there is a conflict. Objects are usually obtained directly from a :class:`Program`, but they can be constructed manually, as well (for example, if you got a variable address from a log file). """ @overload def __init__( self, prog: Program, type: Union[str, Type], # This should use numbers.Number, but mypy doesn't support it yet; see # python/mypy#3186. Additionally, once mypy supports recursive types, # we can make the Mapping and Sequence item types stricter; see # python/mypy#731. 
value: Union[IntegerLike, float, bool, Mapping[str, Any], Sequence[Any]], *, bit_field_size: Optional[IntegerLike] = None, ) -> None: """ Create a value object given its type and value. :param prog: Program to create the object in. :param type: Type of the object. :param value: Value of the object. See :meth:`value_()`. :param bit_field_size: Size in bits of the object if it is a bit field. The default is ``None``, which means the object is not a bit field. """ ... @overload def __init__(self, prog: Program, *, value: Union[int, float, bool]) -> None: """ Create a value object from a "literal". This is used to emulate a literal number in the source code of the program. The type is deduced from *value* according to the language's rules for literals. :param value: Value of the literal. """ ... @overload def __init__( self, prog: Program, type: Union[str, Type], *, address: IntegerLike, bit_offset: IntegerLike = 0, bit_field_size: Optional[IntegerLike] = None, ) -> None: """ Create a reference object. :param address: Address of the object in the program. :param bit_offset: Offset in bits from *address* to the beginning of the object. """ ... @overload def __init__( self, prog: Program, type: Union[str, Type], *, absence_reason: AbsenceReason = AbsenceReason.OTHER, bit_field_size: Optional[IntegerLike] = None, ) -> None: """Create an absent object.""" ... prog_: Final[Program] """Program that this object is from.""" type_: Final[Type] """Type of this object.""" address_: Final[Optional[int]] """ Address of this object if it is a reference, ``None`` if it is a value or absent. """ absent_: Final[bool] """ Whether this object is absent. This is ``False`` for all values and references (even if the reference has an invalid address). """ absence_reason_: Final[Optional[AbsenceReason]] """ Reason that this object is absent. This is ``None`` for all values and references. 
""" bit_offset_: Final[Optional[int]] """ Offset in bits from this object's address to the beginning of the object if it is a reference, ``None`` otherwise. This can only be non-zero for scalars. """ bit_field_size_: Final[Optional[int]] """ Size in bits of this object if it is a bit field, ``None`` if it is not. """ def __getattr__(self, name: str) -> Object: """ Implement ``self.name``. This corresponds to both the member access (``.``) and member access through pointer (``->``) operators in C. Note that if *name* is an attribute or method of the :class:`Object` class, then that takes precedence. Otherwise, this is equivalent to :meth:`member_()`. >>> print(prog['init_task'].pid) (pid_t)0 :param name: Attribute name. """ ... def __getitem__(self, idx: IntegerLike) -> Object: """ Implement ``self[idx]``. Get the array element at the given index. >>> print(prog['init_task'].comm[1]) (char)119 ``[0]`` is also the equivalent of the pointer dereference (``*``) operator in C: >>> ptr_to_ptr *(void **)0xffff9b86801e2968 = 0xffff9b86801e2460 >>> print(ptr_to_ptr[0]) (void *)0xffff9b86801e2460 This is only valid for pointers and arrays. .. note:: Negative indices behave as they would in the object's language (as opposed to the Python semantics of indexing from the end of the array). :param idx: The array index. :raises TypeError: if this object is not a pointer or array """ ... def __len__(self) -> int: """ Implement ``len(self)``. Get the number of elements in this object. >>> len(prog['init_task'].comm) 16 This is only valid for arrays. :raises TypeError: if this object is not an array with complete type """ ... def value_(self) -> Any: """ Get the value of this object as a Python object. For basic types (integer, floating-point, boolean), this returns an object of the directly corresponding Python type (``int``, ``float``, ``bool``). For pointers, this returns the address value of the pointer. For enums, this returns an ``int``. 
For structures and unions, this returns a ``dict`` of members. For arrays, this returns a ``list`` of values. .. note:: Helpers that wish to accept an argument that may be an :class:`Object` or an :class:`int` should use :func:`operator.index()` and :class:`IntegerLike` instead: .. code-block:: python3 import operator from drgn import IntegerLike def my_helper(i: IntegerLike) -> ...: value = operator.index(i) # Returns an int ... :raises FaultError: if reading the object causes a bad memory access :raises TypeError: if this object has an unreadable type (e.g., ``void``) """ ... def string_(self) -> bytes: """ Read a null-terminated string pointed to by this object. This is only valid for pointers and arrays. The element type is ignored; this operates byte-by-byte. For pointers and flexible arrays, this stops at the first null byte. For complete arrays, this stops at the first null byte or at the end of the array. :raises FaultError: if reading the string causes a bad memory access :raises TypeError: if this object is not a pointer or array """ ... def member_(self, name: str) -> Object: """ Get a member of this object. This is valid for structures, unions, classes, and pointers to any of those. If the object is a pointer, it is automatically dereferenced first. Normally the dot operator (:meth:`. <__getattr__>`) can be used to accomplish the same thing, but this method can be used if there is a name conflict with an ``Object`` attribute or method. :param name: Name of the member. :raises TypeError: if this object is not a structure, union, class, or a pointer to one of those :raises LookupError: if this object does not have a member with the given name """ ... def address_of_(self) -> Object: """ Get a pointer to this object. This corresponds to the address-of (``&``) operator in C. It is only possible for reference objects, as value objects don't have an address in the program. As opposed to :attr:`address_`, this returns an ``Object``, not an ``int``. 
:raises ValueError: if this object is a value """ ... def read_(self) -> Object: """ Read this object (which may be a reference or a value) and return it as a value object. This is useful if the object can change in the running program (but of course nothing stops the program from modifying the object while it is being read). As opposed to :meth:`value_()`, this returns an ``Object``, not a standard Python type. :raises FaultError: if reading this object causes a bad memory access :raises TypeError: if this object has an unreadable type (e.g., ``void``) """ ... def to_bytes_(self) -> bytes: """Return the binary representation of this object's value.""" ... @classmethod def from_bytes_( cls, prog: Program, type: Union[str, Type], bytes: Buffer, *, bit_offset: IntegerLike = 0, bit_field_size: Optional[IntegerLike] = None, ) -> Object: """ Return a value object from its binary representation. >>> print(Object.from_bytes_(prog, "int", b"\x10\x00\x00\x00")) (int)16 :param prog: Program to create the object in. :param type: Type of the object. :param bytes: Buffer containing value of the object. :param bit_offset: Offset in bits from the beginning of *bytes* to the beginning of the object. :param bit_field_size: Size in bits of the object if it is a bit field. The default is ``None``, which means the object is not a bit field. """ ... def format_( self, *, columns: Optional[IntegerLike] = None, dereference: Optional[bool] = None, symbolize: Optional[bool] = None, string: Optional[bool] = None, char: Optional[bool] = None, type_name: Optional[bool] = None, member_type_names: Optional[bool] = None, element_type_names: Optional[bool] = None, members_same_line: Optional[bool] = None, elements_same_line: Optional[bool] = None, member_names: Optional[bool] = None, element_indices: Optional[bool] = None, implicit_members: Optional[bool] = None, implicit_elements: Optional[bool] = None, ) -> str: """ Format this object in programming language syntax. 
Various format options can be passed (as keyword arguments) to control the output. Options that aren't passed or are passed as ``None`` fall back to a default. Specifically, ``obj.format_()`` (i.e., with no passed options) is equivalent to ``str(obj)``. >>> workqueues = prog['workqueues'] >>> print(workqueues) (struct list_head){ .next = (struct list_head *)0xffff932ecfc0ae10, .prev = (struct list_head *)0xffff932e3818fc10, } >>> print(workqueues.format_(type_name=False, ... member_type_names=False, ... member_names=False, ... members_same_line=True)) { 0xffff932ecfc0ae10, 0xffff932e3818fc10 } :param columns: Number of columns to limit output to when the expression can be reasonably wrapped. Defaults to no limit. :param dereference: If this object is a pointer, include the dereferenced value. This does not apply to structure, union, or class members, or array elements, as dereferencing those could lead to an infinite loop. Defaults to ``True``. :param symbolize: Include a symbol name and offset for pointer objects. Defaults to ``True``. :param string: Format the values of objects with string type as strings. For C, this applies to pointers to and arrays of ``char``, ``signed char``, and ``unsigned char``. Defaults to ``True``. :param char: Format objects with character type as character literals. For C, this applies to ``char``, ``signed char``, and ``unsigned char``. Defaults to ``False``. :param type_name: Include the type name of this object. Defaults to ``True``. :param member_type_names: Include the type names of structure, union, and class members. Defaults to ``True``. :param element_type_names: Include the type names of array elements. Defaults to ``False``. :param members_same_line: Place multiple structure, union, and class members on the same line if they fit within the specified number of ``columns``. Defaults to ``False``. :param elements_same_line: Place multiple array elements on the same line if they fit within the specified number of ``columns``. 
Defaults to ``True``. :param member_names: Include the names of structure, union, and class members. Defaults to ``True``. :param element_indices: Include the indices of array elements. Defaults to ``False``. :param implicit_members: Include structure, union, and class members which have an implicit value (i.e., for C, zero-initialized). Defaults to ``True``. :param implicit_elements: Include array elements which have an implicit value (i.e., for C, zero-initialized). Defaults to ``False``. """ ... def __iter__(self) -> Iterator[Object]: ... def __bool__(self) -> bool: ... def __lt__(self, other: Any) -> bool: ... def __le__(self, other: Any) -> bool: ... def __eq__(self, other: Any) -> bool: ... def __ne__(self, other: Any) -> bool: ... def __gt__(self, other: Any) -> bool: ... def __ge__(self, other: Any) -> bool: ... def __add__(self, other: Any) -> Object: ... def __sub__(self, other: Any) -> Object: ... def __mul__(self, other: Any) -> Object: ... def __truediv__(self, other: Any) -> Object: ... def __mod__(self, other: Any) -> Object: ... def __lshift__(self, other: Any) -> Object: ... def __rshift__(self, other: Any) -> Object: ... def __and__(self, other: Any) -> Object: ... def __xor__(self, other: Any) -> Object: ... def __or__(self, other: Any) -> Object: ... def __radd__(self, other: Any) -> Object: ... def __rsub__(self, other: Any) -> Object: ... def __rmul__(self, other: Any) -> Object: ... def __rtruediv__(self, other: Any) -> Object: ... def __rmod__(self, other: Any) -> Object: ... def __rlshift__(self, other: Any) -> Object: ... def __rrshift__(self, other: Any) -> Object: ... def __rand__(self, other: Any) -> Object: ... def __rxor__(self, other: Any) -> Object: ... def __ror__(self, other: Any) -> Object: ... def __neg__(self) -> Object: ... def __pos__(self) -> Object: ... def __invert__(self) -> Object: ... def __int__(self) -> int: ... def __float__(self) -> float: ... def __index__(self) -> int: ... 
    @overload
    def __round__(self, ndigits: None = None) -> int: ...
    @overload
    def __round__(self, ndigits: int) -> Any: ...
    def __trunc__(self) -> int: ...
    def __floor__(self) -> int: ...
    def __ceil__(self) -> int: ...
    def _repr_pretty_(self, p: Any, cycle: bool) -> None: ...

class AbsenceReason(enum.Enum):
    """Reason an object is :ref:`absent <absent-objects>`."""

    OTHER = ...
    """Another reason not listed below."""
    OPTIMIZED_OUT = ...
    """Object was optimized out by the compiler."""
    NOT_IMPLEMENTED = ...
    """Encountered unknown debugging information."""

def NULL(prog: Program, type: Union[str, Type]) -> Object:
    """
    Get an object representing ``NULL`` casted to the given type.

    This is equivalent to ``Object(prog, type, 0)``.

    :param prog: The program.
    :param type: The type.
    """
    ...

def cast(type: Union[str, Type], obj: Object) -> Object:
    """
    Get the value of an object explicitly casted to another type.

    This uses the programming language's rules for explicit conversions, like
    the cast operator.

    >>> cast("unsigned int", Object(prog, "float", 2.0))
    (unsigned int)2
    >>> cast("void *", Object(prog, "int", 0))
    (void *)0x0

    See also :func:`implicit_convert()` for implicit conversions (which
    usually do stricter type checking) and :func:`reinterpret()` for
    reinterpreting the raw memory of an object.

    :param type: Type to cast to.
    :param obj: Object to cast.
    :return: Casted object. This is always a value object.
    :raises TypeError: if casting *obj* to *type* is not allowed
    """
    ...

def implicit_convert(type: Union[str, Type], obj: Object) -> Object:
    """
    Get the value of an object implicitly converted to another type.

    This uses the programming language's rules for implicit conversions, like
    when assigning to a variable or passing arguments to a function call.

    >>> implicit_convert("unsigned int", Object(prog, "float", 2.0))
    (unsigned int)2
    >>> implicit_convert("void *", Object(prog, "int", 0))
    Traceback (most recent call last):
        ...
    TypeError: cannot convert 'int' to incompatible type 'void *'

    See also :func:`cast()` for explicit conversions and :func:`reinterpret()`
    for reinterpreting the raw memory of an object.

    :param type: Type to convert to.
    :param obj: Object to convert.
    :return: Converted object. This is always a value object.
    :raises TypeError: if converting *obj* to *type* is not allowed
    """
    ...

def reinterpret(type: Union[str, Type], obj: Object) -> Object:
    """
    Get the representation of an object reinterpreted as another type.

    This reinterprets the raw memory of the object, so an object can be
    reinterpreted as any other type.

    >>> reinterpret("unsigned int", Object(prog, "float", 2.0))
    (unsigned int)1073741824

    .. note::

        You usually want :func:`cast()` or :func:`implicit_convert()` instead,
        which convert the *value* of an object instead of its in-memory
        representation.

    :param type: Type to reinterpret as.
    :param obj: Object to reinterpret.
    :return: Reinterpreted object. If *obj* is a reference object, then this
        is a reference object. If *obj* is a value object, then this is a
        value object.
    :raises OutOfBoundsError: if *obj* is a value object and *type* is larger
        than *obj*
    """
    ...

def container_of(ptr: Object, type: Union[str, Type], member: str) -> Object:
    """
    Get the containing object of a pointer object.

    This corresponds to the ``container_of()`` macro in C.

    :param ptr: Pointer to member in containing object.
    :param type: Type of containing object.
    :param member: Name of member in containing object. May include one or
        more member references and zero or more array subscripts.
    :return: Pointer to containing object.
    :raises TypeError: if *ptr* is not a pointer or *type* is not a structure,
        union, or class type
    :raises ValueError: if the member is not byte-aligned (e.g., because it is
        a bit field)
    :raises LookupError: if *type* does not have a member with the given name
    """
    ...
class Symbol:
    """
    A ``Symbol`` represents an entry in the symbol table of a program, i.e.,
    an identifier along with its corresponding address range in the program.
    """

    def __init__(
        self,
        name: str,
        address: int,
        size: int,
        binding: SymbolBinding,
        kind: SymbolKind,
    ) -> None:
        """
        Create a ``Symbol``.

        :param name: :attr:`Symbol.name`
        :param address: :attr:`Symbol.address`
        :param size: :attr:`Symbol.size`
        :param binding: :attr:`Symbol.binding`
        :param kind: :attr:`Symbol.kind`
        """
        ...
    name: Final[str]
    """Name of this symbol."""
    address: Final[int]
    """Start address of this symbol."""
    size: Final[int]
    """Size of this symbol in bytes."""
    binding: Final[SymbolBinding]
    """Linkage behavior and visibility of this symbol."""
    kind: Final[SymbolKind]
    """Kind of entity represented by this symbol."""

class SymbolIndex:
    """
    A ``SymbolIndex`` contains a static set of symbols and allows efficient
    lookup by name and address.

    With :meth:`Program.register_symbol_finder()`, you can add a callback to
    provide custom symbol finding logic. However, in many cases, all that is
    necessary is to provide drgn with a list of symbols that you know to be
    part of the program. This object allows you to do that. It efficiently
    implements the Symbol Finder API given a static set of symbols. For
    example::

        >>> prog = drgn.Program()
        >>> symbol = drgn.Symbol("foo", 0x123, 1, drgn.SymbolBinding.GLOBAL, drgn.SymbolKind.OBJECT)
        >>> finder = drgn.SymbolIndex([symbol])
        >>> prog.register_symbol_finder("SymbolIndex", finder, enable_index=0)
        >>> prog.symbols()
        [Symbol(name='foo', address=0x123, size=0x1, binding=<SymbolBinding.GLOBAL: 2>, kind=<SymbolKind.OBJECT: 1>)]
        >>> prog.symbol("bar")
        Traceback (most recent call last):
          File "<console>", line 1, in <module>
        LookupError: not found
        >>> prog.symbol("foo")
        Symbol(name='foo', address=0x123, size=0x1, binding=<SymbolBinding.GLOBAL: 2>, kind=<SymbolKind.OBJECT: 1>)
        >>> prog.symbol(0x100)
        Traceback (most recent call last):
          File "<console>", line 1, in <module>
        LookupError: not found
        >>> prog.symbol(0x123)
        Symbol(name='foo', address=0x123, size=0x1, binding=<SymbolBinding.GLOBAL: 2>, kind=<SymbolKind.OBJECT: 1>)
    """

    def __init__(self, symbols: Iterable[Symbol]) -> None:
        """
        Create a ``SymbolIndex`` from a sequence of symbols

        The returned symbol index satisfies the Symbol Finder API. It supports
        overlapping symbol address ranges and duplicate symbol names. However,
        in the case of these sorts of conflicts, it doesn't provide any
        guarantee on the order of the results, or which result is returned
        when a single symbol is requested.

        :param symbols: An iterable of symbols
        :returns: A callable object suitable to provide to
            :meth:`Program.register_symbol_finder()`.
        """

    def __call__(
        self,
        prog: Program,
        name: Optional[str],
        address: Optional[int],
        one: bool,
    ) -> List[Symbol]:
        """
        Lookup symbol by name, address, or both.

        :param prog: (unused) the program looking up this symbol
        :param name: if given, only return symbols with this name
        :param address: if given, only return symbols spanning this address
        :param one: if given, limit the result to a single symbol
        :returns: a list of matching symbols (empty if none are found)
        """

class SymbolBinding(enum.Enum):
    """
    A ``SymbolBinding`` describes the linkage behavior and visibility of a
    symbol.
    """

    UNKNOWN = ...
    """Unknown."""
    LOCAL = ...
    """Not visible outside of the object file containing its definition."""
    GLOBAL = ...
    """Globally visible."""
    WEAK = ...
    """Globally visible but may be overridden by a non-weak global symbol."""
    UNIQUE = ...
    """
    Globally visible even if dynamic shared object is loaded locally.

    See GCC's ``-fno-gnu-unique`` `option
    <https://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html#index-fno-gnu-unique>`_.
    """

class SymbolKind(enum.Enum):
    """
    A ``SymbolKind`` describes the kind of entity that a symbol represents.
    """

    UNKNOWN = ...
    """Unknown or not defined."""
    OBJECT = ...
    """Data object (e.g., variable or array)."""
    FUNC = ...
    """Function or other executable code."""
    SECTION = ...
    """Object file section."""
    FILE = ...
    """Source file."""
    COMMON = ...
    """Data object in common block."""
    TLS = ...
    """Thread-local storage entity."""
    IFUNC = ...
    """`Indirect function <https://sourceware.org/glibc/wiki/GNU_IFUNC>`_."""

class StackTrace:
    """
    A ``StackTrace`` is a :ref:`sequence <python:typesseq-common>` of
    :class:`StackFrame`.

    ``len(trace)`` is the number of stack frames in the trace. ``trace[0]``
    is the innermost stack frame, ``trace[1]`` is its caller, and
    ``trace[len(trace) - 1]`` is the outermost frame. Negative indexing also
    works: ``trace[-1]`` is the outermost frame and ``trace[-len(trace)]`` is
    the innermost frame. It is also iterable:

    .. code-block:: python3

        for frame in trace:
            if frame.name == 'io_schedule':
                print('Thread is doing I/O')

    :class:`str() <str>` returns a pretty-printed stack trace:

    >>> prog.stack_trace(1)
    #0 context_switch (kernel/sched/core.c:4339:2)
    #1 __schedule (kernel/sched/core.c:5147:8)
    #2 schedule (kernel/sched/core.c:5226:3)
    #3 do_wait (kernel/exit.c:1534:4)
    #4 kernel_wait4 (kernel/exit.c:1678:8)
    #5 __do_sys_wait4 (kernel/exit.c:1706:13)
    #6 do_syscall_64 (arch/x86/entry/common.c:47:14)
    #7 entry_SYSCALL_64+0x7c/0x15b (arch/x86/entry/entry_64.S:112)
    #8 0x4d49dd

    The format is subject to change. The drgn CLI is set up so that stack
    traces are displayed with ``str()`` by default.
    """

    prog: Final[Program]
    """Program that this stack trace is from."""
    def __getitem__(self, idx: IntegerLike) -> StackFrame: ...
    def __len__(self) -> int: ...
    def __iter__(self) -> Iterator[StackFrame]: ...
    def _repr_pretty_(self, p: Any, cycle: bool) -> None: ...

class StackFrame:
    """
    A ``StackFrame`` represents a single *frame* in a thread's call stack.

    :class:`str() <str>` returns a pretty-printed stack frame:

    >>> prog.stack_trace(1)[0]
    #0 at 0xffffffffb64ac287 (__schedule+0x227/0x606) in context_switch at kernel/sched/core.c:4339:2 (inlined)

    This includes more information than when printing the full stack trace.
    The format is subject to change. The drgn CLI is set up so that stack
    frames are displayed with ``str()`` by default.

    The :meth:`[] <.__getitem__>` operator can look up function parameters,
    local variables, and global variables in the scope of the stack frame:

    >>> prog.stack_trace(1)[0]['prev'].pid
    (pid_t)1
    >>> prog.stack_trace(1)[0]['scheduler_running']
    (int)1
    """

    name: Final[str]
    """
    Name of the function or symbol at this frame.

    This tries to get the best available name for this frame in the following
    order:

    1. The name of the function in the source code based on debugging
       information (:attr:`frame.function_name <function_name>`).
    2. The name of the symbol in the binary (:meth:`frame.symbol().name
       <symbol>`).
    3. The program counter in hexadecimal (:attr:`hex(frame.pc) <pc>`).
    4. The string "???".
    """
    function_name: Final[Optional[str]]
    """
    Name of the function at this frame, or ``None`` if it could not be
    determined.

    The name cannot be determined if debugging information is not available
    for the function, e.g., because it is implemented in assembly.
    """
    is_inline: Final[bool]
    """
    Whether this frame is for an inlined call.

    An inline frame shares the same stack frame in memory as its caller.
    Therefore, it has the same registers (including program counter and thus
    symbol).
    """
    interrupted: Final[bool]
    """
    Whether this stack frame was interrupted (for example, by a hardware
    interrupt, signal, trap, etc.).

    If this is ``True``, then the register values in this frame are the
    values at the time that the frame was interrupted.

    This is ``False`` if the frame is for a function call, in which case the
    register values are the values when control returns to this frame. In
    particular, the program counter is the return address, which is typically
    the instruction after the call instruction.
    """
    pc: Final[int]
    """Program counter at this stack frame."""
    sp: Final[int]
    """Stack pointer at this stack frame."""
    def __getitem__(self, name: str) -> Object:
        """
        Implement ``self[name]``. Get the object (variable, function
        parameter, constant, or function) with the given name in the scope of
        this frame.

        If the object exists but has been optimized out, this returns an
        :ref:`absent object <absent-objects>`.

        :param name: Object name.
        """
        ...
    def __contains__(self, name: str) -> bool:
        """
        Implement ``name in self``. Return whether an object with the given
        name exists in the scope of this frame.

        :param name: Object name.
        """
        ...
    def locals(self) -> List[str]:
        """
        Get a list of the names of all local objects (local variables,
        function parameters, local constants, and nested functions) in the
        scope of this frame.

        Not all names may have present values, but they can be used with the
        :meth:`[] <.__getitem__>` operator to check.
        """
        ...
    def source(self) -> Tuple[str, int, int]:
        """
        Get the source code location of this frame.

        :return: Location as a ``(filename, line, column)`` triple.
        :raises LookupError: if the source code location is not available
        """
        ...
    def symbol(self) -> Symbol:
        """
        Get the function symbol at this stack frame.

        This is equivalent to:

        .. code-block:: python3

            prog.symbol(frame.pc - (0 if frame.interrupted else 1))
        """
        ...
    def register(self, reg: str) -> int:
        """
        Get the value of the given register at this stack frame.

        :param reg: Register name.
        :raises ValueError: if the register name is not recognized
        :raises LookupError: if the register value is not known
        """
        ...
def registers(self) -> Dict[str, int]: """ Get the values of all available registers at this stack frame as a dictionary with the register names as keys. """ ... def _repr_pretty_(self, p: Any, cycle: bool) -> None: ... class Type: """ A ``Type`` object describes a type in a program. Each kind of type (e.g., integer, structure) has different attributes (e.g., name, size). Types can also have qualifiers (e.g., constant, atomic). Accessing an attribute which does not apply to a type raises an :exc:`AttributeError`. :func:`repr()` of a ``Type`` returns a Python representation of the type: >>> print(repr(prog.type('sector_t'))) prog.typedef_type(name='sector_t', type=prog.int_type(name='unsigned long', size=8, is_signed=False)) :class:`str() ` returns a representation of the type in programming language syntax: >>> print(prog.type('sector_t')) typedef unsigned long sector_t The drgn CLI is set up so that types are displayed with ``str()`` instead of ``repr()`` by default. This class cannot be constructed directly. Instead, use one of the :ref:`api-type-constructors`. """ prog: Final[Program] """Program that this type is from.""" kind: Final[TypeKind] """Kind of this type.""" primitive: Final[Optional[PrimitiveType]] """ If this is a primitive type (e.g., ``int`` or ``double``), the kind of primitive type. Otherwise, ``None``. """ qualifiers: Final[Qualifiers] """Bitmask of this type's qualifier.""" language: Final[Language] """Programming language of this type.""" name: Final[str] """ Name of this type. This is present for integer, boolean, floating-point, and typedef types. """ tag: Final[Optional[str]] """ Tag of this type, or ``None`` if this is an anonymous type. This is present for structure, union, class, and enumerated types. """ size: Final[Optional[int]] """ Size of this type in bytes, or ``None`` if this is an incomplete type. This is present for integer, boolean, floating-point, structure, union, class, and pointer types. 
""" length: Final[Optional[int]] """ Number of elements in this type, or ``None`` if this is an incomplete type. This is only present for array types. """ is_signed: Final[bool] """Whether this type is signed. This is only present for integer types.""" byteorder: Final[str] """ Byte order of this type: ``'little'`` if it is little-endian, or ``'big'`` if it is big-endian. This is present for integer, boolean, floating-point, and pointer types. """ type: Final[Type] """ Type underlying this type, defined as follows: * For typedef types, the aliased type. * For enumerated types, the compatible integer type, which is ``None`` if this is an incomplete type. * For pointer types, the referenced type. * For array types, the element type. * For function types, the return type. For other types, this attribute is not present. """ members: Final[Optional[Sequence[TypeMember]]] """ List of members of this type, or ``None`` if this is an incomplete type. This is present for structure, union, and class types. """ enumerators: Final[Optional[Sequence[TypeEnumerator]]] """ List of enumeration constants of this type, or ``None`` if this is an incomplete type. This is only present for enumerated types. """ parameters: Final[Sequence[TypeParameter]] """ List of parameters of this type. This is only present for function types. """ is_variadic: Final[bool] """ Whether this type takes a variable number of arguments. This is only present for function types. """ template_parameters: Final[Sequence[TypeTemplateParameter]] """ List of template parameters of this type. This is present for structure, union, class, and function types. """ def type_name(self) -> str: """Get a descriptive full name of this type.""" ... def is_complete(self) -> bool: """ Get whether this type is complete (i.e., the type definition is known). This is always ``False`` for void types. 
It may be ``False`` for structure, union, class, enumerated, and array types, as well as typedef types where the underlying type is one of those. Otherwise, it is always ``True``. """ ... def qualified(self, qualifiers: Qualifiers) -> Type: """ Get a copy of this type with different qualifiers. Note that the original qualifiers are replaced, not added to. :param qualifiers: New type qualifiers. """ ... def unqualified(self) -> Type: """Get a copy of this type with no qualifiers.""" ... def member(self, name: str) -> TypeMember: """ Look up a member in this type by name. If this type has any unnamed members, this also matches members of those unnamed members, recursively. If the member is found in an unnamed member, :attr:`TypeMember.bit_offset` and :attr:`TypeMember.offset` are adjusted accordingly. :param name: Name of the member. :raises TypeError: if this type is not a structure, union, or class type :raises LookupError: if this type does not have a member with the given name """ ... def has_member(self, name: str) -> bool: """ Return whether this type has a member with the given name. If this type has any unnamed members, this also matches members of those unnamed members, recursively. :param name: Name of the member. :raises TypeError: if this type is not a structure, union, or class type """ def _repr_pretty_(self, p: Any, cycle: bool) -> None: ... class TypeMember: """ A ``TypeMember`` represents a member of a structure, union, or class type. """ def __init__( self, object_or_type: Union[Object, Type, Callable[[], Union[Object, Type]]], name: Optional[str] = None, bit_offset: int = 0, ) -> None: """ Create a ``TypeMember``. :param object_or_type: One of: 1. :attr:`TypeMember.object` as an :class:`Object`. 2. :attr:`TypeMember.type` as a :class:`Type`. In this case, ``object`` is set to an absent object with that type. 3. A callable that takes no arguments and returns one of the above. 
It is called when ``object`` or ``type`` is first accessed, and the result is cached. :param name: :attr:`TypeMember.name` :param bit_offset: :attr:`TypeMember.bit_offset` """ ... object: Final[Object] """ Member as an :class:`Object`. This is the default initializer for the member, or an absent object if the member has no default initializer. (However, the DWARF specification as of version 5 does not actually support default member initializers, so this is usually absent.) """ type: Final[Type] """ Member type. This is a shortcut for ``TypeMember.object.type``. """ name: Final[Optional[str]] """Member name, or ``None`` if the member is unnamed.""" bit_offset: Final[int] """Offset of the member from the beginning of the type in bits.""" offset: Final[int] """ Offset of the member from the beginning of the type in bytes. If the offset is not byte-aligned, accessing this attribute raises :exc:`ValueError`. """ bit_field_size: Final[Optional[int]] """ Size in bits of this member if it is a bit field, ``None`` if it is not. This is a shortcut for ``TypeMember.object.bit_field_size_``. """ class TypeEnumerator: """ A ``TypeEnumerator`` represents a constant in an enumerated type. Its name and value may be accessed as attributes or unpacked: >>> prog.type('enum pid_type').enumerators[0].name 'PIDTYPE_PID' >>> name, value = prog.type('enum pid_type').enumerators[0] >>> value 0 """ def __init__(self, name: str, value: int) -> None: """ Create a ``TypeEnumerator``. :param name: :attr:`TypeEnumerator.name` :param value: :attr:`TypeEnumerator.value` """ ... name: Final[str] "Enumerator name." value: Final[int] "Enumerator value." def __len__(self) -> int: ... def __getitem__(self, idx: int) -> Any: ... def __iter__(self) -> Iterator[Any]: ... class TypeParameter: """ A ``TypeParameter`` represents a parameter of a function type. 
""" def __init__( self, default_argument_or_type: Union[ Object, Type, Callable[[], Union[Object, Type]] ], name: Optional[str] = None, ) -> None: """ Create a ``TypeParameter``. :param default_argument_or_type: One of: 1. :attr:`TypeParameter.default_argument` as an :class:`Object`. 2. :attr:`TypeParameter.type` as a :class:`Type`. In this case, ``default_argument`` is set to an absent object with that type. 3. A callable that takes no arguments and returns one of the above. It is called when ``default_argument`` or ``type`` is first accessed, and the result is cached. :param name: :attr:`TypeParameter.name` """ ... default_argument: Final[Object] """ Default argument for parameter. If the parameter does not have a default argument, then this is an absent object. .. note:: Neither GCC nor Clang emits debugging information for default arguments (as of GCC 10 and Clang 11), and drgn does not yet parse it, so this is usually absent. """ type: Final[Type] """ Parameter type. This is the same as ``TypeParameter.default_argument.type_``. """ name: Final[Optional[str]] """Parameter name, or ``None`` if the parameter is unnamed.""" class TypeTemplateParameter: """ A ``TypeTemplateParameter`` represents a template parameter of a structure, union, class, or function type. """ def __init__( self, argument: Union[Type, Object, Callable[[], Union[Type, Object]]], name: Optional[str] = None, is_default: bool = False, ) -> None: """ Create a ``TypeTemplateParameter``. :param argument: One of: 1. :attr:`TypeTemplateParameter.argument` as a :class:`Type` if the parameter is a type template parameter. 2. :attr:`TypeTemplateParameter.argument` as a non-absent :class:`Object` if the parameter is a non-type template parameter. 3. A callable that takes no arguments and returns one of the above. It is called when ``argument`` is first accessed, and the result is cached. :param name: :attr:`TypeTemplateParameter.name` :param is_default: :attr:`TypeTemplateParameter.is_default` """ ... 
argument: Final[Union[Type, Object]] """ Template argument. If this is a type template parameter, then this is a :class:`Type`. If this is a non-type template parameter, then this is an :class:`Object`. """ name: Final[Optional[str]] """Template parameter name, or ``None`` if the parameter is unnamed.""" is_default: Final[bool] """ Whether :attr:`argument` is the default for the template parameter. .. note:: There are two ways to interpret this: 1. The argument was omitted entirely and thus defaulted to the default argument. 2. The (specified or defaulted) argument is the same as the default argument. Compilers are inconsistent about which interpretation they use. GCC added this information in version 4.9. Clang added it in version 11 (and only when emitting DWARF version 5). If the program was compiled by an older version, this is always false. """ class TypeKind(enum.Enum): """A ``TypeKind`` represents a kind of type.""" VOID = ... """Void type.""" INT = ... """Integer type.""" BOOL = ... """Boolean type.""" FLOAT = ... """Floating-point type.""" STRUCT = ... """Structure type.""" UNION = ... """Union type.""" CLASS = ... """Class type.""" ENUM = ... """Enumerated type.""" TYPEDEF = ... """Type definition (a.k.a. alias) type.""" POINTER = ... """Pointer type.""" ARRAY = ... """Array type.""" FUNCTION = ... """Function type.""" class TypeKindSet(collections.abc.Set[TypeKind]): """ Immutable set of :class:`TypeKind`\\ s. >>> kinds = TypeKindSet({TypeKind.STRUCT, TypeKind.CLASS}) >>> TypeKind.STRUCT in kinds True >>> TypeKind.INT in kinds False >>> for kind in kinds: ... print(kind) ... TypeKind.STRUCT TypeKind.CLASS """ def __contains__(self, __x: object) -> bool: ... def __iter__(self) -> Iterator[TypeKind]: ... def __len__(self) -> int: ... class PrimitiveType(enum.Enum): """A ``PrimitiveType`` represents a primitive type known to drgn.""" C_VOID = ... "" C_CHAR = ... "" C_SIGNED_CHAR = ... "" C_UNSIGNED_CHAR = ... "" C_SHORT = ... "" C_UNSIGNED_SHORT = ... 
"" C_INT = ... "" C_UNSIGNED_INT = ... "" C_LONG = ... "" C_UNSIGNED_LONG = ... "" C_LONG_LONG = ... "" C_UNSIGNED_LONG_LONG = ... "" C_BOOL = ... "" C_FLOAT = ... "" C_DOUBLE = ... "" C_LONG_DOUBLE = ... "" C_SIZE_T = ... "" C_PTRDIFF_T = ... "" class Qualifiers(enum.Flag): """``Qualifiers`` are modifiers on types.""" NONE = ... """No qualifiers.""" CONST = ... """Constant type.""" VOLATILE = ... """Volatile type.""" RESTRICT = ... """`Restrict `_ type.""" ATOMIC = ... """Atomic type.""" def sizeof(__type_or_obj: Union[Type, Object]) -> int: """ Get the size of a :class:`Type` or :class:`Object` in bytes. :param type_or_obj: Entity to get the size of. :raises TypeError: if the type does not have a size (e.g., because it is incomplete or void) """ ... def alignof(__type: Type) -> int: """ Get the alignment requirement (in bytes) of a :class:`Type`. This corresponds to |alignof()|_ in C. .. |alignof()| replace:: ``_Alignof()`` .. _alignof(): https://en.cppreference.com/w/c/language/_Alignof :raises TypeError: if *type* is a function type or an incomplete type """ ... def offsetof(type: Type, member: str) -> int: """ Get the offset (in bytes) of a member in a :class:`Type`. This corresponds to |offsetof()|_ in C. .. |offsetof()| replace:: ``offsetof()`` .. _offsetof(): https://en.cppreference.com/w/c/types/offsetof :param type: Structure, union, or class type. :param member: Name of member. May include one or more member references and zero or more array subscripts. :raises TypeError: if *type* is not a structure, union, or class type :raises ValueError: if the member is not byte-aligned (e.g., because it is a bit field) :raises LookupError: if *type* does not have a member with the given name """ ... class FaultError(Exception): """ This error is raised when a bad memory access is attempted (i.e., when accessing a memory address which is not valid in a program). 
""" def __init__(self, message: str, address: int) -> None: """ :param message: :attr:`FaultError.message` :param address: :attr:`FaultError.address` """ ... message: str """Error message.""" address: int """Address that couldn't be accessed.""" class MissingDebugInfoError(Exception): """ This error is raised when one or more files in a program do not have debug information. """ ... class ObjectAbsentError(Exception): """This error is raised when attempting to use an absent object.""" ... class OutOfBoundsError(Exception): """ This error is raised when attempting to access beyond the bounds of a value object. """ ... _elfutils_version: str _have_debuginfod: bool _enable_dlopen_debuginfod: bool _with_libkdumpfile: bool def _linux_helper_direct_mapping_offset(__prog: Program) -> int: ... def _linux_helper_read_vm( prog: Program, pgtable: Object, address: IntegerLike, size: IntegerLike ) -> bytes: ... def _linux_helper_follow_phys( prog: Program, pgtable: Object, address: IntegerLike ) -> int: ... def _linux_helper_xa_load(xa: Object, index: IntegerLike) -> Object: ... def _linux_helper_per_cpu_ptr(ptr: Object, cpu: IntegerLike) -> Object: """ Return the per-CPU pointer for a given CPU. >>> prog["init_net"].loopback_dev.pcpu_refcnt (int *)0x2c980 >>> per_cpu_ptr(prog["init_net"].loopback_dev.pcpu_refcnt, 7) *(int *)0xffff925e3ddec980 = 4 :param ptr: Per-CPU pointer, i.e., ``type __percpu *``. For global variables, it's usually easier to use :func:`per_cpu()`. :param cpu: CPU number. :return: ``type *`` object. """ ... def _linux_helper_cpu_curr(__prog: Program, __cpu: IntegerLike) -> Object: ... def _linux_helper_idle_task(__prog: Program, __cpu: IntegerLike) -> Object: ... def _linux_helper_task_thread_info(task: Object) -> Object: """ Return the thread information structure for a task. :param task: ``struct task_struct *`` :return: ``struct thread_info *`` """ ... 
def _linux_helper_task_cpu(task: Object) -> int: """ Return the CPU number that the given task last ran on. :param task: ``struct task_struct *`` """ ... def _linux_helper_idr_find(idr: Object, id: IntegerLike) -> Object: ... def _linux_helper_find_pid(__ns: Object, __pid: IntegerLike) -> Object: ... def _linux_helper_pid_task(pid: Object, pid_type: IntegerLike) -> Object: """ Return the ``struct task_struct *`` containing the given ``struct pid *`` of the given type. :param pid: ``struct pid *`` :param pid_type: ``enum pid_type`` :return: ``struct task_struct *`` """ ... def _linux_helper_find_task(__ns: Object, __pid: IntegerLike) -> Object: ... def _linux_helper_kaslr_offset(__prog: Program) -> int: ... def _linux_helper_pgtable_l5_enabled(__prog: Program) -> bool: ... def _linux_helper_load_proc_kallsyms( filename: Optional[str] = None, modules: bool = False, ) -> SymbolIndex: ... def _linux_helper_load_builtin_kallsyms(prog: Program) -> SymbolIndex: ... drgn-0.0.31/_drgn_util/000077500000000000000000000000001477777462700147075ustar00rootroot00000000000000drgn-0.0.31/_drgn_util/__init__.py000066400000000000000000000005671477777462700170300ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Internal utilities for drgn This package contains utilities shared between the drgn package and supporting build/test code. You should not use them. This package must not depend on the drgn package itself since it is used before the _drgn extension module is built. """ drgn-0.0.31/_drgn_util/elf.py000066400000000000000000000120001477777462700160200ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later # Generated by scripts/gen_elf_py.py. 
import enum class ET(enum.IntEnum): NONE = 0x0 REL = 0x1 EXEC = 0x2 DYN = 0x3 CORE = 0x4 NUM = 0x5 LOOS = 0xFE00 HIOS = 0xFEFF LOPROC = 0xFF00 HIPROC = 0xFFFF class PT(enum.IntEnum): NULL = 0x0 LOAD = 0x1 DYNAMIC = 0x2 INTERP = 0x3 NOTE = 0x4 SHLIB = 0x5 PHDR = 0x6 TLS = 0x7 NUM = 0x8 LOOS = 0x60000000 GNU_EH_FRAME = 0x6474E550 GNU_STACK = 0x6474E551 GNU_RELRO = 0x6474E552 GNU_PROPERTY = 0x6474E553 GNU_SFRAME = 0x6474E554 LOSUNW = 0x6FFFFFFA SUNWBSS = 0x6FFFFFFA SUNWSTACK = 0x6FFFFFFB HISUNW = 0x6FFFFFFF HIOS = 0x6FFFFFFF LOPROC = 0x70000000 HIPROC = 0x7FFFFFFF MIPS_REGINFO = 0x70000000 MIPS_RTPROC = 0x70000001 MIPS_OPTIONS = 0x70000002 MIPS_ABIFLAGS = 0x70000003 PARISC_ARCHEXT = 0x70000000 PARISC_UNWIND = 0x70000001 class SHF(enum.IntFlag): WRITE = 0x1 ALLOC = 0x2 EXECINSTR = 0x4 MERGE = 0x10 STRINGS = 0x20 INFO_LINK = 0x40 LINK_ORDER = 0x80 OS_NONCONFORMING = 0x100 GROUP = 0x200 TLS = 0x400 COMPRESSED = 0x800 MASKOS = 0xFF00000 MASKPROC = 0xF0000000 GNU_RETAIN = 0x200000 ORDERED = 0x40000000 EXCLUDE = 0x80000000 MIPS_GPREL = 0x10000000 MIPS_MERGE = 0x20000000 MIPS_ADDR = 0x40000000 MIPS_STRINGS = 0x80000000 MIPS_NOSTRIP = 0x8000000 MIPS_LOCAL = 0x4000000 MIPS_NAMES = 0x2000000 MIPS_NODUPE = 0x1000000 PARISC_SHORT = 0x20000000 PARISC_HUGE = 0x40000000 PARISC_SBP = 0x80000000 ALPHA_GPREL = 0x10000000 ARM_ENTRYSECT = 0x10000000 ARM_COMDEF = 0x80000000 IA_64_SHORT = 0x10000000 IA_64_NORECOV = 0x20000000 class SHN(enum.IntEnum): UNDEF = 0x0 LORESERVE = 0xFF00 LOPROC = 0xFF00 BEFORE = 0xFF00 AFTER = 0xFF01 HIPROC = 0xFF1F LOOS = 0xFF20 HIOS = 0xFF3F ABS = 0xFFF1 COMMON = 0xFFF2 XINDEX = 0xFFFF HIRESERVE = 0xFFFF MIPS_ACOMMON = 0xFF00 MIPS_TEXT = 0xFF01 MIPS_DATA = 0xFF02 MIPS_SCOMMON = 0xFF03 MIPS_SUNDEFINED = 0xFF04 PARISC_ANSI_COMMON = 0xFF00 PARISC_HUGE_COMMON = 0xFF01 class SHT(enum.IntEnum): NULL = 0x0 PROGBITS = 0x1 SYMTAB = 0x2 STRTAB = 0x3 RELA = 0x4 HASH = 0x5 DYNAMIC = 0x6 NOTE = 0x7 NOBITS = 0x8 REL = 0x9 SHLIB = 0xA DYNSYM = 0xB INIT_ARRAY = 0xE FINI_ARRAY 
= 0xF PREINIT_ARRAY = 0x10 GROUP = 0x11 SYMTAB_SHNDX = 0x12 RELR = 0x13 NUM = 0x14 LOOS = 0x60000000 GNU_ATTRIBUTES = 0x6FFFFFF5 GNU_HASH = 0x6FFFFFF6 GNU_LIBLIST = 0x6FFFFFF7 CHECKSUM = 0x6FFFFFF8 LOSUNW = 0x6FFFFFFA SUNW_move = 0x6FFFFFFA SUNW_COMDAT = 0x6FFFFFFB SUNW_syminfo = 0x6FFFFFFC GNU_verdef = 0x6FFFFFFD GNU_verneed = 0x6FFFFFFE GNU_versym = 0x6FFFFFFF HISUNW = 0x6FFFFFFF HIOS = 0x6FFFFFFF LOPROC = 0x70000000 HIPROC = 0x7FFFFFFF LOUSER = 0x80000000 HIUSER = 0x8FFFFFFF MIPS_LIBLIST = 0x70000000 MIPS_MSYM = 0x70000001 MIPS_CONFLICT = 0x70000002 MIPS_GPTAB = 0x70000003 MIPS_UCODE = 0x70000004 MIPS_DEBUG = 0x70000005 MIPS_REGINFO = 0x70000006 MIPS_PACKAGE = 0x70000007 MIPS_PACKSYM = 0x70000008 MIPS_RELD = 0x70000009 MIPS_IFACE = 0x7000000B MIPS_CONTENT = 0x7000000C MIPS_OPTIONS = 0x7000000D MIPS_SHDR = 0x70000010 MIPS_FDESC = 0x70000011 MIPS_EXTSYM = 0x70000012 MIPS_DENSE = 0x70000013 MIPS_PDESC = 0x70000014 MIPS_LOCSYM = 0x70000015 MIPS_AUXSYM = 0x70000016 MIPS_OPTSYM = 0x70000017 MIPS_LOCSTR = 0x70000018 MIPS_LINE = 0x70000019 MIPS_RFDESC = 0x7000001A MIPS_DELTASYM = 0x7000001B MIPS_DELTAINST = 0x7000001C MIPS_DELTACLASS = 0x7000001D MIPS_DWARF = 0x7000001E MIPS_DELTADECL = 0x7000001F MIPS_SYMBOL_LIB = 0x70000020 MIPS_EVENTS = 0x70000021 MIPS_TRANSLATE = 0x70000022 MIPS_PIXIE = 0x70000023 MIPS_XLATE = 0x70000024 MIPS_XLATE_DEBUG = 0x70000025 MIPS_WHIRL = 0x70000026 MIPS_EH_REGION = 0x70000027 MIPS_XLATE_OLD = 0x70000028 MIPS_PDR_EXCEPTION = 0x70000029 MIPS_ABIFLAGS = 0x7000002A MIPS_XHASH = 0x7000002B PARISC_EXT = 0x70000000 PARISC_UNWIND = 0x70000001 PARISC_DOC = 0x70000002 ALPHA_DEBUG = 0x70000001 ALPHA_REGINFO = 0x70000002 X86_64_UNWIND = 0x70000001 class STB(enum.IntEnum): LOCAL = 0x0 GLOBAL = 0x1 WEAK = 0x2 NUM = 0x3 LOOS = 0xA GNU_UNIQUE = 0xA HIOS = 0xC LOPROC = 0xD HIPROC = 0xF MIPS_SPLIT_COMMON = 0xD class STT(enum.IntEnum): NOTYPE = 0x0 OBJECT = 0x1 FUNC = 0x2 SECTION = 0x3 FILE = 0x4 COMMON = 0x5 TLS = 0x6 NUM = 0x7 LOOS = 0xA GNU_IFUNC = 0xA 
HIOS = 0xC LOPROC = 0xD HIPROC = 0xF SPARC_REGISTER = 0xD PARISC_MILLICODE = 0xD class STV(enum.IntEnum): DEFAULT = 0x0 INTERNAL = 0x1 HIDDEN = 0x2 PROTECTED = 0x3 drgn-0.0.31/_drgn_util/platform.py000066400000000000000000000121001477777462700170770ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later import platform import re NORMALIZED_MACHINE_NAME = platform.machine() if NORMALIZED_MACHINE_NAME.startswith("aarch64") or NORMALIZED_MACHINE_NAME == "arm64": NORMALIZED_MACHINE_NAME = "aarch64" elif NORMALIZED_MACHINE_NAME.startswith("arm") or NORMALIZED_MACHINE_NAME == "sa110": NORMALIZED_MACHINE_NAME = "arm" elif re.fullmatch(r"i.86", NORMALIZED_MACHINE_NAME): NORMALIZED_MACHINE_NAME = "i386" elif NORMALIZED_MACHINE_NAME.startswith("ppc64"): NORMALIZED_MACHINE_NAME = "ppc64" elif NORMALIZED_MACHINE_NAME.startswith("ppc"): NORMALIZED_MACHINE_NAME = "ppc" elif NORMALIZED_MACHINE_NAME == "riscv": NORMALIZED_MACHINE_NAME = "riscv32" elif re.match(r"sh[0-9]", NORMALIZED_MACHINE_NAME): NORMALIZED_MACHINE_NAME = "sh" elif NORMALIZED_MACHINE_NAME == "sun4u": NORMALIZED_MACHINE_NAME = "sparc64" SYS = { "aarch64": { "bpf": 280, "finit_module": 273, "kexec_file_load": 294, "memfd_create": 279, "perf_event_open": 241, }, "alpha": { "bpf": 515, "finit_module": 507, "memfd_create": 512, "perf_event_open": 493, }, "arc": { "bpf": 280, "finit_module": 273, "kexec_file_load": 294, "memfd_create": 279, "perf_event_open": 241, }, "arm": { "bpf": 386, "finit_module": 379, "kexec_file_load": 401, "memfd_create": 385, "perf_event_open": 364, }, "csky": { "bpf": 280, "finit_module": 273, "kexec_file_load": 294, "memfd_create": 279, "perf_event_open": 241, }, "hexagon": { "bpf": 280, "finit_module": 273, "kexec_file_load": 294, "memfd_create": 279, "perf_event_open": 241, }, "i386": { "bpf": 357, "finit_module": 350, "memfd_create": 356, "perf_event_open": 336, }, "loongarch": { "bpf": 280, "finit_module": 273, 
"kexec_file_load": 294, "memfd_create": 279, "perf_event_open": 241, }, "loongarch64": { "bpf": 280, "finit_module": 273, "kexec_file_load": 294, "memfd_create": 279, "perf_event_open": 241, }, "m68k": { "bpf": 354, "finit_module": 348, "memfd_create": 353, "perf_event_open": 332, }, "microblaze": { "bpf": 387, "finit_module": 380, "memfd_create": 386, "perf_event_open": 366, }, # TODO: mips is missing here because I don't know how to distinguish # between the o32 and n32 ABIs. "mips64": { "bpf": 315, "finit_module": 307, "memfd_create": 314, "perf_event_open": 292, }, "nios2": { "bpf": 280, "finit_module": 273, "kexec_file_load": 294, "memfd_create": 279, "perf_event_open": 241, }, "openrisc": { "bpf": 280, "finit_module": 273, "kexec_file_load": 294, "memfd_create": 279, "perf_event_open": 241, }, "parisc": { "bpf": 341, "finit_module": 333, "kexec_file_load": 355, "memfd_create": 340, "perf_event_open": 318, }, "parisc64": { "bpf": 341, "finit_module": 333, "kexec_file_load": 355, "memfd_create": 340, "perf_event_open": 318, }, "ppc": { "bpf": 361, "finit_module": 353, "memfd_create": 360, "perf_event_open": 319, }, "ppc64": { "bpf": 361, "finit_module": 353, "memfd_create": 360, "perf_event_open": 319, }, "riscv32": { "bpf": 280, "finit_module": 273, "kexec_file_load": 294, "memfd_create": 279, "perf_event_open": 241, }, "riscv64": { "bpf": 280, "finit_module": 273, "kexec_file_load": 294, "memfd_create": 279, "perf_event_open": 241, }, "s390": { "bpf": 351, "finit_module": 344, "kexec_file_load": 381, "memfd_create": 350, "perf_event_open": 331, }, "s390x": { "bpf": 351, "finit_module": 344, "kexec_file_load": 381, "memfd_create": 350, "perf_event_open": 331, }, "sh": { "bpf": 375, "finit_module": 368, "memfd_create": 374, "perf_event_open": 336, }, "sparc": { "bpf": 349, "finit_module": 342, "memfd_create": 348, "perf_event_open": 327, }, "sparc64": { "bpf": 349, "finit_module": 342, "memfd_create": 348, "perf_event_open": 327, }, "x86_64": { "bpf": 321, 
"finit_module": 313, "kexec_file_load": 320, "memfd_create": 319, "perf_event_open": 298, }, "xtensa": { "bpf": 340, "finit_module": 332, "memfd_create": 339, "perf_event_open": 327, }, }.get(NORMALIZED_MACHINE_NAME, {}) drgn-0.0.31/_drgn_util/plugins.py000066400000000000000000000106661477777462700167530ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later import fnmatch from importlib import import_module import logging import os import runpy import sys from types import SimpleNamespace from typing import Any, Callable, Dict, List, Tuple logger = logging.getLogger("drgn.plugins") _plugins = None _hooks: Dict[str, List[Tuple[str, Callable[..., Any]]]] = {} def _load_plugins() -> List[Tuple[str, object]]: plugins = [] # Mapping from plugin name requested with DRGN_PLUGINS to whether we found # an entry point with that name. enabled_entry_points = {} env = os.getenv("DRGN_PLUGINS") if env: for item in env.split(","): if not item: # Ignore empty items for convenience. continue name, sep, value = item.partition("=") if sep: try: if "/" in value: plugin: object = SimpleNamespace(**runpy.run_path(value)) else: plugin = import_module(value) except Exception: logger.warning("failed to load %r:", item, exc_info=True) else: plugins.append((name, plugin)) logger.debug("loaded %r", item) else: enabled_entry_points[name] = False env = os.getenv("DRGN_DISABLE_PLUGINS") # If all plugins are disabled, avoid the entry point machinery entirely. 
if env != "*" or enabled_entry_points: disable_plugins = env.split(",") if env else [] group = "drgn.plugins" if sys.version_info >= (3, 10): import importlib.metadata # novermin entry_points = importlib.metadata.entry_points(group=group) # novermin def entry_point_str( # novermin entry_point: importlib.metadata.EntryPoint, ) -> str: return f"{entry_point.name} = {entry_point.value}" elif sys.version_info >= (3, 8): import importlib.metadata # novermin entry_points = importlib.metadata.entry_points().get(group, ()) # novermin def entry_point_str( # novermin entry_point: importlib.metadata.EntryPoint, ) -> str: return f"{entry_point.name} = {entry_point.value}" else: import pkg_resources entry_points = pkg_resources.iter_entry_points(group) entry_point_str = str for entry_point in entry_points: if entry_point.name in enabled_entry_points: enabled_entry_points[entry_point.name] = True elif any( fnmatch.fnmatch(entry_point.name, disable) for disable in disable_plugins ): continue try: plugin = entry_point.load() except Exception: logger.warning( "failed to load %r:", entry_point_str(entry_point), exc_info=True, ) else: plugins.append((entry_point.name, plugin)) logger.debug( "loaded entry point %r", entry_point_str(entry_point), ) missing_entry_points = [ key for key, value in enabled_entry_points.items() if not value ] if missing_entry_points: missing_entry_points.sort() logger.warning( "not found: %s", ", ".join([repr(name) for name in missing_entry_points]), ) return plugins def _load_hook(hook_name: str) -> List[Tuple[str, Callable[..., Any]]]: global _plugins if _plugins is None: _plugins = _load_plugins() hooks = [] for name, plugin in _plugins: try: hook = getattr(plugin, hook_name) except AttributeError: continue hooks.append((name, hook)) hooks.sort(key=lambda hook: (getattr(hook[1], "drgn_priority", 50), hook[0])) return hooks def call_plugins(hook_name: str, *args: object) -> None: try: hooks = _hooks[hook_name] except KeyError: _hooks[hook_name] = hooks = 
_load_hook(hook_name) for name, hook in hooks: try: hook(*args) except Exception: logger.warning("%r %s failed:", name, hook_name, exc_info=True) drgn-0.0.31/contrib/000077500000000000000000000000001477777462700142215ustar00rootroot00000000000000drgn-0.0.31/contrib/README.rst000066400000000000000000000027021477777462700157110ustar00rootroot00000000000000Community-Contributed Content ============================= This directory contains drgn scripts, libraries, and notes that have been contributed by the community but aren't considered a part of drgn proper. Code in this directory is not tested and not necessarily up to the rest of the project's standards. This is intended as a central location to share drgn ideas with a low barrier to entry. If you have time to polish your code, consider submitting it as a proper helper or tool. If not, feel free to dump it here. Someone else might find it useful as a starting point for their own investigation. It could even be adapted into a helper or tool later. Contributing to ``contrib`` --------------------------- The bar for contributing to ``contrib`` is intentionally low. Code submitted here can be rough and will be only lightly reviewed. The only hard requirements are: * It must be relevant to drgn. * All files must have a comment or docstring at the top describing what they are. This can be short. There are also some boring legal requirements: * All files must have a copyright notice. * All files must be licensed under the LGPLv2.1+ (using ``SPDX-License-Identifier: LGPL-2.1-or-later``). * All commits must have a ``Signed-off-by`` trailer. See `Signing Off <../CONTRIBUTING.rst#signing-off>`_. We may choose to edit, reorganize, or drop parts your contribution. If in doubt, go ahead and open a pull request, and we'll decide what to do with it. drgn-0.0.31/contrib/bpf_inspect.py000077500000000000000000000315511477777462700170770ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Meta Platforms, Inc. 
and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """List BPF programs or maps and their properties unavailable via kernel API.""" import sys import drgn import argparse from drgn import container_of from drgn.helpers.common.type import enum_type_to_class from drgn.helpers.linux import ( bpf_map_for_each, bpf_prog_for_each, bpf_link_for_each, list_for_each_entry, hlist_for_each_entry, ) BpfMapType = enum_type_to_class(prog.type("enum bpf_map_type"), "BpfMapType") BpfProgType = enum_type_to_class(prog.type("enum bpf_prog_type"), "BpfProgType") BpfAttachType = enum_type_to_class(prog.type("enum bpf_attach_type"), "BpfAttachType") BpfLinkType = enum_type_to_class(prog.type("enum bpf_link_type"), "BpfLinkType") def bpf_attach_type_to_tramp(attach_type): # bpf_tramp_prog_type is available since linux kernel 5.5, this code should # be called only after checking for bpf_prog.aux.trampoline to be present # though so no error checking here. BpfProgTrampType = enum_type_to_class( prog.type("enum bpf_tramp_prog_type"), "BpfProgTrampType" ) at = BpfAttachType(attach_type) if at == BpfAttachType.BPF_TRACE_FENTRY: return BpfProgTrampType.BPF_TRAMP_FENTRY if at == BpfAttachType.BPF_TRACE_FEXIT: return BpfProgTrampType.BPF_TRAMP_FEXIT return BpfProgTrampType.BPF_TRAMP_REPLACE class BpfTramp(object): def __init__(self, tr): self.tr = tr def get_progs(self): if not self.tr: return if self.tr.extension_prog: yield self.tr.extension_prog return try: for head in self.tr.progs_hlist: for tramp_aux in hlist_for_each_entry( "struct bpf_prog_aux", head, "tramp_hlist" ): yield tramp_aux.prog except LookupError: return class BpfMap(object): def __init__(self, bpf_map): self.map = bpf_map @staticmethod def inspect_owner(owner): type_ = BpfProgType(owner.type).name jited = " JITed" if owner.jited.value_() else "" return f"{type_:32}{jited}" def get_owner(self): try: owner = self.map.member_("owner") return self.inspect_owner(owner) except LookupError: return "" def __repr__(self): 
id_ = self.map.id.value_() type_ = BpfMapType(self.map.map_type).name name = self.map.name.string_().decode() return f"{id_:>6}: {type_:32} {name:32}" class BpfProg(object): def __init__(self, bpf_prog): self.prog = bpf_prog def is_subprog(self): return self.prog.aux.func_idx.value_() != 0 @staticmethod def __get_btf_name(btf, btf_id): type_ = btf.types[btf_id] if type_.name_off < btf.hdr.str_len: return btf.strings[type_.name_off].address_of_().string_().decode() return "" def get_btf_name(self): aux = self.prog.aux if aux.btf: # func_info[0] points to BPF program function itself. return self.__get_btf_name(aux.btf, aux.func_info[0].type_id) return "" def get_ksym_name(self): try: ksym = self.prog.aux.member_("ksym") return ksym.name.string_().decode()[26:] except LookupError: return "" def get_prog_name(self): if self.is_subprog(): return self.get_ksym_name() or self.prog.aux.name.string_().decode() return self.get_btf_name() or self.prog.aux.name.string_().decode() def get_used_maps(self): for i in range(0, self.prog.aux.used_map_cnt.value_()): yield BpfMap(self.prog.aux.used_maps[i]) def get_subprogs(self): for i in range(0, self.prog.aux.func_cnt.value_()): yield i, BpfProg(self.prog.aux.func[i]) def get_linked_func(self): kind = bpf_attach_type_to_tramp(self.prog.expected_attach_type) linked_prog = self.prog.aux.linked_prog linked_prog_id = linked_prog.aux.id.value_() linked_btf_id = self.prog.aux.attach_btf_id.value_() linked_name = ( f"{BpfProg(linked_prog).get_prog_name()}->" f"{self.__get_btf_name(linked_prog.aux.btf, linked_btf_id)}()" ) return f"{linked_prog_id}->{linked_btf_id}: {kind.name} {linked_name}" def get_attach_func(self): try: func_ = self.prog.aux.attach_func_name if func_: return func_.string_().decode() except LookupError: pass return "" def get_tramp_progs(self): try: # Trampoline was changed to dst_trampoline since Linux kernel commit # 3aac1ead5eb6 ("bpf: Move prog->aux->linked_prog and trampoline into # bpf_link on attach") (in v5.10). 
# Try to get dst_trampoline first. tr = self.prog.aux.member_("dst_trampoline") except LookupError: tr = None try: tr = self.prog.aux.member_("trampoline") if not tr else tr except LookupError: # Trampoline is available since Linux kernel commit # fec56f5890d9 ("bpf: Introduce BPF trampoline") (in v5.5). # Skip trampoline if current kernel doesn't support it. return return BpfTramp(tr).get_progs() def __repr__(self): id_ = self.prog.aux.id.value_() type_ = BpfProgType(self.prog.type).name name = self.get_prog_name() try: tail_call_reachable = self.prog.aux.member_("tail_call_reachable").value_() except LookupError: tail_call_reachable = None tail_call_desc = " tail_call_reachable" if tail_call_reachable else "" return f"{id_:>6}: {type_:32} {name:32}{tail_call_desc}" def list_bpf_progs(show_details=False): for bpf_prog_ in bpf_prog_for_each(prog): bpf_prog = BpfProg(bpf_prog_) print(f"{bpf_prog}") if not show_details: continue linked_progs = bpf_prog.get_tramp_progs() if linked_progs: for linked_prog in linked_progs: print(f"\tlinked: {BpfProg(linked_prog)}") for map_ in bpf_prog.get_used_maps(): print(f"\t{'used map:':9} {map_}") for index, subprog in bpf_prog.get_subprogs(): print(f"\t{f'func[{index:>2}]:':9} {subprog}") def __list_bpf_progs(args): list_bpf_progs(args.show_details) class BpfProgArrayMap(BpfMap): def __init__(self, bpf_map): super().__init__(bpf_map) self.prog_array = container_of(bpf_map, "struct bpf_array", "map") def get_owner(self): try: owner = self.prog_array.aux.member_("owner") return super().inspect_owner(owner) except LookupError: return "" def get_prog_array(self): for i in range(0, self.map.max_entries): prog_ = self.prog_array.ptrs[i] if prog_: yield i, drgn.cast("struct bpf_prog *", prog_) def get_poke_progs(self): for poke in list_for_each_entry( "struct prog_poke_elem", self.prog_array.aux.poke_progs.address_of_(), "list", ): yield poke.aux.prog def __repr__(self): owner = self.get_owner() owner = super().get_owner() if not owner 
else owner array = self.get_prog_array() poke_progs = self.get_poke_progs() owner_str = f"{'owner:':9} {owner}" if owner else "" array_str = ( "\n\t".join( f"{f'idx[{index:>3}]:':9} {BpfProg(prog)}" for index, prog in array ) if array else "" ) poke_progs_str = ( "\n\t".join(f"{'poke:':9} {BpfProg(poke)}" for poke in poke_progs) if poke_progs else "" ) return "\n\t".join(x for x in [owner_str, array_str, poke_progs_str] if x) def show_bpf_map_details(bpf_map): if bpf_map.map_type == BpfMapType.BPF_MAP_TYPE_PROG_ARRAY: r = BpfProgArrayMap(bpf_map).__repr__() else: r = None if r: print(f"\t{r}") def list_bpf_maps(show_details=False): for map_ in bpf_map_for_each(prog): bpf_map = BpfMap(map_) print(f"{bpf_map}") if show_details: show_bpf_map_details(map_) def __list_bpf_maps(args): list_bpf_maps(args.show_details) class BpfLink(object): def __init__(self, bpf_link): self.link = bpf_link def __repr__(self): id_ = self.link.id.value_() type_ = BpfLinkType(self.link.type).name return f"{id_:>6}: {type_:32}" class BpfTracingLink(BpfLink): def __init__(self, link): super().__init__(link) self.tracing = drgn.cast("struct bpf_tracing_link *", link) def get_tgt_prog(self): return self.tracing.tgt_prog def get_linked_progs(self): return BpfTramp(self.tracing.trampoline).get_progs() def __repr__(self): tgt_prog = self.get_tgt_prog() linked_progs = self.get_linked_progs() tgt_prog_str = f"target: {BpfProg(tgt_prog)}" if tgt_prog else "" linked_progs_str = ( "\n".join(f"linked: {BpfProg(linked_prog)}" for linked_prog in linked_progs) if linked_progs else "" ) return "\n\t".join(x for x in [tgt_prog_str, linked_progs_str] if x) class BpfXdpLink(BpfLink): def __init__(self, link): super().__init__(link) self.xdp = drgn.cast("struct bpf_xdp_link *", link) def get_dev(self): return self.xdp.dev XDP_FLAGS_SKB_MODE = 1 << 1 XDP_FLAGS_DRV_MODE = 1 << 2 XDP_FLAGS_HW_MODE = 1 << 3 def get_mode(self): flags = self.xdp.flags.value_() if flags & self.XDP_FLAGS_HW_MODE: return "HARDWARE" if 
flags & self.XDP_FLAGS_DRV_MODE: return "DRIVER" if flags & self.XDP_FLAGS_SKB_MODE: return "GENERIC" return "UNKNOWN" def __repr__(self): dev = self.get_dev() mode = self.get_mode() ifname, ifindex = dev.name.string_().decode(), dev.ifindex.value_() return f"{'netdev:':<9} {ifname}({ifindex})" + f"\n\t{'mode:':<9} {mode}" def show_bpf_link_details(link): if link.type == BpfLinkType.BPF_LINK_TYPE_TRACING: r = BpfTracingLink(link).__repr__() elif link.type == BpfLinkType.BPF_LINK_TYPE_XDP: r = BpfXdpLink(link).__repr__() else: r = None if r: print(f"\t{r}") def list_bpf_links(show_details=False): for link in bpf_link_for_each(prog): bpf_link = BpfLink(link) print(f"{bpf_link}") bpf_prog = BpfProg(link.prog) print(f"\tprog: {bpf_prog}") attach_func = bpf_prog.get_attach_func() if attach_func: print(f"\tattach: {attach_func}") if show_details: show_bpf_link_details(link) def __list_bpf_links(args): list_bpf_links(args.show_details) def __run_interactive(args): try: from drgn.cli import run_interactive except ImportError: sys.exit("Interactive mode requires drgn 0.0.23+") def should_add_to_globals(name): if name.startswith("__"): return False return "bpf" in name or "Bpf" in name or "btf" in name globals_keys = globals().keys() def globals_func(globals_): for key in globals_keys: if should_add_to_globals(key): globals_[key] = globals()[key] return globals_ run_interactive(prog, globals_func=globals_func) def main(): parser = argparse.ArgumentParser( description="drgn script to list BPF programs or maps and their properties unavailable via kernel API" ) subparsers = parser.add_subparsers(title="subcommands", dest="subcommand") subparsers.required = True prog_parser = subparsers.add_parser("prog", aliases=["p"], help="list BPF programs") prog_parser.set_defaults(func=__list_bpf_progs) prog_parser.add_argument( "--show-details", action="store_true", help="show program internal details" ) map_parser = subparsers.add_parser("map", aliases=["m"], help="list BPF maps") 
map_parser.set_defaults(func=__list_bpf_maps) map_parser.add_argument( "--show-details", action="store_true", help="show map internal details" ) link_parser = subparsers.add_parser("link", aliases=["l"], help="list BPF links") link_parser.set_defaults(func=__list_bpf_links) link_parser.add_argument( "--show-details", action="store_true", help="show link internal details" ) interact_parser = subparsers.add_parser( "interact", aliases=["i"], help="start interactive shell, requires 0.0.23+ drgn" ) interact_parser.set_defaults(func=__run_interactive) args = parser.parse_args() args.func(args) if __name__ == "__main__": main() drgn-0.0.31/contrib/btrfs_orphan_subvolumes.py000077500000000000000000000031111477777462700215450ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """Dump Btrfs subvolumes that have been deleted but not cleaned up.""" from drgn import Object, cast from drgn.helpers.linux.fs import inode_path, path_lookup from drgn.helpers.linux.radixtree import radix_tree_for_each from drgn.helpers.linux.rbtree import rbtree_inorder_for_each_entry def dump_orphan_subvolumes(fs_info: Object) -> None: prog = fs_info.prog_ BTRFS_ROOT_ORPHAN_ITEM_INSERTED = prog["BTRFS_ROOT_ORPHAN_ITEM_INSERTED"] for objectid, entry in radix_tree_for_each(fs_info.fs_roots_radix): root = cast("struct btrfs_root *", entry) if root.state & (1 << BTRFS_ROOT_ORPHAN_ITEM_INSERTED): print(f"orphan root {objectid} has the following inodes in memory:") for inode in rbtree_inorder_for_each_entry( "struct btrfs_inode", root.inode_tree.address_of_(), "rb_node" ): path = inode_path(inode.vfs_inode.address_of_()) if path is None: print(f" inode {inode.vfs_inode.i_ino.value_()} with no cached names") else: print(f" {path.decode()}") if __name__ == "__main__": import argparse from pathlib import Path parser = argparse.ArgumentParser() parser.add_argument("path", type=Path) args = parser.parse_args() 
dump_orphan_subvolumes( cast( "struct btrfs_fs_info *", path_lookup(prog, args.path.resolve()).mnt.mnt_sb.s_fs_info, ) ) drgn-0.0.31/contrib/btrfs_print_fs_uuids_cache.py000066400000000000000000000160221477777462700221540ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) 2024, Oracle and/or its affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Print key details of the filesystems in the btrfs fs_uuids cache Usage: * drgn -s -s -c btrfs_print_fs_uuids_cache.py - This only prints key details of the filesystems * drgn -s -s -c btrfs_print_fs_uuids_cache.py -d yes - This dumps all the structures. The output will be enormous, so it's recommended to redirect it to a file and read it from there. """ import drgn import argparse from drgn.helpers.common import * from drgn.helpers.linux import ( list_for_each_entry, for_each_possible_cpu, per_cpu_ptr, ) parser = argparse.ArgumentParser() parser.add_argument("-d", "--dump", required = False, help = "Dump all structure data") args = parser.parse_args() def format(uuid_list): """Helper to format uuid from list representation""" part = "" for ele in uuid_list: part += "%02x" %ele return part def get_uuid_from_list(uuid_list): """Helper to format uuid from list representation""" uuid = "" uuid += format(uuid_list[0:4]) + "-" uuid += format(uuid_list[4:6]) + "-" uuid += format(uuid_list[6:8]) + "-" uuid += format(uuid_list[8:10]) + "-" uuid += format(uuid_list[10:]) return uuid def print_mount_info(mnt): """Print key fields of `struct mount`""" print(f"Mount Info:") print(f"\tMount Point: {mnt.mnt_mountpoint.d_iname.string_().decode('utf-8')}") print(f"\tVFS Mount Flags: {mnt.mnt.mnt_flags}") print(f"\tDevice Name: {mnt.mnt_devname.string_().decode('utf-8')}") if mnt.mnt_mp: print(f"\tMount Point Count: {mnt.mnt_mp.m_count.value_()}") else: print("\tMount Point is NULL") print(f"\tMount ID: {mnt.mnt_id}") print(f"\tMount Group ID: {mnt.mnt_group_id}") print(f"\tMount Expiry Mark: {mnt.mnt_expiry_mark}") 
return def print_super_block_info(sb): """Print key fields of `struct super_block`""" print("Super Block:") if not sb: print("\tSuper block is NULL") return print(f"\tsb ref count: {sb.s_active.counter}") print(f"\tsb s_count: {sb.s_count}") print(f"\tsb umount rw_sem counter {sb.s_umount.count.counter}") print(f"\tsb s_flags: {sb.s_flags}") print(f"\tsb s_dev {sb.s_dev}") print(f"\tsb s_id {sb.s_id}") print(f"\tsb i_flags {sb.s_iflags}") for mnt in list_for_each_entry("struct mount", sb.s_mounts.address_of_(), "mnt_instance"): if not mnt.mnt_master: print_mount_info(mnt) break return def print_fs_info(fs_info): """Print key fields of `struct btrfs_fs_info`""" print("FS Info:") if not fs_info: print("\tfs_info is NULL") return print(f"\tFS State: {fs_info.fs_state}") print(f"\tFlags: {fs_info.flags}") print(f"\tmount opt: {fs_info.mount_opt}") print_super_block_info(fs_info.sb) return def print_fs_devices_info(fs_dev): """"Print key fields of `struct btrfs_fs_devices`""" print("FS Devices:") print(f"\tFS UUID: {get_uuid_from_list(list(fs_dev.fsid))}") print(f"\tMETADATA UUID: {get_uuid_from_list(list(fs_dev.metadata_uuid))}") print(f"\tnum_devices: {fs_dev.num_devices}") print(f"\topen_devices: {fs_dev.open_devices}") print(f"\trw_devices: {fs_dev.rw_devices}") print(f"\tmissing_devices: {fs_dev.missing_devices}") print(f"\ttotal_rw_bytes: {fs_dev.total_rw_bytes}") print(f"\ttotal_devices: {fs_dev.total_devices}") print(f"\tlatest_generation: {fs_dev.latest_generation}") print(f"\topened: {fs_dev.opened}") try: print(f"\tfsid_change: {fs_dev.fsid_change}") except AttributeError: pass return def print_dev_info(dev): """Print key fields of `struct btrfs_device`""" print("Device Info:") print(f"\tName: {str(dev.name.str)}") print(f"\tstate: {dev.dev_state}") print(f"\tdevid: {dev.devid}") print(f"\tgeneration: {dev.generation}") print(f"\ttype: {dev.type}") if not dev.bdev: print(f"\tbdev: NULL") else: print(f"\tbd_dev: {dev.bdev.bd_dev}") print(f"\tbd_partno: 
{dev.bdev.bd_partno}") return def dump_mounts(s_mounts): """Dump master `struct mount` of a mount point""" for mnt in list_for_each_entry("struct mount", s_mounts.address_of_(), "mnt_instance"): if not mnt.mnt_master: print(f"struct mount:\n{mnt}") print(f"Mount Point Dentry:\n{mnt.mnt_mountpoint}") print(f"Namespace:\n{mnt.mnt_ns}") print(f"Mount Point Struct:\n{mnt.mnt_mp}") for cpu in for_each_possible_cpu(prog): print(f"mnt_pcp {cpu}:\n{per_cpu_ptr(mnt.mnt_pcp, cpu)}") break return def dump_all(fs_uuids): """ Dump all structure data of: - struct btrfs_fs_devices - struct btrfs_device - struct btrfs_fs_info - Running transaction => struct btrfs_transaction - struct btrfs_super_block - struct super_block - struct file_system_type - Root Dentry => struct dentry - Root Inode => struct inode - struct user_namespace """ for fs_dev in list_for_each_entry("struct btrfs_fs_devices", fs_uuids.address_of_(), "fs_list"): print("-"*30 + f"{get_uuid_from_list(list(fs_dev.fsid))}" + '-'*30) print("FS Devices:\n", fs_dev) print("fs_dev.latest_bdev:", fs_dev.latest_bdev) if fs_dev.fs_info: print("FS Info:\n", fs_dev.fs_info) print("fs_info Running transaction:\n", fs_dev.fs_info.running_transaction) print("Disk superblock:\n", fs_dev.fs_info.super_copy) if fs_dev.fs_info.sb: print("Superblock:\n", fs_dev.fs_info.sb) print("Fs Type:\n", fs_dev.fs_info.sb.s_type) print("Root Dentry:\n", fs_dev.fs_info.sb.s_root) print("Root Inode:\n", fs_dev.fs_info.sb.s_root.d_inode) print("sb user namespace:\n", fs_dev.fs_info.sb.s_user_ns) dump_mounts(fs_dev.fs_info.sb.s_mounts) else: print("Superblock: NULL") else: print("FS Info: NULL") for dev in list_for_each_entry("struct btrfs_device", fs_dev.devices.address_of_(), "dev_list"): print("Device:\n", dev) print("Block Device:\n", dev.bdev) print("dev.flush_bio:\n", dev.flush_bio) print("\n") return def print_btrfs_cache(fs_uuids): """ Iterate through the fs_uuids cache and print the details of each filesystem. 
""" for fs_dev in list_for_each_entry("struct btrfs_fs_devices", fs_uuids.address_of_(), "fs_list"): print("-"*60) print_fs_devices_info(fs_dev) print_fs_info(fs_dev.fs_info) for dev in list_for_each_entry("struct btrfs_device", fs_dev.devices.address_of_(), "dev_list"): print_dev_info(dev) return fs_uuids = prog['fs_uuids'] if args.dump == "yes": dump_all(fs_uuids) else: print_btrfs_cache(fs_uuids)drgn-0.0.31/contrib/btrfs_tree.py000066400000000000000000002401521477777462700167360ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """Helpers for introspecting btrfs btree structures""" from contextlib import suppress import enum import functools import operator import struct import sys import time from typing import ( TYPE_CHECKING, Any, Callable, Dict, Generic, Iterable, List, NamedTuple, Optional, Sequence, Tuple, Type, TypeVar, Union, ) import uuid if TYPE_CHECKING: from _typeshed import SupportsWrite from typing import Final, Self # novermin from drgn import IntegerLike, Object, cast from drgn.helpers.common.format import escape_ascii_string from drgn.helpers.linux.mm import page_size, page_to_virt from drgn.helpers.linux.radixtree import radix_tree_lookup _T = TypeVar("_T") _T_co = TypeVar("_T_co", covariant=True) _NOT_FOUND = object() class cached_property(Generic[_T_co]): def __init__(self, func: Callable[[Any], _T_co]) -> None: self.func = func self.__doc__ = func.__doc__ self.__module__ = func.__module__ def __set_name__(self, owner: Type[Any], name: str) -> None: self.attrname = name def __get__(self, instance: object, owner: Optional[Type[Any]] = None) -> _T_co: cache = instance.__dict__ val = cache.get(self.attrname, _NOT_FOUND) if val is _NOT_FOUND: val = self.func(instance) cache[self.attrname] = val return val _crc32c_table = [0] * 256 for i in range(256): fwd = i for j in range(8, 0, -1): if fwd & 1: fwd = (fwd >> 1) ^ 0x82F63B78 else: fwd >>= 1 _crc32c_table[i] = fwd & 
0xFFFFFFFF def _crc32c(b: bytes, crc: int = 0) -> int: for c in b: crc = (crc >> 8) ^ _crc32c_table[(crc ^ c) & 0xFF] return crc def btrfs_name_hash(name: bytes) -> int: return _crc32c(name, 0xFFFFFFFE) def _hash_extent_data_ref(root_objectid: int, owner: int, offset: int) -> int: high_crc = _crc32c(root_objectid.to_bytes(8, "little"), 0xFFFFFFFF) low_crc = _crc32c(owner.to_bytes(8, "little"), 0xFFFFFFFF) low_crc = _crc32c(offset.to_bytes(8, "little"), low_crc) return (high_crc << 31) ^ low_crc class _BtrfsEnum(enum.IntEnum): def __str__(self) -> str: return self._name_ class _BtrfsFlag(enum.IntFlag): def __str__(self) -> str: if not self: return "0x0(none)" # btrfs-progs as of v6.8.1 ignores unknown flags when printing them, # but _name_ includes the numeric value of unknown flags. return f"{hex(self)}({self._name_})" EnumT = TypeVar("EnumT", bound=enum.Enum) def _try_cast_enum(enum_type: Type[EnumT], value: int) -> Union[EnumT, int]: try: return enum_type(value) except ValueError: return value class BtrfsType(_BtrfsEnum): # Generated with # sed -rn 's/^#\s*define\s+BTRFS_(([0-9A-Za-z_]+)_KEY|(UUID_KEY_SUBVOL|UUID_KEY_RECEIVED_SUBVOL))\s+([0-9]+).*/ \2\3 = \4/p' include/uapi/linux/btrfs_tree.h | # grep -v -e BALANCE_ITEM -e DEV_STATS # # UUID_KEY_{,RECEIVED_}SUBVOL broke with the usual naming scheme. # BALANCE_ITEM and DEV_STATS are obsolete names for TEMPORARY_ITEM and # PERSISTENT_ITEM, respectively. 
INODE_ITEM = 1 INODE_REF = 12 INODE_EXTREF = 13 XATTR_ITEM = 24 VERITY_DESC_ITEM = 36 VERITY_MERKLE_ITEM = 37 ORPHAN_ITEM = 48 DIR_LOG_ITEM = 60 DIR_LOG_INDEX = 72 DIR_ITEM = 84 DIR_INDEX = 96 EXTENT_DATA = 108 EXTENT_CSUM = 128 ROOT_ITEM = 132 ROOT_BACKREF = 144 ROOT_REF = 156 EXTENT_ITEM = 168 METADATA_ITEM = 169 EXTENT_OWNER_REF = 172 TREE_BLOCK_REF = 176 EXTENT_DATA_REF = 178 SHARED_BLOCK_REF = 182 SHARED_DATA_REF = 184 BLOCK_GROUP_ITEM = 192 FREE_SPACE_INFO = 198 FREE_SPACE_EXTENT = 199 FREE_SPACE_BITMAP = 200 DEV_EXTENT = 204 DEV_ITEM = 216 CHUNK_ITEM = 228 RAID_STRIPE = 230 QGROUP_STATUS = 240 QGROUP_INFO = 242 QGROUP_LIMIT = 244 QGROUP_RELATION = 246 TEMPORARY_ITEM = 248 PERSISTENT_ITEM = 249 DEV_REPLACE = 250 UUID_KEY_SUBVOL = 251 UUID_KEY_RECEIVED_SUBVOL = 252 STRING_ITEM = 253 class BtrfsObjectid(_BtrfsEnum): # Generated with # sed -rn 's/^#\s*define\s+BTRFS_([0-9A-Za-z_]+)_OBJECTID\s+(-?[0-9]+).*/ \1 = \2/p' include/uapi/linux/btrfs_tree.h | # grep -v -e DEV_STATS -e FIRST_FREE -e LAST_FREE -e FIRST_CHUNK_TREE -e DEV_ITEMS -e BTREE_INODE -e EMPTY_SUBVOL_DIR | # sed -r 's/-[0-9]+/& \& 0xffffffffffffffff/' # # DEV_STATS (0) only applies if the type is PERSISTENT_ITEM. # FIRST_FREE (256) and LAST_FREE (-256) define the range of normal # objectids and aren't meaningful on their own. # FIRST_CHUNK_TREE (256) only applies if the type is CHUNK_ITEM. # DEV_ITEMS (1) only applies if the type is DEV_ITEM. # BTREE_INODE (1) and EMPTY_SUBVOL_DIR (2) are only used as special inode # numbers in memory. 
ROOT_TREE = 1 EXTENT_TREE = 2 CHUNK_TREE = 3 DEV_TREE = 4 FS_TREE = 5 ROOT_TREE_DIR = 6 CSUM_TREE = 7 QUOTA_TREE = 8 UUID_TREE = 9 FREE_SPACE_TREE = 10 BLOCK_GROUP_TREE = 11 RAID_STRIPE_TREE = 12 BALANCE = -4 & 0xFFFFFFFFFFFFFFFF ORPHAN = -5 & 0xFFFFFFFFFFFFFFFF TREE_LOG = -6 & 0xFFFFFFFFFFFFFFFF TREE_LOG_FIXUP = -7 & 0xFFFFFFFFFFFFFFFF TREE_RELOC = -8 & 0xFFFFFFFFFFFFFFFF DATA_RELOC_TREE = -9 & 0xFFFFFFFFFFFFFFFF EXTENT_CSUM = -10 & 0xFFFFFFFFFFFFFFFF FREE_SPACE = -11 & 0xFFFFFFFFFFFFFFFF FREE_INO = -12 & 0xFFFFFFFFFFFFFFFF _non_standard_objectid_types = frozenset( { BtrfsType.PERSISTENT_ITEM, BtrfsType.DEV_EXTENT, BtrfsType.QGROUP_RELATION, BtrfsType.UUID_KEY_SUBVOL, BtrfsType.UUID_KEY_RECEIVED_SUBVOL, BtrfsType.DEV_ITEM, } ) _BTRFS_QGROUP_LEVEL_SHIFT = 48 def _qgroup_id_str(id: int) -> str: level = id >> _BTRFS_QGROUP_LEVEL_SHIFT subvolid = id & ((1 << _BTRFS_QGROUP_LEVEL_SHIFT) - 1) return f"{level}/{subvolid}" def _objectid_to_str(objectid: int, type: int) -> str: # Based on print_objectid() in btrfs-progs. 
if type == BtrfsType.PERSISTENT_ITEM: if objectid == 0: return "DEV_STATS" elif type == BtrfsType.DEV_EXTENT: return str(objectid) elif type == BtrfsType.QGROUP_RELATION: return _qgroup_id_str(objectid) elif type in (BtrfsType.UUID_KEY_SUBVOL, BtrfsType.UUID_KEY_RECEIVED_SUBVOL): return f"0x{objectid:016x}" elif objectid == 1 and type == BtrfsType.DEV_ITEM: return "DEV_ITEMS" elif objectid == 256 and type == BtrfsType.CHUNK_ITEM: return "FIRST_CHUNK_TREE" elif objectid == 0xFFFFFFFFFFFFFFFF: return "-1" else: try: return str(BtrfsObjectid(objectid)) except ValueError: pass return str(int(objectid)) _btrfs_disk_key_fmt = " "BtrfsHeader": ( csum, fsid, bytenr, flags, chunk_tree_uuid, generation, owner, nritems, level, ) = _btrfs_header_struct.unpack_from(b) return BtrfsHeader( csum=csum, fsid=uuid.UUID(bytes=fsid), bytenr=bytenr, flags=BtrfsHeaderFlag(flags), chunk_tree_uuid=uuid.UUID(bytes=chunk_tree_uuid), generation=generation, owner=owner, nritems=nritems, level=level, ) class BtrfsKey( NamedTuple( "BtrfsKey", [ ("objectid", Union[BtrfsObjectid, int]), ("type", Union[BtrfsType, int]), ("offset", int), ], ) ): def __new__(cls, objectid: int, type: int, offset: int) -> "Self": with suppress(ValueError): type = BtrfsType(type) if type not in _non_standard_objectid_types: with suppress(ValueError): objectid = BtrfsObjectid(objectid) return super().__new__(cls, objectid, type, offset) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) @staticmethod def from_bytes(b: bytes) -> "BtrfsKey": return BtrfsKey._make(_btrfs_disk_key_struct.unpack_from(b)) def __str__(self) -> str: # Based on btrfs_print_key() in btrfs-progs. 
type = ( self.type._name_ if isinstance(self.type, BtrfsType) else f"UNKNOWN.{self.type}" ) if self.type in ( BtrfsType.QGROUP_INFO, BtrfsType.QGROUP_LIMIT, BtrfsType.QGROUP_RELATION, ): offset = _qgroup_id_str(self.offset) elif self.type in ( BtrfsType.UUID_KEY_SUBVOL, BtrfsType.UUID_KEY_RECEIVED_SUBVOL, ): offset = f"0x{self.offset:016x}" elif ( self.type == BtrfsType.ROOT_ITEM and self.objectid == BtrfsObjectid.TREE_RELOC ): offset = _objectid_to_str(self.offset, self.type) elif self.offset == 0xFFFFFFFFFFFFFFFF: # btrfs-progs as of v6.8.1 skips this for ROOT_ITEM. offset = "-1" else: offset = str(self.offset) return f"({_objectid_to_str(self.objectid, self.type)} {type} {offset})" BTRFS_MIN_KEY = BtrfsKey(0, 0, 0) BTRFS_MAX_KEY = BtrfsKey(2**64 - 1, 2**8 - 1, 2**64 - 1) class BtrfsKeyPtr(NamedTuple): key: BtrfsKey blockptr: int generation: int @staticmethod def from_bytes(b: bytes) -> "BtrfsKeyPtr": t = _btrfs_key_ptr_struct.unpack_from(b) return BtrfsKeyPtr(BtrfsKey._make(t[:3]), *t[3:]) # class _BtrfsItemHandler(NamedTuple, Generic[_T]) and replacing Any with _T # would be more accurate, but that fails at runtime on Python 3.6; see # python/typing#449. This is good enough since it's checked more strictly # through _register_item_handler(). class _BtrfsItemHandler(NamedTuple): parse: Callable[[BtrfsKey, bytes], Any] print: Callable[[BtrfsKey, bytes, Any, str, "Optional[SupportsWrite[str]]"], None] _btrfs_item_handlers = {} # We could define one big dictionary literal with type # Dict[int, _BtrfsItemHandler], but then mypy won't enforce that the return # type of parse() matches the parameter type of print(). 
def _register_item_handler( type: BtrfsType, parse: Callable[[BtrfsKey, bytes], _T], print: Callable[[BtrfsKey, bytes, _T, str, "Optional[SupportsWrite[str]]"], None], ) -> None: assert type not in _btrfs_item_handlers _btrfs_item_handlers[int(type)] = _BtrfsItemHandler(parse, print) def _parse_unknown_item(key: BtrfsKey, raw_data: bytes) -> None: return None def _print_unknown_item( key: BtrfsKey, raw_data: bytes, data: None, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: step = 30 for i in range(0, len(raw_data), step): print(f"{indent}{'raw' if i == 0 else ' '} {raw_data[i:i + step].hex()}") _unknown_item_type_handler = _BtrfsItemHandler( parse=_parse_unknown_item, print=_print_unknown_item, ) def _parse_empty_item(key: BtrfsKey, raw_data: bytes) -> None: if raw_data: raise ValueError("expected empty item") return None def _parse_raw_item(key: BtrfsKey, raw_data: bytes) -> bytes: return raw_data def _parse_item_from_bytes( from_bytes: Callable[[bytes], _T] ) -> Callable[[BtrfsKey, bytes], _T]: @functools.wraps(from_bytes) def wrapper(key: BtrfsKey, raw_data: bytes) -> _T: return from_bytes(raw_data) return wrapper def _print_nothing( key: BtrfsKey, raw_data: bytes, data: None, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: return def _print_empty_item( s: str, ) -> Callable[[BtrfsKey, bytes, None, str, "Optional[SupportsWrite[str]]"], None]: def print_empty_item( key: BtrfsKey, raw_data: bytes, data: None, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print(f"{indent}{s}", file=file) return print_empty_item class BtrfsTimespec(NamedTuple): sec: int nsec: int def __str__(self) -> str: # btrfs-progs as of v6.8.1 doesn't zero-pad nsec. This is a bug. 
return f"{self.sec}.{self.nsec:09} ({time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.sec))})" class BtrfsInodeFlag(_BtrfsFlag): NODATASUM = 1 << 0 NODATACOW = 1 << 1 READONLY = 1 << 2 NOCOMPRESS = 1 << 3 PREALLOC = 1 << 4 SYNC = 1 << 5 IMMUTABLE = 1 << 6 APPEND = 1 << 7 NODUMP = 1 << 8 NOATIME = 1 << 9 DIRSYNC = 1 << 10 COMPRESS = 1 << 11 _btrfs_inode_item_struct = struct.Struct("<5Q4I3Q32xQIQIQIQI") class BtrfsInodeItem( NamedTuple( "BtrfsInodeItem", [ ("generation", int), ("transid", int), ("size", int), ("nbytes", int), ("block_group", int), ("nlink", int), ("uid", int), ("gid", int), ("mode", int), ("rdev", int), ("flags", BtrfsInodeFlag), ("sequence", int), ("atime", BtrfsTimespec), ("ctime", BtrfsTimespec), ("mtime", BtrfsTimespec), ("otime", BtrfsTimespec), ], ) ): def __new__( cls, generation: int, transid: int, size: int, nbytes: int, block_group: int, nlink: int, uid: int, gid: int, mode: int, rdev: int, flags: int, sequence: int, atime: BtrfsTimespec, ctime: BtrfsTimespec, mtime: BtrfsTimespec, otime: BtrfsTimespec, ) -> "Self": return super().__new__( cls, generation=generation, transid=transid, size=size, nbytes=nbytes, block_group=block_group, nlink=nlink, uid=uid, gid=gid, mode=mode, rdev=rdev, flags=BtrfsInodeFlag(flags), sequence=sequence, atime=atime, ctime=ctime, mtime=mtime, otime=otime, ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) @staticmethod def from_bytes(b: bytes) -> "BtrfsInodeItem": ( generation, transid, size, nbytes, block_group, nlink, uid, gid, mode, rdev, flags, sequence, atime_sec, atime_nsec, ctime_sec, ctime_nsec, mtime_sec, mtime_nsec, otime_sec, otime_nsec, ) = _btrfs_inode_item_struct.unpack_from(b) return BtrfsInodeItem( generation=generation, transid=transid, size=size, nbytes=nbytes, block_group=block_group, nlink=nlink, uid=uid, gid=gid, mode=mode, rdev=rdev, flags=flags, sequence=sequence, atime=BtrfsTimespec(atime_sec, atime_nsec), 
ctime=BtrfsTimespec(ctime_sec, ctime_nsec), mtime=BtrfsTimespec(mtime_sec, mtime_nsec), otime=BtrfsTimespec(otime_sec, otime_nsec), ) def _print_inode_item( key: BtrfsKey, raw_data: bytes, item: BtrfsInodeItem, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print( f"""\ {indent}generation {item.generation} transid {item.transid} size {item.size} nbytes {item.nbytes} {indent}block group {item.block_group} mode {item.mode:o} links {item.nlink} uid {item.uid} gid {item.gid} rdev {item.rdev} {indent}sequence {item.sequence} flags {item.flags} {indent}atime {item.atime} {indent}ctime {item.ctime} {indent}mtime {item.mtime} {indent}otime {item.otime} """, end="", file=file, ) _register_item_handler( BtrfsType.INODE_ITEM, _parse_item_from_bytes(BtrfsInodeItem.from_bytes), _print_inode_item, ) _btrfs_inode_ref_struct = struct.Struct(" "BtrfsInodeRef": index, name_len = _btrfs_inode_ref_struct.unpack_from(b) name_offset = _btrfs_inode_ref_struct.size return BtrfsInodeRef( index=index, name=b[name_offset : name_offset + name_len], ) def _print_inode_ref( key: BtrfsKey, raw_data: bytes, ref: BtrfsInodeRef, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print( f"""\ {indent}index {ref.index} namelen {len(ref.name)} name: {escape_ascii_string(ref.name)} """, end="", file=file, ) _register_item_handler( BtrfsType.INODE_REF, _parse_item_from_bytes(BtrfsInodeRef.from_bytes), _print_inode_ref, ) _btrfs_inode_extref_struct = struct.Struct(" "BtrfsInodeExtref": parent_objectid, index, name_len = _btrfs_inode_extref_struct.unpack_from(b) name_offset = _btrfs_inode_extref_struct.size return BtrfsInodeExtref( parent_objectid=parent_objectid, index=index, name=b[name_offset : name_offset + name_len], ) def _parse_inode_extref_array( key: BtrfsKey, raw_data: bytes ) -> Sequence[BtrfsInodeExtref]: view = memoryview(raw_data) offset = 0 refs = [] while offset < len(raw_data): extref = BtrfsInodeExtref.from_bytes(view[offset:]) refs.append(extref) offset += 
_btrfs_inode_extref_struct.size + len(extref.name) return refs def _print_inode_extref_array( key: BtrfsKey, raw_data: bytes, refs: Sequence[BtrfsInodeExtref], indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: for ref in refs: print( f"""\ {indent}index {ref.index} parent {ref.parent_objectid} namelen {len(ref.name)} name {escape_ascii_string(ref.name)} """, end="", file=file, ) _register_item_handler( BtrfsType.INODE_EXTREF, _parse_inode_extref_array, _print_inode_extref_array, ) class BtrfsFileType(_BtrfsEnum): FILE = 1 DIR = 2 CHRDEV = 3 BLKDEV = 4 FIFO = 5 SOCK = 6 SYMLINK = 7 XATTR = 8 _btrfs_dir_item_struct = struct.Struct(" "Self": return super().__new__( cls, location=location, transid=transid, type=_try_cast_enum(BtrfsFileType, type), name=name, data=data, ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) @staticmethod def from_bytes(b: bytes) -> "BtrfsDirItem": ( location_objectid, location_type, location_offset, transid, data_len, name_len, type, ) = _btrfs_dir_item_struct.unpack_from(b) name_offset = _btrfs_dir_item_struct.size data_offset = name_offset + name_len return BtrfsDirItem( location=BtrfsKey(location_objectid, location_type, location_offset), transid=transid, type=type, name=b[name_offset:data_offset], data=b[data_offset : data_offset + data_len], ) def _parse_dir_item_array(key: BtrfsKey, raw_data: bytes) -> Sequence[BtrfsDirItem]: view = memoryview(raw_data) offset = 0 items = [] while offset < len(raw_data): di = BtrfsDirItem.from_bytes(view[offset:]) items.append(di) offset += _btrfs_dir_item_struct.size + len(di.name) + len(di.data) return items def _print_dir_item( key: BtrfsKey, raw_data: bytes, item: BtrfsDirItem, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: type = ( item.type._name_ if isinstance(item.type, BtrfsFileType) else f"DIR_ITEM.{item.type}" ) # btrfs-progs as of v6.8.1 doesn't escape any strings. 
print( f"""\ {indent}location key {item.location} type {type} {indent}transid {item.transid} data_len {len(item.data)} name_len {len(item.name)} {indent}name: {escape_ascii_string(item.name)} """, end="", file=file, ) if item.data: print(f"{indent}data {escape_ascii_string(item.data)}", file=file) def _print_dir_item_array( key: BtrfsKey, raw_data: bytes, items: Sequence[BtrfsDirItem], indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: for item in items: _print_dir_item(key, raw_data, item, indent, file) _register_item_handler( BtrfsType.XATTR_ITEM, _parse_dir_item_array, _print_dir_item_array, ) _register_item_handler( BtrfsType.DIR_ITEM, _parse_dir_item_array, _print_dir_item_array, ) _register_item_handler( BtrfsType.DIR_INDEX, _parse_dir_item_array, _print_dir_item_array, ) # TODO: VERITY_DESC_ITEM handler # TODO: VERITY_MERKLE_ITEM handler _register_item_handler( BtrfsType.ORPHAN_ITEM, _parse_empty_item, _print_empty_item("orphan item"), ) # TODO: DIR_LOG_ITEM handler # TODO: DIR_LOG_INDEX handler class BtrfsCompressionType(_BtrfsEnum): NONE = 0 ZLIB = 1 LZO = 2 ZSTD = 3 _compression_type_to_str_dict: Dict[int, str] = { BtrfsCompressionType.NONE: "none", BtrfsCompressionType.ZLIB: "zlib", BtrfsCompressionType.LZO: "lzo", BtrfsCompressionType.ZSTD: "zstd", } def _compress_type_to_str(compression: int) -> str: try: return _compression_type_to_str_dict[compression] except KeyError: return f"UNKNOWN.{int(compression)}" class BtrfsFileExtentType(_BtrfsEnum): INLINE = 0 REG = 1 PREALLOC = 2 _file_extent_type_to_str_dict: Dict[int, str] = { BtrfsFileExtentType.INLINE: "inline", BtrfsFileExtentType.REG: "regular", BtrfsFileExtentType.PREALLOC: "prealloc", } def _file_extent_type_to_str(type: int) -> str: return _file_extent_type_to_str_dict.get(type, "unknown") class BtrfsFileExtentItem( NamedTuple( "BtrfsFileExtentItem", [ ("generation", int), ("ram_bytes", int), ("compression", Union[BtrfsCompressionType, int]), ("encryption", int), ("other_encoding", 
int), ("type", Union[BtrfsFileExtentType, int]), ("disk_bytenr", int), ("disk_num_bytes", int), ("offset", int), ("num_bytes", int), ], ) ): def __new__( cls, generation: int, ram_bytes: int, compression: int, encryption: int, other_encoding: int, type: int, disk_bytenr: int, disk_num_bytes: int, offset: int, num_bytes: int, ) -> "Self": return super().__new__( cls, generation=generation, ram_bytes=ram_bytes, compression=_try_cast_enum(BtrfsCompressionType, compression), encryption=encryption, other_encoding=other_encoding, type=_try_cast_enum(BtrfsFileExtentType, type), disk_bytenr=disk_bytenr, disk_num_bytes=disk_num_bytes, offset=offset, num_bytes=num_bytes, ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) class BtrfsInlineFileExtentItem( NamedTuple( "BtrfsInlineFileExtentItem", [ ("generation", int), ("ram_bytes", int), ("compression", Union[BtrfsCompressionType, int]), ("encryption", int), ("other_encoding", int), ("type", Union[BtrfsFileExtentType, int]), ("data", bytes), ], ) ): def __new__( cls, generation: int, ram_bytes: int, compression: int, encryption: int, other_encoding: int, type: int, data: bytes, ) -> "Self": return super().__new__( cls, generation=generation, ram_bytes=ram_bytes, compression=_try_cast_enum(BtrfsCompressionType, compression), encryption=encryption, other_encoding=other_encoding, type=_try_cast_enum(BtrfsFileExtentType, type), data=data, ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) _btrfs_file_extent_item_common_struct = struct.Struct(" Union[BtrfsFileExtentItem, BtrfsInlineFileExtentItem]: ( generation, ram_bytes, compression, encryption, other_encoding, type, ) = _btrfs_file_extent_item_common_struct.unpack_from(raw_data) if type == BtrfsFileExtentType.INLINE: return BtrfsInlineFileExtentItem( generation=generation, ram_bytes=ram_bytes, compression=compression, encryption=encryption, other_encoding=other_encoding, 
type=type, data=raw_data[_btrfs_file_extent_item_common_struct.size :], ) else: ( disk_bytenr, disk_num_bytes, offset, num_bytes, ) = _btrfs_file_extent_item_not_inline_struct.unpack_from( raw_data, _btrfs_file_extent_item_common_struct.size ) return BtrfsFileExtentItem( generation=generation, ram_bytes=ram_bytes, compression=compression, encryption=encryption, other_encoding=other_encoding, type=type, disk_bytenr=disk_bytenr, disk_num_bytes=disk_num_bytes, offset=offset, num_bytes=num_bytes, ) def _print_file_extent_item( key: BtrfsKey, raw_data: bytes, item: Union[BtrfsFileExtentItem, BtrfsInlineFileExtentItem], indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print( f"""\ {indent}generation {item.generation} type {int(item.type)} ({_file_extent_type_to_str(item.type)}) """, end="", file=file, ) compression = f"compression {int(item.compression)} ({_compress_type_to_str(item.compression)})" if isinstance(item, BtrfsInlineFileExtentItem): print( f"""\ {indent}inline extent data size {len(item.data)} ram_bytes {item.ram_bytes} {compression} """, end="", file=file, ) else: title = "prealloc" if item.type == BtrfsFileExtentType.PREALLOC else "extent" print( f"""\ {indent}{title} data disk byte {item.disk_bytenr} nr {item.disk_num_bytes} {indent}{title} data offset {item.offset} nr {item.num_bytes}\ """, end="", file=file, ) if item.type == BtrfsFileExtentType.PREALLOC: print(file=file) else: print( f"""\ ram {item.ram_bytes} {indent}extent {compression} """, end="", file=file, ) _register_item_handler( BtrfsType.EXTENT_DATA, _parse_file_extent_item, _print_file_extent_item, ) # TODO: EXTENT_CSUM handler (depends on filesystem csum setting) class BtrfsRootFlag(_BtrfsFlag): # btrfs-progs as of v6.8.1 prints this as RDONLY. SUBVOL_RDONLY = 1 << 0 # NB: this doesn't include the inode item. 
_btrfs_root_item_struct = struct.Struct("<7QIQBQBBQ16s16s16s5QIQIQIQI64x") class BtrfsRootItem( NamedTuple( "BtrfsRootItem", [ ("inode", BtrfsInodeItem), ("generation", int), ("root_dirid", int), ("bytenr", int), ("byte_limit", int), ("bytes_used", int), ("last_snapshot", int), ("flags", BtrfsRootFlag), ("refs", int), ("drop_progress", BtrfsKey), ("drop_level", int), ("level", int), ("generation_v2", int), ("uuid", uuid.UUID), ("parent_uuid", uuid.UUID), ("received_uuid", uuid.UUID), ("ctransid", int), ("otransid", int), ("stransid", int), ("rtransid", int), ("ctime", BtrfsTimespec), ("otime", BtrfsTimespec), ("stime", BtrfsTimespec), ("rtime", BtrfsTimespec), ], ) ): def __new__( cls, inode: BtrfsInodeItem, generation: int, root_dirid: int, bytenr: int, byte_limit: int, bytes_used: int, last_snapshot: int, flags: int, refs: int, drop_progress: BtrfsKey, drop_level: int, level: int, generation_v2: int, uuid: uuid.UUID, parent_uuid: uuid.UUID, received_uuid: uuid.UUID, ctransid: int, otransid: int, stransid: int, rtransid: int, ctime: BtrfsTimespec, otime: BtrfsTimespec, stime: BtrfsTimespec, rtime: BtrfsTimespec, ) -> "Self": return super().__new__( cls, inode=inode, generation=generation, root_dirid=root_dirid, bytenr=bytenr, byte_limit=byte_limit, bytes_used=bytes_used, last_snapshot=last_snapshot, flags=BtrfsRootFlag(flags), refs=refs, drop_progress=drop_progress, drop_level=drop_level, level=level, generation_v2=generation_v2, uuid=uuid, parent_uuid=parent_uuid, received_uuid=received_uuid, ctransid=ctransid, otransid=otransid, stransid=stransid, rtransid=rtransid, ctime=ctime, otime=otime, stime=stime, rtime=rtime, ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) @staticmethod def from_bytes(b: bytes) -> "BtrfsRootItem": inode = BtrfsInodeItem.from_bytes(b) ( generation, root_dirid, bytenr, byte_limit, bytes_used, last_snapshot, flags, refs, drop_progress_objectid, drop_progress_type, drop_progress_offset, 
drop_level, level, generation_v2, uuid_, parent_uuid, received_uuid, ctransid, otransid, stransid, rtransid, ctime_sec, ctime_nsec, otime_sec, otime_nsec, stime_sec, stime_nsec, rtime_sec, rtime_nsec, ) = _btrfs_root_item_struct.unpack_from(b, _btrfs_inode_item_struct.size) return BtrfsRootItem( inode=inode, generation=generation, root_dirid=root_dirid, bytenr=bytenr, byte_limit=byte_limit, bytes_used=bytes_used, last_snapshot=last_snapshot, flags=flags, refs=refs, drop_progress=BtrfsKey( drop_progress_objectid, drop_progress_type, drop_progress_offset ), drop_level=drop_level, level=level, generation_v2=generation_v2, uuid=uuid.UUID(bytes=uuid_), parent_uuid=uuid.UUID(bytes=parent_uuid), received_uuid=uuid.UUID(bytes=received_uuid), ctransid=ctransid, otransid=otransid, stransid=stransid, rtransid=rtransid, ctime=BtrfsTimespec(ctime_sec, ctime_nsec), otime=BtrfsTimespec(otime_sec, otime_nsec), stime=BtrfsTimespec(stime_sec, stime_nsec), rtime=BtrfsTimespec(rtime_sec, rtime_nsec), ) def _print_root_item( key: BtrfsKey, raw_data: bytes, item: BtrfsRootItem, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print( f"""\ {indent}generation {item.generation} root_dirid {item.root_dirid} bytenr {item.bytenr} byte_limit {item.byte_limit} bytes_used {item.bytes_used} {indent}last_snapshot {item.last_snapshot} flags {item.flags} refs {item.refs} {indent}drop_progress key {item.drop_progress} drop_level {item.drop_level} {indent}level {item.level} generation_v2 {item.generation_v2} {indent}uuid {item.uuid} {indent}parent_uuid {item.parent_uuid} {indent}received_uuid {item.received_uuid} {indent}ctransid {item.ctransid} otransid {item.otransid} stransid {item.stransid} rtransid {item.rtransid} {indent}ctime {item.ctime} {indent}otime {item.otime} {indent}stime {item.stime} {indent}rtime {item.rtime} """, end="", file=file, ) _register_item_handler( BtrfsType.ROOT_ITEM, _parse_item_from_bytes(BtrfsRootItem.from_bytes), _print_root_item, ) _btrfs_root_ref_struct = 
struct.Struct(" "BtrfsRootRef": dirid, sequence, name_len = _btrfs_root_ref_struct.unpack_from(b) name_offset = _btrfs_root_ref_struct.size return BtrfsRootRef( dirid=dirid, sequence=sequence, name=b[name_offset : name_offset + name_len], ) def _print_root_ref( key: BtrfsKey, raw_data: bytes, item: BtrfsRootRef, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: back = "back" if key.type == BtrfsType.ROOT_BACKREF else "" print( f"{indent}root {back}ref key dirid {item.dirid} sequence {item.sequence} name {escape_ascii_string(item.name)}", file=file, ) _register_item_handler( BtrfsType.ROOT_BACKREF, _parse_item_from_bytes(BtrfsRootRef.from_bytes), _print_root_ref, ) _register_item_handler( BtrfsType.ROOT_REF, _parse_item_from_bytes(BtrfsRootRef.from_bytes), _print_root_ref, ) class BtrfsExtentFlag(_BtrfsFlag): DATA = 1 << 0 TREE_BLOCK = 1 << 1 FULL_BACKREF = 1 << 8 _btrfs_tree_block_info_struct = struct.Struct(" "BtrfsTreeBlockInfo": ( key_objectid, key_type, key_offset, level, ) = _btrfs_tree_block_info_struct.unpack_from(b) return BtrfsTreeBlockInfo( key=BtrfsKey(key_objectid, key_type, key_offset), level=level, ) _btrfs_extent_owner_ref_struct = struct.Struct(" "Self": return super().__new__(cls, _try_cast_enum(BtrfsObjectid, root_id)) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) @staticmethod def from_bytes(b: bytes) -> "BtrfsExtentOwnerRef": return BtrfsExtentOwnerRef._make(_btrfs_extent_owner_ref_struct.unpack_from(b)) def _print_extent_owner_ref( key: BtrfsKey, raw_data: bytes, ref: BtrfsExtentOwnerRef, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print(f"{indent}extent owner root {ref.root_id}", file=file) _btrfs_extent_data_ref_struct = struct.Struct("<3QI") class BtrfsExtentDataRef( NamedTuple( "BtrfsExtentDataRef", [ ("root", Union[BtrfsObjectid, int]), ("objectid", int), ("offset", int), ("count", int), ], ) ): def __new__(cls, root: int, objectid: int, offset: int, count: 
int) -> "Self": return super().__new__( cls, _try_cast_enum(BtrfsObjectid, root), objectid, offset, count, ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) @staticmethod def from_bytes(b: bytes) -> "BtrfsExtentDataRef": return BtrfsExtentDataRef._make(_btrfs_extent_data_ref_struct.unpack_from(b)) def _print_extent_data_ref( key: BtrfsKey, raw_data: bytes, ref: BtrfsExtentDataRef, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print( f"extent data backref root {ref.root} objectid {ref.objectid} offset {ref.offset} count {ref.count}", file=file, ) _btrfs_shared_data_ref_struct = struct.Struct(" "BtrfsSharedDataRef": return BtrfsSharedDataRef._make(_btrfs_shared_data_ref_struct.unpack_from(b)) def _print_shared_data_ref( key: BtrfsKey, raw_data: bytes, ref: BtrfsSharedDataRef, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print(f"shared data backref count {ref.count}", file=file) class BtrfsExtentInlineRef(NamedTuple): type: BtrfsType offset: int class BtrfsExtentInlineOwnerRef(NamedTuple): type: BtrfsType root_id: Union[BtrfsObjectid, int] class BtrfsExtentInlineDataRef(NamedTuple): type: BtrfsType root: Union[BtrfsObjectid, int] objectid: int offset: int count: int # type: ignore[assignment] # Conflicts with tuple.count() class BtrfsExtentInlineSharedDataRef(NamedTuple): type: BtrfsType offset: int count: int # type: ignore[assignment] # Conflicts with tuple.count() _btrfs_extent_item_struct = struct.Struct("<3Q") class BtrfsExtentItem( NamedTuple( "BtrfsExtentItem", [ ("refs", int), ("generation", int), ("flags", BtrfsExtentFlag), ("tree_block_info", Optional[BtrfsTreeBlockInfo]), ( "inline_refs", Sequence[ Union[ BtrfsExtentInlineRef, BtrfsExtentInlineOwnerRef, BtrfsExtentInlineDataRef, BtrfsExtentInlineSharedDataRef, ] ], ), ], ) ): def __new__( cls, refs: int, generation: int, flags: int, tree_block_info: Optional[BtrfsTreeBlockInfo], inline_refs: Sequence[ Union[ 
BtrfsExtentInlineRef, BtrfsExtentInlineOwnerRef, BtrfsExtentInlineDataRef, BtrfsExtentInlineSharedDataRef, ] ], ) -> "Self": return super().__new__( cls, refs=refs, generation=generation, flags=BtrfsExtentFlag(flags), tree_block_info=tree_block_info, inline_refs=inline_refs, ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) def _parse_extent_item(key: BtrfsKey, raw_data: bytes) -> Optional[BtrfsExtentItem]: view = memoryview(raw_data) refs, generation, flags = _btrfs_extent_item_struct.unpack_from(raw_data) offset = _btrfs_extent_item_struct.size if key.type == BtrfsType.EXTENT_ITEM and (flags & BtrfsExtentFlag.TREE_BLOCK): tree_block_info = BtrfsTreeBlockInfo.from_bytes(view[offset:]) offset += _btrfs_tree_block_info_struct.size else: tree_block_info = None inline_refs: List[ Union[ BtrfsExtentInlineRef, BtrfsExtentInlineOwnerRef, BtrfsExtentInlineDataRef, BtrfsExtentInlineSharedDataRef, ] ] = [] while offset < len(raw_data): type = _try_cast_enum(BtrfsType, raw_data[offset]) offset += 1 if type == BtrfsType.EXTENT_OWNER_REF: inline_refs.append( BtrfsExtentInlineOwnerRef( type, *BtrfsExtentOwnerRef.from_bytes(view[offset:]), ) ) offset += _btrfs_extent_owner_ref_struct.size elif type == BtrfsType.EXTENT_DATA_REF: inline_refs.append( BtrfsExtentInlineDataRef( type, *BtrfsExtentDataRef.from_bytes(view[offset:]), ) ) offset += _btrfs_extent_data_ref_struct.size else: ref_offset = int.from_bytes(raw_data[offset : offset + 8], "little") offset += 8 if type == BtrfsType.TREE_BLOCK_REF or type == BtrfsType.SHARED_BLOCK_REF: inline_refs.append(BtrfsExtentInlineRef(type, ref_offset)) elif type == BtrfsType.SHARED_DATA_REF: inline_refs.append( BtrfsExtentInlineSharedDataRef( type, ref_offset, *_btrfs_shared_data_ref_struct.unpack_from(raw_data, offset), ) ) offset += _btrfs_shared_data_ref_struct.size else: return None return BtrfsExtentItem( refs=refs, generation=generation, flags=flags, tree_block_info=tree_block_info, 
inline_refs=inline_refs, ) def _print_extent_item( key: BtrfsKey, raw_data: bytes, item: Optional[BtrfsExtentItem], indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: if item is None: return _print_unknown_item(key, raw_data, item, indent, file) print( f"{indent}refs {item.refs} gen {item.generation} flags {item.flags}", file=file ) if item.tree_block_info is not None: print( f"{indent}tree block {item.tree_block_info.key} level {item.tree_block_info.level}", file=file, ) elif key.type == BtrfsType.METADATA_ITEM: print(f"{indent}tree block skinny level {key.offset}") for ref in item.inline_refs: if isinstance(ref, BtrfsExtentInlineRef): if ref.type == BtrfsType.TREE_BLOCK_REF: print( f"{indent}({int(ref.type)} {hex(ref.offset)}) tree block backref root {_objectid_to_str(ref.offset, 0)}", file=file, ) elif ref.type == BtrfsType.SHARED_BLOCK_REF: print( f"{indent}({int(ref.type)} {hex(ref.offset)}) shared block backref parent {ref.offset}", file=file, ) else: assert False, ref.type elif isinstance(ref, BtrfsExtentInlineOwnerRef): print( f"{indent}({int(ref.type)} {hex(ref.root_id)}) extent owner root {ref.root_id}", file=file, ) elif isinstance(ref, BtrfsExtentInlineDataRef): seq = _hash_extent_data_ref(ref.root, ref.objectid, ref.offset) print( f"{indent}({int(ref.type)} {hex(seq)}) extent data backref root {ref.root} objectid {ref.objectid} offset {ref.offset} count {ref.count}", file=file, ) elif isinstance(ref, BtrfsExtentInlineSharedDataRef): print( f"{indent}({int(ref.type)} {hex(ref.offset)}) shared data backref parent {ref.offset} count {ref.count}", file=file, ) else: assert False _register_item_handler( BtrfsType.EXTENT_ITEM, _parse_extent_item, _print_extent_item, ) _register_item_handler( BtrfsType.METADATA_ITEM, _parse_extent_item, _print_extent_item, ) _register_item_handler( BtrfsType.EXTENT_OWNER_REF, _parse_item_from_bytes(BtrfsExtentOwnerRef.from_bytes), _print_extent_owner_ref, ) _register_item_handler( BtrfsType.TREE_BLOCK_REF, 
_parse_empty_item, _print_empty_item("tree block backref"), ) _register_item_handler( BtrfsType.EXTENT_DATA_REF, _parse_item_from_bytes(BtrfsExtentDataRef.from_bytes), _print_extent_data_ref, ) _register_item_handler( BtrfsType.SHARED_BLOCK_REF, _parse_empty_item, _print_empty_item("shared block backref"), ) _register_item_handler( BtrfsType.SHARED_DATA_REF, _parse_item_from_bytes(BtrfsSharedDataRef.from_bytes), _print_shared_data_ref, ) # btrfs-progs as of v6.8.1 pretty-prints these flags without their numeric # value and errantly adds "|single" if no profile flag is set. class BtrfsBlockGroupFlag(_BtrfsFlag): DATA = 1 << 0 SYSTEM = 1 << 1 METADATA = 1 << 2 RAID0 = 1 << 3 RAID1 = 1 << 4 DUP = 1 << 5 RAID10 = 1 << 6 RAID5 = 1 << 7 RAID6 = 1 << 8 RAID1C3 = 1 << 9 RAID1C4 = 1 << 10 AVAIL_ALLOC_BIT_SINGLE = 1 << 48 _btrfs_block_group_item_struct = struct.Struct("<3Q") class BtrfsBlockGroupItem( NamedTuple( "BtrfsBlockGroupItem", [ ("used", int), ("chunk_objectid", int), ("flags", BtrfsBlockGroupFlag), ], ) ): def __new__(cls, used: int, chunk_objectid: int, flags: int) -> "Self": return super().__new__( cls, used=used, chunk_objectid=chunk_objectid, flags=BtrfsBlockGroupFlag(flags), ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) @staticmethod def from_bytes(b: bytes) -> "BtrfsBlockGroupItem": return BtrfsBlockGroupItem._make(_btrfs_block_group_item_struct.unpack_from(b)) def _print_block_group_item( key: BtrfsKey, raw_data: bytes, item: BtrfsBlockGroupItem, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print( f"{indent}block group used {item.used} chunk_objectid {item.chunk_objectid} flags {item.flags}", file=file, ) _register_item_handler( BtrfsType.BLOCK_GROUP_ITEM, _parse_item_from_bytes(BtrfsBlockGroupItem.from_bytes), _print_block_group_item, ) class BtrfsFreeSpaceFlag(_BtrfsFlag): USING_BITMAPS = 1 << 0 _btrfs_free_space_info_struct = struct.Struct(" "Self": return super().__new__( cls, 
extent_count=extent_count, flags=BtrfsFreeSpaceFlag(flags), ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) @staticmethod def from_bytes(b: bytes) -> "BtrfsFreeSpaceInfo": return BtrfsFreeSpaceInfo._make(_btrfs_free_space_info_struct.unpack_from(b)) def _print_free_space_info( key: BtrfsKey, raw_data: bytes, info: BtrfsFreeSpaceInfo, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: # btrfs-progs as of v6.8.1 doesn't pretty-print these flags. print( f"""\ {indent}free space info extent count {info.extent_count} flags {info.flags} """, end="", file=file, ) _register_item_handler( BtrfsType.FREE_SPACE_INFO, _parse_item_from_bytes(BtrfsFreeSpaceInfo.from_bytes), _print_free_space_info, ) _register_item_handler( BtrfsType.FREE_SPACE_EXTENT, _parse_empty_item, _print_empty_item("free space extent"), ) def _print_free_space_bitmap( key: BtrfsKey, raw_data: bytes, bitmap: bytes, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print(f"{indent}free space bitmap", file=file) _register_item_handler( BtrfsType.FREE_SPACE_BITMAP, _parse_raw_item, _print_free_space_bitmap, ) _btrfs_dev_extent_struct = struct.Struct("<4Q16s") class BtrfsDevExtent(NamedTuple): chunk_tree: int chunk_objectid: int chunk_offset: int length: int chunk_tree_uuid: uuid.UUID @staticmethod def from_bytes(b: bytes) -> "BtrfsDevExtent": ( chunk_tree, chunk_objectid, chunk_offset, length, chunk_tree_uuid, ) = _btrfs_dev_extent_struct.unpack_from(b) return BtrfsDevExtent( chunk_tree=chunk_tree, chunk_objectid=chunk_objectid, chunk_offset=chunk_offset, length=length, chunk_tree_uuid=uuid.UUID(bytes=chunk_tree_uuid), ) def _print_dev_extent( key: BtrfsKey, raw_data: bytes, item: BtrfsDevExtent, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print( f"""\ {indent}dev extent chunk_tree {item.chunk_tree} {indent}chunk_objectid {item.chunk_objectid} chunk_offset {item.chunk_offset} length {item.length} 
{indent}chunk_tree_uuid {item.chunk_tree_uuid} """, end="", file=file, ) _register_item_handler( BtrfsType.DEV_EXTENT, _parse_item_from_bytes(BtrfsDevExtent.from_bytes), _print_dev_extent, ) _btrfs_dev_item_struct = struct.Struct("<3Q3I3QIBB16s16s") class BtrfsDevItem(NamedTuple): devid: int total_bytes: int bytes_used: int io_align: int io_width: int sector_size: int type: int generation: int start_offset: int dev_group: int seek_speed: int bandwidth: int uuid: uuid.UUID fsid: uuid.UUID @staticmethod def from_bytes(b: bytes) -> "BtrfsDevItem": ( devid, total_bytes, bytes_used, io_align, io_width, sector_size, type, generation, start_offset, dev_group, seek_speed, bandwidth, uuid_, fsid, ) = _btrfs_dev_item_struct.unpack_from(b) return BtrfsDevItem( devid=devid, total_bytes=total_bytes, bytes_used=bytes_used, io_align=io_align, io_width=io_width, sector_size=sector_size, type=type, generation=generation, start_offset=start_offset, dev_group=dev_group, seek_speed=seek_speed, bandwidth=bandwidth, uuid=uuid.UUID(bytes=uuid_), fsid=uuid.UUID(bytes=fsid), ) def _print_dev_item( key: BtrfsKey, raw_data: bytes, item: BtrfsDevItem, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print( f"""\ {indent}devid {item.devid} total_bytes {item.total_bytes} bytes_used {item.bytes_used} {indent}io_align {item.io_align} io_width {item.io_width} sector_size {item.sector_size} type {item.type} {indent}generation {item.generation} start_offset {item.start_offset} dev_group {item.dev_group} {indent}seek_speed {item.seek_speed} bandwidth {item.bandwidth} {indent}uuid {item.uuid} {indent}fsid {item.fsid} """, end="", file=file, ) _register_item_handler( BtrfsType.DEV_ITEM, _parse_item_from_bytes(BtrfsDevItem.from_bytes), _print_dev_item, ) _btrfs_stripe_struct = struct.Struct(" "BtrfsStripe": devid, offset, dev_uuid = _btrfs_stripe_struct.unpack_from(b) return BtrfsStripe( devid=devid, offset=offset, dev_uuid=uuid.UUID(bytes=dev_uuid), ) # NB: this doesn't include the stripes 
_btrfs_chunk_struct = struct.Struct("<4Q3IHH") class BtrfsChunk( NamedTuple( "BtrfsChunk", [ ("length", int), ("owner", int), ("stripe_len", int), ("type", BtrfsBlockGroupFlag), ("io_align", int), ("io_width", int), ("sector_size", int), ("num_stripes", int), ("sub_stripes", int), ("stripes", Sequence[BtrfsStripe]), ], ) ): def __new__( cls, length: int, owner: int, stripe_len: int, type: int, io_align: int, io_width: int, sector_size: int, num_stripes: int, sub_stripes: int, stripes: Sequence[BtrfsStripe], ) -> "Self": return super().__new__( cls, length=length, owner=owner, stripe_len=stripe_len, type=BtrfsBlockGroupFlag(type), io_align=io_align, io_width=io_width, sector_size=sector_size, num_stripes=num_stripes, sub_stripes=sub_stripes, stripes=stripes, ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) @staticmethod def from_bytes(b: bytes) -> "BtrfsChunk": view = memoryview(b) ( length, owner, stripe_len, type, io_align, io_width, sector_size, num_stripes, sub_stripes, ) = _btrfs_chunk_struct.unpack_from(b) return BtrfsChunk( length=length, owner=owner, stripe_len=stripe_len, type=type, io_align=io_align, io_width=io_width, sector_size=sector_size, num_stripes=num_stripes, sub_stripes=sub_stripes, stripes=[ BtrfsStripe.from_bytes(view[stripe_offset:]) for stripe_offset in range( _btrfs_chunk_struct.size, _btrfs_chunk_struct.size + num_stripes * _btrfs_stripe_struct.size, _btrfs_stripe_struct.size, ) ], ) def _print_chunk( key: BtrfsKey, raw_data: bytes, chunk: BtrfsChunk, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print( f"""\ {indent}length {chunk.length} owner {chunk.owner} stripe_len {chunk.stripe_len} type {chunk.type} {indent}io_align {chunk.io_align} io_width {chunk.io_width} sector_size {chunk.sector_size} {indent}num_stripes {chunk.num_stripes} sub_stripes {chunk.sub_stripes} """, end="", file=file, ) for i, stripe in enumerate(chunk.stripes): print( f"""\ {indent}\tstripe {i} devid 
{stripe.devid} offset {stripe.offset} {indent}\tdev_uuid {stripe.dev_uuid} """, end="", file=file, ) _register_item_handler( BtrfsType.CHUNK_ITEM, _parse_item_from_bytes(BtrfsChunk.from_bytes), _print_chunk, ) # TODO: RAID_STRIPE handler class BtrfsQgroupStatusFlag(_BtrfsFlag): ON = 1 << 0 RESCAN = 1 << 1 INCONSISTENT = 1 << 2 SIMPLE_MODE = 1 << 3 _btrfs_qgroup_status_item_simple_quota_struct = struct.Struct("<5Q") _btrfs_qgroup_status_item_struct = struct.Struct("<4Q") class BtrfsQgroupStatusItem(NamedTuple): version: int generation: int flags: BtrfsQgroupStatusFlag rescan: int enable_gen: Optional[int] @staticmethod def from_bytes(b: bytes) -> "BtrfsQgroupStatusItem": # TODO: we should technically check the SIMPLE_QGROUP incompat flag instead. if len(b) >= _btrfs_qgroup_status_item_simple_quota_struct.size: ( version, generation, flags, rescan, enable_gen, ) = _btrfs_qgroup_status_item_simple_quota_struct.unpack_from(b) else: ( version, generation, flags, rescan, ) = _btrfs_qgroup_status_item_struct.unpack_from(b) enable_gen = None return BtrfsQgroupStatusItem( version=version, generation=generation, flags=BtrfsQgroupStatusFlag(flags), rescan=rescan, enable_gen=enable_gen, ) def _print_qgroup_status_item( key: BtrfsKey, raw_data: bytes, item: BtrfsQgroupStatusItem, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: enable_gen = "" if item.enable_gen is None else f" enable_gen {item.enable_gen}" # btrfs-progs as of v6.8.1 pretty-prints these flags without their numeric # value and adds "OFF" if the "ON" flag is not set. 
print( f"{indent}version {item.version} generation {item.generation} flags {item.flags} scan {item.rescan}{enable_gen}", file=file, ) _register_item_handler( BtrfsType.QGROUP_STATUS, _parse_item_from_bytes(BtrfsQgroupStatusItem.from_bytes), _print_qgroup_status_item, ) _btrfs_qgroup_info_item_struct = struct.Struct("<5Q") class BtrfsQgroupInfoItem(NamedTuple): generation: int rfer: int rfer_cmpr: int excl: int excl_cmpr: int @staticmethod def from_bytes(b: bytes) -> "BtrfsQgroupInfoItem": return BtrfsQgroupInfoItem._make(_btrfs_qgroup_info_item_struct.unpack_from(b)) def _print_qgroup_info_item( key: BtrfsKey, raw_data: bytes, item: BtrfsQgroupInfoItem, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print( f"""\ {indent}generation {item.generation} {indent}referenced {item.rfer} referenced_compressed {item.rfer_cmpr} {indent}exclusive {item.excl} exclusive_compressed {item.excl_cmpr} """, end="", file=file, ) _register_item_handler( BtrfsType.QGROUP_INFO, _parse_item_from_bytes(BtrfsQgroupInfoItem.from_bytes), _print_qgroup_info_item, ) class BtrfsQgroupLimitFlag(_BtrfsFlag): MAX_RFER = 1 << 0 MAX_EXCL = 1 << 1 RSV_RFER = 1 << 2 RSV_EXCL = 1 << 3 RFER_CMPR = 1 << 4 EXCL_CMPR = 1 << 5 _btrfs_qgroup_limit_item_struct = struct.Struct("<5Q") class BtrfsQgroupLimitItem( NamedTuple( "BtrfsQgroupLimitItem", [ ("flags", BtrfsQgroupLimitFlag), ("max_rfer", int), ("max_excl", int), ("rsv_rfer", int), ("rsv_excl", int), ], ) ): def __new__( cls, flags: int, max_rfer: int, max_excl: int, rsv_rfer: int, rsv_excl: int ) -> "Self": return super().__new__( cls, flags=BtrfsQgroupLimitFlag(flags), max_rfer=max_rfer, max_excl=max_excl, rsv_rfer=rsv_rfer, rsv_excl=rsv_excl, ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) @staticmethod def from_bytes(b: bytes) -> "BtrfsQgroupLimitItem": return BtrfsQgroupLimitItem._make( _btrfs_qgroup_limit_item_struct.unpack_from(b) ) def _print_qgroup_limit_item( key: BtrfsKey, 
raw_data: bytes, item: BtrfsQgroupLimitItem, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: # btrfs-progs as of v6.8.1 doesn't pretty-print these flags. print( f"""\ {indent}flags {item.flags} {indent}max_referenced {item.max_rfer} max_exclusive {item.max_excl} {indent}rsv_referenced {item.rsv_rfer} rsv_exclusive {item.rsv_excl} """, end="", file=file, ) _register_item_handler( BtrfsType.QGROUP_LIMIT, _parse_item_from_bytes(BtrfsQgroupLimitItem.from_bytes), _print_qgroup_limit_item, ) _register_item_handler( BtrfsType.QGROUP_RELATION, _parse_empty_item, _print_nothing, ) class BtrfsBalanceFlag(_BtrfsFlag): DATA = 1 << 0 SYSTEM = 1 << 1 METADATA = 1 << 2 FORCE = 1 << 3 RESUME = 1 << 4 class BtrfsDiskBalanceArgsRange(NamedTuple): min: int max: int def _disk_balance_range_from_args( min: int, max: int, upper_bound: bool ) -> BtrfsDiskBalanceArgsRange: if upper_bound: return BtrfsDiskBalanceArgsRange(0, (max << 32) | min) else: return BtrfsDiskBalanceArgsRange(min, max) class BtrfsDiskBalanceArgsFlag(_BtrfsFlag): PROFILES = 1 << 0 USAGE = 1 << 1 DEVID = 1 << 2 DRANGE = 1 << 3 VRANGE = 1 << 4 LIMIT = 1 << 5 LIMIT_RANGE = 1 << 6 STRIPES_RANGE = 1 << 7 CONVERT = 1 << 8 SOFT = 1 << 9 USAGE_RANGE = 1 << 10 _btrfs_disk_balance_args_struct = struct.Struct(" "Self": return super().__new__( cls, profiles=BtrfsBlockGroupFlag(profiles), usage=usage, devid=devid, pstart=pstart, pend=pend, vstart=vstart, vend=vend, target=BtrfsBlockGroupFlag(target), flags=BtrfsDiskBalanceArgsFlag(flags), limit=limit, stripes=stripes, ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) @staticmethod def from_bytes(b: bytes) -> "BtrfsDiskBalanceArgs": ( profiles, usage_min, usage_max, devid, pstart, pend, vstart, vend, target, flags, limit_min, limit_max, stripes_min, stripes_max, ) = _btrfs_disk_balance_args_struct.unpack_from(b) return BtrfsDiskBalanceArgs( profiles=profiles, usage=_disk_balance_range_from_args( usage_min, usage_max, 
flags & BtrfsDiskBalanceArgsFlag.USAGE ), devid=devid, pstart=pstart, pend=pend, vstart=vstart, vend=vend, target=target, flags=flags, limit=_disk_balance_range_from_args( limit_min, limit_max, flags & BtrfsDiskBalanceArgsFlag.LIMIT ), stripes=BtrfsDiskBalanceArgsRange(stripes_min, stripes_max), ) _btrfs_balance_item_flags_struct = struct.Struct(" "Self": return super().__new__( cls, flags=BtrfsBalanceFlag(flags), data=data, meta=meta, sys=sys, ) @classmethod def _make(cls, iterable: Iterable[Any]) -> "Self": return cls.__new__(cls, *iterable) @staticmethod def from_bytes(b: bytes) -> "BtrfsBalanceItem": view = memoryview(b) return BtrfsBalanceItem( *_btrfs_balance_item_flags_struct.unpack_from(b), *[ BtrfsDiskBalanceArgs.from_bytes( view[ _btrfs_balance_item_flags_struct.size + i * _btrfs_disk_balance_args_struct.size : ] ) for i in range(3) ], ) def _parse_temporary_item( key: BtrfsKey, raw_data: bytes ) -> Union[BtrfsBalanceItem, None]: if key.objectid == BtrfsObjectid.BALANCE: return BtrfsBalanceItem.from_bytes(raw_data) else: return None def _print_balance_item( item: BtrfsBalanceItem, indent: str, file: "Optional[SupportsWrite[str]]" ) -> None: # btrfs-progs as of v6.8.1 doesn't pretty-print any of these flags. print(f"{indent}balance status flags {item.flags}", file=file) for title, args in ( ("DATA", item.data), ("METADATA", item.meta), ("SYSTEM", item.sys), ): # btrfs-progs as of v6.8.1 doesn't handle the USAGE and LIMIT flags # when printing {usage,limit}_{min,max}. This is a bug. 
print( f"""\ {indent}{title} {indent}profiles {args.profiles} devid {args.devid} target {args.target} flags {args.flags} {indent}usage_min {args.usage.min} usage_max {args.usage.max} pstart {args.pstart} pend {args.pend} {indent}vstart {args.vstart} vend {args.vend} limit_min {args.limit.min} limit_max {args.limit.max} {indent}stripes_min {args.stripes.min} stripes_max {args.stripes.max} """, end="", file=file, ) def _print_temporary_item( key: BtrfsKey, raw_data: bytes, item: Union[BtrfsBalanceItem, None], indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print( f"{indent}temporary item objectid {key.objectid} offset {key.offset}", file=file ) if isinstance(item, BtrfsBalanceItem): _print_balance_item(item, indent, file) else: _print_unknown_item(key, raw_data, item, indent, file) _register_item_handler( BtrfsType.TEMPORARY_ITEM, _parse_temporary_item, _print_temporary_item, ) _btrfs_dev_stats_item_struct = struct.Struct("<5Q") class BtrfsDevStatsItem(NamedTuple): write_errs: int read_errs: int flush_errs: int corruption_errs: int generation_errs: int @staticmethod def from_bytes(b: bytes) -> "BtrfsDevStatsItem": return BtrfsDevStatsItem._make(_btrfs_dev_stats_item_struct.unpack_from(b)) def _parse_persistent_item( key: BtrfsKey, raw_data: bytes ) -> Union[BtrfsDevStatsItem, None]: if key.objectid == 0: # BTRFS_DEV_STATS_OBJECTID return BtrfsDevStatsItem.from_bytes(raw_data) else: return None def _print_dev_stats_item( item: BtrfsDevStatsItem, indent: str, file: "Optional[SupportsWrite[str]]" ) -> None: print( f"""\ {indent}device stats {indent}write_errs {item.write_errs} read_errs {item.read_errs} flush_errs {item.flush_errs} corruption_errs {item.flush_errs} generation {item.generation_errs} """, end="", file=file, ) def _print_persistent_item( key: BtrfsKey, raw_data: bytes, item: Union[BtrfsDevStatsItem, None], indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print( f"{indent}persistent item objectid {_objectid_to_str(key.objectid, 
key.type)} offset {key.offset}", file=file, ) if isinstance(item, BtrfsDevStatsItem): _print_dev_stats_item(item, indent, file) else: _print_unknown_item(key, raw_data, item, indent, file) _register_item_handler( BtrfsType.PERSISTENT_ITEM, _parse_persistent_item, _print_persistent_item, ) # TODO: DEV_REPLACE handler def _parse_uuid_tree_item(key: BtrfsKey, raw_data: bytes) -> Sequence[int]: return [x[0] for x in struct.iter_unpack(" None: for id in ids: print(f"{indent}subvol_id {id}", file=file) _register_item_handler( BtrfsType.UUID_KEY_SUBVOL, _parse_uuid_tree_item, _print_uuid_tree_item, ) _register_item_handler( BtrfsType.UUID_KEY_RECEIVED_SUBVOL, _parse_uuid_tree_item, _print_uuid_tree_item, ) def _print_string_item( key: BtrfsKey, raw_data: bytes, string: bytes, indent: str, file: "Optional[SupportsWrite[str]]", ) -> None: print(f"{indent}item data {escape_ascii_string(string)}", file=file) _register_item_handler( BtrfsType.STRING_ITEM, _parse_raw_item, _print_string_item, ) class BtrfsItemData: key: "Final[BtrfsKey]" offset: "Final[int]" size: "Final[int]" raw_data: "Final[bytes]" def __init__( self, key: BtrfsKey, offset: int, size: int, raw_data: bytes, ) -> None: self.key = key self.offset = offset self.size = size self.raw_data = raw_data def __repr__(self) -> str: return f"BtrfsItemData(key={self.key!r}, offset={self.offset!r}, size={self.size!r}, raw_data={self.raw_data!r})" @cached_property def data(self) -> Any: return _btrfs_item_handlers.get( self.key.type, _unknown_item_type_handler ).parse(self.key, self.raw_data) class BtrfsNode(NamedTuple): header: BtrfsHeader ptrs: Sequence[BtrfsKeyPtr] class BtrfsLeaf(NamedTuple): header: BtrfsHeader items: Sequence[BtrfsItemData] _btrfs_disk_key_size = 17 # sizeof(struct btrfs_disk_key) _btrfs_leaf_items_offset = 101 # offsetof(struct btrfs_leaf, items) _btrfs_item_size = 25 # sizeof(struct btrfs_item) _btrfs_node_ptrs_offset = 101 # offsetof(struct btrfs_node, ptrs) _btrfs_key_ptr_size = 33 # 
sizeof(prog.type("struct btrfs_key_ptr")) _btrfs_key_ptr_blockptr_offset = 17 # offsetof(struct btrfs_key_ptr, blockptr) _btrfs_header_nritems_offset = 96 # offsetof(struct btrfs_header, nritems) _btrfs_header_level_offset = 100 # offsetof(struct btrfs_header, level) def _read_extent_buffer_folios(eb: Object, start: int, len: int) -> bytes: prog = eb.prog_ eb_addr = eb.addr.value_() if eb_addr: return prog.read(eb_addr + start, len) pagep_type = prog.type("struct page *") # TODO: we should probably add real folio_size() and folio_address() # helpers. folios = eb.folios unit_size = page_size(cast(pagep_type, folios[0])).value_() i = start // unit_size offset = (eb.start.value_() + start) & (unit_size - 1) ret = [] while len > 0: cur = min(len, unit_size - offset) ret.append( prog.read(page_to_virt(cast(pagep_type, folios[i])).value_() + offset, cur) ) len -= cur offset = 0 i += 1 return b"".join(ret) def _read_extent_buffer_pages(eb: Object, start: int, len: int) -> bytes: prog = eb.prog_ unit_size = prog["PAGE_SIZE"].value_() i = start // unit_size offset = (eb.start.value_() + start) & (unit_size - 1) ret = [] while len > 0: cur = min(len, unit_size - offset) ret.append(prog.read(page_to_virt(eb.pages[i]).value_() + offset, cur)) len -= cur offset = 0 i += 1 return b"".join(ret) def read_extent_buffer(eb: Object, start: IntegerLike, len: IntegerLike) -> bytes: prog = eb.prog_ start = operator.index(start) len = operator.index(len) try: impl = prog.cache["read_extent_buffer"] except KeyError: # Since Linux kernel commit 082d5bb9b336 ("btrfs: migrate # extent_buffer::pages[] to folio") (in v6.8), an extent_buffer # contains an array of folios. Before that, it's an array of pages. 
if prog.type("struct extent_buffer").has_member("folios"): impl = _read_extent_buffer_folios else: impl = _read_extent_buffer_pages prog.cache["read_extent_buffer"] = impl return impl(eb, start, len) def btrfs_header_level(eb: Object) -> int: return read_extent_buffer(eb, _btrfs_header_level_offset, 1)[0] def btrfs_header_nritems(eb: Object) -> int: return int.from_bytes( read_extent_buffer(eb, _btrfs_header_nritems_offset, 4), "little" ) def btrfs_node_blockptr(eb: Object, nr: IntegerLike) -> int: return int.from_bytes( read_extent_buffer( eb, _btrfs_node_ptrs_offset + _btrfs_key_ptr_size * operator.index(nr) + _btrfs_key_ptr_blockptr_offset, 8, ), "little", ) def _btrfs_bin_search( eb: Object, p: int, item_size: int, key: BtrfsKey ) -> Tuple[int, int]: low = 0 high = btrfs_header_nritems(eb) while low < high: mid = (low + high) // 2 offset = p + mid * item_size disk_key = BtrfsKey.from_bytes( read_extent_buffer(eb, offset, _btrfs_disk_key_size) ) if disk_key < key: low = mid + 1 elif disk_key > key: high = mid else: return 0, mid return 1, low class BtrfsTreeError(Exception): pass def find_extent_buffer(fs_info: Object, start: IntegerLike) -> Object: return cast( "struct extent_buffer *", radix_tree_lookup( fs_info.buffer_radix.address_of_(), start >> fs_info.sectorsize_bits ), ) def _get_block_for_search(fs_info: Object, eb: Object, slot: int) -> Object: blocknr = btrfs_node_blockptr(eb, slot) tmp = cast( eb.type_, radix_tree_lookup( fs_info.buffer_radix.address_of_(), blocknr >> fs_info.sectorsize_bits ), ) if not tmp: raise BtrfsTreeError(f"extent_buffer {blocknr} is not cached") if not tmp.refs.counter: raise BtrfsTreeError(f"extent_buffer {blocknr} is dead") if not tmp.bflags & (1 << fs_info.prog_["EXTENT_BUFFER_UPTODATE"]): raise BtrfsTreeError(f"extent_buffer {blocknr} is not up to date") # The kernel also checks the eb's transid and level to detect corruption, # but we probably don't need to. 
return tmp def btrfs_search_slot( root: Object, key: BtrfsKey, *, search_commit_root: bool = False, allow_partial: bool = False, ) -> Tuple[int, List[Object], List[int]]: fs_info = root.fs_info.read_() nodes = [] slots = [] prev_cmp = -1 if search_commit_root: b = root.commit_root.read_() else: b = root.node.read_() level = btrfs_header_level(b) try: for level in range(level, -1, -1): nodes.append(b) if prev_cmp == 0: slot = 0 ret = 0 else: if level == 0: ret, slot = _btrfs_bin_search( b, _btrfs_leaf_items_offset, _btrfs_item_size, key ) else: ret, slot = _btrfs_bin_search( b, _btrfs_node_ptrs_offset, _btrfs_key_ptr_size, key ) prev_cmp = ret if level == 0: slots.append(slot) break if ret and slot > 0: slot -= 1 slots.append(slot) b = _get_block_for_search(fs_info, b, slot) except BtrfsTreeError as e: if not allow_partial: raise ret = -1 print(e, file=sys.stderr) nodes.reverse() slots.reverse() return ret, nodes, slots def btrfs_next_leaf(nodes: List[Object], slots: List[int]) -> int: for i in range(1, len(slots)): if slots[i] + 1 < btrfs_header_nritems(nodes[i]): break else: return 1 fs_info = nodes[0].fs_info.read_() slots[i] += 1 for j in range(i - 1, -1, -1): slots[j] = 0 nodes[j] = _get_block_for_search(fs_info, nodes[j + 1], slots[j + 1]) return 0 def btrfs_read_item(eb: Object, slot: IntegerLike) -> BtrfsItemData: if btrfs_header_level(eb) != 0: raise ValueError("buffer is not leaf") slot = operator.index(slot) if slot >= btrfs_header_nritems(eb): raise IndexError("slot is out of bounds") item_buf = read_extent_buffer( eb, _btrfs_leaf_items_offset + slot * _btrfs_item_size, _btrfs_item_size ) objectid, type, offset, data_offset, data_size = _btrfs_item_struct.unpack(item_buf) key = BtrfsKey(objectid, type, offset) raw_data = read_extent_buffer(eb, _btrfs_leaf_items_offset + data_offset, data_size) return BtrfsItemData(key, data_offset, data_size, raw_data) def _parse_extent_buffer(buf: bytes) -> Union[BtrfsNode, BtrfsLeaf]: header = 
BtrfsHeader.from_bytes(buf) if header.level == 0: items = [] for i in range(header.nritems): ( objectid, type, offset, data_offset, data_size, ) = _btrfs_item_struct.unpack_from( buf, _btrfs_leaf_items_offset + i * _btrfs_item_size ) key = BtrfsKey(objectid, type, offset) raw_data = buf[ _btrfs_leaf_items_offset + data_offset : _btrfs_leaf_items_offset + data_offset + data_size ] items.append( BtrfsItemData( key, data_offset, data_size, raw_data, ) ) return BtrfsLeaf(header, items) else: view = memoryview(buf) return BtrfsNode( header, [ BtrfsKeyPtr.from_bytes( view[_btrfs_node_ptrs_offset + i * _btrfs_key_ptr_size :] ) for i in range(header.nritems) ], ) def parse_extent_buffer(eb: Object) -> Union[BtrfsNode, BtrfsLeaf]: return _parse_extent_buffer(read_extent_buffer(eb, 0, eb.len.value_())) def _print_btrfs_item(item: BtrfsItemData, indent: str = "") -> None: print(f"key {item.key} itemoff {item.offset} itemsize {item.size}") _btrfs_item_handlers.get(item.key.type, _unknown_item_type_handler).print( item.key, item.raw_data, item.data, indent + "\t", None, ) def print_btrfs_node(node: Union[BtrfsNode, BtrfsLeaf]) -> None: node_or_leaf = "leaf" if isinstance(node, BtrfsLeaf) else "node" print( f"{node_or_leaf} {node.header.bytenr} level {node.header.level} items {node.header.nritems} generation {node.header.generation} owner {node.header.owner}" ) print(f"{node_or_leaf} {node.header.bytenr} flags {node.header.flags:#x}") print(f"fs uuid {node.header.fsid}") print(f"chunk uuid {node.header.chunk_tree_uuid}") if isinstance(node, BtrfsLeaf): for i, item in enumerate(node.items): print(f"\titem {i} ", end="") _print_btrfs_item(item, "\t") else: for i, ptr in enumerate(node.ptrs): # btrfs-progs as of v6.8.1 doesn't print the "ptr {i}", but it's # useful for making sense of slot numbers. 
print(f"\tptr {i} key {ptr.key} block {ptr.blockptr} gen {ptr.generation}") def print_extent_buffer(eb: Object) -> None: print_btrfs_node(parse_extent_buffer(eb)) def btrfs_print_tree_items( root: Object, *, min_key: BtrfsKey = BTRFS_MIN_KEY, max_key: BtrfsKey = BTRFS_MAX_KEY, ) -> None: ret, nodes, slots = btrfs_search_slot(root, min_key) while True: node: BtrfsLeaf = parse_extent_buffer(nodes[0]) # type: ignore[assignment] for item in node.items: if item.key > max_key: return if item.key >= min_key: _print_btrfs_item(item) if btrfs_next_leaf(nodes, slots): break drgn-0.0.31/contrib/btrfs_tree_mod_log.py000066400000000000000000000055371477777462700204440ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later import drgn from drgn import NULL, Object, cast, container_of, execscript, offsetof, reinterpret, sizeof from drgn.helpers.common import * from drgn.helpers.linux import * import collections import sys """Btrfs Tree Mod Log rewind simulator""" # you can get a tree mod log from fs_info.tree_mod_log # search for tree mod log entries for the given offset and minimum sequence # returns a sorted list of matching entries. def tree_mod_log_search(tml, start, min_seq): es = collections.defaultdict(list) for e in rbtree_inorder_for_each_entry("struct tree_mod_elem", tml, "node"): es[int(e.logical)].append((int(e.seq), e)) return [p[1] for p in sorted(es[start]) if p[0] >= min_seq] # apply the tree mod log entries returned by tree_mod_log_search # in reverse to a model of a blank extent_buffer. Pay extra attention # to a particular slot. 
def tree_mod_log_rewind(tmes, my_slot): eb_rewind = {} for tme in reversed(tmes): print(tme) op = int(tme.op) slot = int(tme.slot) # replace if op == 0: if slot == my_slot: print(f"writing {tme} into {my_slot}!") eb_rewind[slot] = (tme.blockptr, tme.generation, tme.key) # add if op == 1: if slot == my_slot: print(f"nuking {my_slot}!") del(eb_rewind[slot]) # remove if op in [2,3,4]: if slot == my_slot: print(f"writing {tme} into {my_slot}!") eb_rewind[slot] = (int(tme.blockptr), int(tme.generation), tme.key) # move if op == 5: src_slot = int(tme.move.dst_slot) nr = int(tme.move.nr_items) off = 0 for src in range(src_slot, src_slot+nr): if src in eb_rewind: if slot + off == my_slot: print(f"moving {eb_rewind[src]} into {my_slot}!") eb_rewind[slot + off] = eb_rewind[src] else: if slot + off == my_slot: print(f"moving garbage into {my_slot}!") eb_rewind[slot + off] = (0,0,(0,0,0)) off += 1 return eb_rewind # compare the slots of a real eb and a rewound eb # parsed_eb is the output of 'parse_extent_buffer' in btrfs.py def diff_eb_rewind(parsed_eb, eb_rewind): ebptrs = parsed_eb.ptrs mismatch = False for i, ptr in enumerate(ebptrs): if i in eb_rewind: if ptr.blockptr != eb_rewind[i][0]: mismatch = True print(f"EB {i}: {ptr.blockptr} EB_REWIND {i}: {eb_rewind[i][0]}") elif ptr.blockptr != 0 and ptr.blockptr < 1 << 41: mismatch = True print(f"EB ONLY {i}: {ptr.blockptr}") return mismatch drgn-0.0.31/contrib/cgroup.py000077500000000000000000000104541477777462700161010ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: LGPL-2.1-or-later """List the paths of all descendants of a cgroup v2""" import argparse from contextlib import contextmanager import os import sys from collections import Counter from drgn import cast from drgn.helpers.common.type import enum_type_to_class from drgn.helpers.linux import ( cgroup_bpf_prog_for_each, cgroup_path, css_for_each_descendant_pre, fget, find_task, ) # Since Linux kernel commit 6fc88c354f3a ("bpf: Migrate cgroup_bpf to internal # cgroup_bpf_attach_type enum") (in v5.15), the attach type is the # cgroup-specific enum cgroup_bpf_attach_type. Before that, it was the generic # enum bpf_attach_type. try: enum_cgroup_bpf_attach_type = prog.type("enum cgroup_bpf_attach_type") except LookupError: CgroupBpfAttachType = enum_type_to_class( prog.type("enum bpf_attach_type"), "CgroupBpfAttachType", exclude=("__MAX_BPF_ATTACH_TYPE",), ) else: CgroupBpfAttachType = enum_type_to_class( enum_cgroup_bpf_attach_type, "CgroupBpfAttachType", exclude=("CGROUP_BPF_ATTACH_TYPE_INVALID", "MAX_CGROUP_BPF_ATTACH_TYPE",), ) CgroupSubsysId = enum_type_to_class( prog.type("enum cgroup_subsys_id"), "CgroupSubsysId", exclude=("CGROUP_SUBSYS_COUNT",), ) @contextmanager def open_dir(*args, **kwds): # Built-in open() context manager can't deal with directories. 
fd = os.open(*args, **kwds) try: yield fd finally: os.close(fd) def print_cgroup_bpf_progs(cgrp): cgroup_printed = False for attach_type in CgroupBpfAttachType: attach_flags = cgrp.bpf.flags[attach_type.value].value_() for prog in cgroup_bpf_prog_for_each(cgrp, attach_type.value): prog_id = prog.aux.id.value_() prog_name = prog.aux.name.string_().decode() if not cgroup_printed: print(cgroup_path(cgrp).decode()) cgroup_printed = True print( " {:<8} {:<30} {:<15} {:<15}".format( prog_id, attach_type.name, attach_flags, prog_name ) ) def get_cgroup(path): task = find_task(prog, os.getpid()) try: with open_dir(path, os.O_RDONLY) as fd: file_ = fget(task, fd) kn = cast("struct kernfs_node *", file_.f_path.dentry.d_inode.i_private) return cast("struct cgroup *", kn.priv) except FileNotFoundError as e: raise argparse.ArgumentTypeError(e) def cmd_tree(cgroup): css = cgroup.self.address_of_() for pos in css_for_each_descendant_pre(css): if not pos.flags & prog["CSS_ONLINE"]: continue print(cgroup_path(pos.cgroup).decode()) def cmd_bpf(cgroup): css = cgroup.self.address_of_() for pos in css_for_each_descendant_pre(css): if not pos.flags & prog["CSS_ONLINE"]: continue print_cgroup_bpf_progs(pos.cgroup) def cmd_stat(cgroup): stat = Counter() stat_dying = Counter() for ssid in CgroupSubsysId: css = cgroup.subsys[ssid.value] # XXX if subsys of offlined or cgroup rmdir'd under our hands we won't see its subtree if not css: continue for pos in css_for_each_descendant_pre(css): stat[ssid] +=1 if not pos.flags & prog["CSS_ONLINE"]: stat_dying[ssid] += 1 for ssid in CgroupSubsysId: if stat[ssid.value] == 0: continue print("nr_{:<30} {:>4}".format( ssid.name, stat[ssid.value] ) ) for ssid in CgroupSubsysId: if stat_dying[ssid.value] == 0: continue print("nr_dying_{:<24} {:>4}".format( ssid.name, stat_dying[ssid.value] ) ) if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("command", choices=["tree", "bpf", "stat"]) parser.add_argument("cgroups", 
help="Cgroups", nargs="*", type=get_cgroup) args = parser.parse_args() if len(args.cgroups) == 0: args.cgroups.append(prog["cgrp_dfl_root"].cgrp) for cg in args.cgroups: if len(args.cgroups) > 1: print(cg.kn.name.string_()) locals()["cmd_" + args.command](cg) drgn-0.0.31/contrib/dm_crypt_key.py000077500000000000000000000071161477777462700172740ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """Dump the master key of a dm-crypt device that uses aes-xts-plain64.""" import argparse import os from pathlib import Path import sys from drgn import cast, container_of from drgn.helpers.linux.block import disk_name, for_each_disk def crypto_skcipher_alg(tfm): return container_of(tfm.base.__crt_alg, "struct skcipher_alg", "base") def crypto_skcipher_ctx(tfm): return cast("void *", tfm.base.__crt_ctx) def crypto_lskcipher_ctx(tfm): return cast("void *", tfm.base.__crt_ctx) def aes_xts_ctx(tfm): AESNI_ALIGN = 16 mask = AESNI_ALIGN - 1 ctx = cast("unsigned long", crypto_skcipher_ctx(tfm)) return cast("struct aesni_xts_ctx *", (ctx + mask) & ~mask) def aes_key_from_ctx(ctx): words = ctx.key_enc.value_()[: ctx.key_length / 4] return b"".join(word.to_bytes(4, "little") for word in words) def is_function(obj, name): try: global_ = obj.prog_[name] except KeyError: return False return obj == global_ def main(): parser = argparse.ArgumentParser() parser.add_argument("name") args = parser.parse_args() if "/" in args.name: device_path = Path(args.name) else: device_path = Path("/dev/mapper") / args.name name = os.fsencode(device_path.resolve().name) for disk in for_each_disk(): if disk_name(disk) == name: break else: sys.exit("target not found") md = cast("struct mapped_device *", disk.private_data) map = cast("struct dm_table *", md.map) if map.num_targets != 1: sys.exit("dm table has multiple targets") ti = map.targets if not is_function(ti.type.map, "crypt_map"): sys.exit("target is not 
dm-crypt") cc = cast("struct crypt_config *", ti.private) if cc.cipher_string.string_() != b"aes-xts-plain64": sys.exit("cipher is not aes-xts-plain64") tfm = cc.cipher_tfm.tfms[0] exit = crypto_skcipher_alg(tfm).exit.read_() if is_function(exit, "simd_skcipher_exit"): cryptd_tfm = cast( "struct simd_skcipher_ctx *", crypto_skcipher_ctx(tfm) ).cryptd_tfm cryptd_ctx = cast( "struct cryptd_skcipher_ctx *", crypto_skcipher_ctx(cryptd_tfm.base) ) child_tfm = cryptd_ctx.child xts_ctx = aes_xts_ctx(cryptd_ctx.child) try: crypt_aes_ctx = xts_ctx.crypt_ctx tweak_aes_ctx = xts_ctx.tweak_ctx except AttributeError: # Before Linux kernel commit d148736ff17d ("crypto: x86/aesni - # Correct the data type in struct aesni_xts_ctx") (in v6.7), the # AES contexts were arrays that we need to cast. crypt_aes_ctx = cast("struct crypto_aes_ctx *", xts_ctx.raw_crypt_ctx) tweak_aes_ctx = cast("struct crypto_aes_ctx *", xts_ctx.raw_tweak_ctx) elif is_function(exit, "xts_exit_tfm"): xts_ctx = cast("struct xts_tfm_ctx *", crypto_skcipher_ctx(tfm)) lskcipher_tfm = cast( "struct crypto_lskcipher **", crypto_skcipher_ctx(xts_ctx.child) )[0] cipher_tfm = cast( "struct crypto_cipher **", crypto_lskcipher_ctx(lskcipher_tfm) )[0] crypt_aes_ctx = cast("struct crypto_aes_ctx *", cipher_tfm.base.__crt_ctx) tweak_aes_ctx = cast("struct crypto_aes_ctx *", xts_ctx.tweak.base.__crt_ctx) else: sys.exit("unknown skcipher") print(aes_key_from_ctx(crypt_aes_ctx).hex()) print(aes_key_from_ctx(tweak_aes_ctx).hex()) if __name__ == "__main__": main() drgn-0.0.31/contrib/dump_btrfs_bgs.py000077500000000000000000000050111477777462700175730ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Western Digital Corporation, Inc. and affiliates. 
# SPDX-License-Identifier: LGPL-2.1-or-later """ Dump all block group caches for a given btrfs file-system """ import sys import drgn from enum import Flag from drgn import NULL, Object, cast, container_of, execscript, \ reinterpret, sizeof from drgn.helpers.linux import * from drgn.helpers.common import decode_flags BTRFS_BLOCK_GROUP_FLAGS = [ ("BTRFS_BLOCK_GROUP_DATA", 0), ("BTRFS_BLOCK_GROUP_SYSTEM", 1), ("BTRFS_BLOCK_GROUP_METADATA", 2), ("BTRFS_BLOCK_GROUP_RAID0", 3), ("BTRFS_BLOCK_GROUP_RAID1", 4), ("BTRFS_BLOCK_GROUP_DUP", 5), ("BTRFS_BLOCK_GROUP_RAID10", 6), ("BTRFS_BLOCK_GROUP_RAID5", 9), ("BTRFS_BLOCK_GROUP_RAID6", 8), ("BTRFS_BLOCK_GROUP_RAID1C3", 9), ("BTRFS_BLOCK_GROUP_RAID1C4", 10) ] BTRFS_BLOCK_GROUP_RUNTIME_FLAGS = [ ("BLOCK_GROUP_FLAG_IREF", 0), ("BLOCK_GROUP_FLAG_REMOVED", 1), ("BLOCK_GROUP_FLAG_TO_COPY", 2), ("BLOCK_GROUP_FLAG_RELOCATING_REPAIR", 3), ("BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED", 4), ("BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE", 5), ("BLOCK_GROUP_FLAG_ZONED_DATA_RELOC", 6), ("BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE", 7), ("BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE", 8) ] if len(sys.argv) > 1: mnt_path = sys.argv[1] mnt_path = mnt_path.rstrip('/') else: mnt_path = "/" mnt = None for mnt in for_each_mount(prog, dst = mnt_path): pass if mnt is None: sys.stderr.write(f'Error: mount point {mnt_path} not found') sys.exit(1) try: fs_info = cast('struct btrfs_fs_info *', mnt.mnt.mnt_sb.s_fs_info) except LookupError: print('cannot find \'struct btrfs_fs_info *\', module not loaded?') sys.exit(1) def dump_bg(bg): print(f'BG at {bg.start.value_()}') print(f'\tflags: {decode_flags(bg.flags.value_(), BTRFS_BLOCK_GROUP_FLAGS)} ({hex(bg.flags)})') print(f'\tlength: {bg.length.value_()}') print(f'\tused: {bg.used.value_()}') print(f'\tpinned: {bg.pinned.value_()}') print(f'\treserved: {bg.reserved.value_()}') print(f'\truntime_flags: {decode_flags(bg.runtime_flags.value_(), BTRFS_BLOCK_GROUP_RUNTIME_FLAGS)} ({hex(bg.runtime_flags)})') if bg.fs_info.zone_size.value_() > 0: 
print(f'\tzone_unsuable: {bg.zone_unusable.value_()}') print() for bg in rbtree_inorder_for_each_entry("struct btrfs_block_group",\ fs_info.block_group_cache_tree.rb_root, "cache_node"): dump_bg(bg) drgn-0.0.31/contrib/find_struct_file.py000077500000000000000000000023751477777462700201300ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """Print what is using a struct file *, given as an address.""" import os import sys from drgn import Object from drgn.helpers.linux.fs import for_each_file from drgn.helpers.linux.list import list_for_each_entry from drgn.helpers.linux.pid import for_each_task def find_struct_file_fds(file: Object) -> None: for task in for_each_task(file.prog_): for fd, fd_file in for_each_file(task): if fd_file == file: print( f"PID {task.pid.value_()} COMM {task.comm.string_().decode()} FD {fd}" ) def find_struct_file_binfmt_misc(file: Object) -> None: prog = file.prog_ for node in list_for_each_entry( prog.type("Node", filename="binfmt_misc.c"), prog.object("entries", filename="binfmt_misc.c").address_of_(), "list", ): if node.interp_file == file: print(f"binfmt_misc {os.fsdecode(node.name.string_())}") def find_struct_file(file: Object) -> None: find_struct_file_fds(file) find_struct_file_binfmt_misc(file) if __name__ == "__main__": find_struct_file(Object(prog, "struct file *", int(sys.argv[1], 0))) drgn-0.0.31/contrib/fs_inodes.py000077500000000000000000000013521477777462700165500ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: LGPL-2.1-or-later """List the paths of all inodes cached in a given filesystem""" import os import sys from drgn.helpers.linux.fs import for_each_mount, inode_path from drgn.helpers.linux.list import list_for_each_entry if len(sys.argv) == 1: path = "/" else: path = sys.argv[1] mnt = None for mnt in for_each_mount(prog, dst=path): pass if mnt is None: sys.exit(f"No filesystem mounted at {path}") sb = mnt.mnt.mnt_sb for inode in list_for_each_entry( "struct inode", sb.s_inodes.address_of_(), "i_sb_list" ): try: print(os.fsdecode(inode_path(inode))) except (TypeError, ValueError): continue drgn-0.0.31/contrib/gcore.py000077500000000000000000000412721477777462700157030ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """Get a process core dump from drgn running against the kernel.""" import argparse import contextlib import functools import io from pathlib import Path import struct import sys from typing import Iterator, List, NamedTuple, Sequence, Tuple from drgn import ( Architecture, FaultError, Object, PlatformFlags, Program, ProgramFlags, cast, ) from drgn.helpers.linux.fs import d_path from drgn.helpers.linux.list import list_for_each_entry from drgn.helpers.linux.mm import access_remote_vm, cmdline, for_each_vma from drgn.helpers.linux.pid import find_task ELFCLASS32 = 1 ELFCLASS64 = 2 ELFDATA2LSB = 1 ELFDATA2MSB = 2 PT_LOAD = 1 PT_NOTE = 4 PF_X = 1 << 0 PF_W = 1 << 1 PF_R = 1 << 2 VM_READ = 0x1 VM_WRITE = 0x2 VM_EXEC = 0x4 VM_SHARED = 0x8 VM_IO = 0x4000 VM_DONTDUMP = 0x4000000 class Segment(NamedTuple): start: int end: int p_flags: int dump_size: int class MappedFile(NamedTuple): path: bytes offset: int start: int end: int class Phdr(NamedTuple): p_type: int p_flags: int p_offset: int p_vaddr: int # No p_paddr, we always set it to 0. 
p_filesz: int p_memsz: int p_align: int def vma_snapshot( page_size: int, task: Object ) -> Tuple[Sequence[Segment], Sequence[MappedFile]]: gate_vma = task.prog_["gate_vma"].address_of_() special_mapping_name = task.prog_["special_mapping_name"] segments: List[Segment] = [] mapped_files: List[MappedFile] = [] def always_dump_vma(vma: Object) -> bool: if vma == gate_vma: return True # The kernel checks (vma->vm_ops && vma->vm_ops->name && vma->vm_ops->name(vma)). # As of Linux 6.9, gate_vma_name() and special_mapping_name() are the # only instances of ->name(). vm_ops = vma.vm_ops.read_() if vm_ops: if vm_ops.name == special_mapping_name: return bool( cast("struct vm_special_mapping *", vma.vm_private_data).name ) return False def add_vma(vma: Object) -> None: start = vma.vm_start.value_() end = vma.vm_end.value_() flags = vma.vm_flags.read_() file = vma.vm_file.read_() p_flags = 0 if flags & VM_READ: p_flags |= PF_R if flags & VM_WRITE: p_flags |= PF_W if flags & VM_EXEC: p_flags |= PF_X # Dumbed down version of vma_dump_size() assuming # (MMF_DUMP_ANON_PRIVATE|MMF_DUMP_ANON_SHARED). if always_dump_vma(vma): dump_size = end - start elif flags & (VM_IO | VM_DONTDUMP): dump_size = 0 elif vma.anon_vma or ((flags & VM_SHARED) and file.f_inode.i_nlink == 0): dump_size = end - start elif ( file and vma.vm_pgoff == 0 and (vma.vm_flags & VM_READ) and file.f_inode.i_mode & 0o111 ): # Include first page of executables. # TODO: this is out of date with Linux kernel commit 84158b7f6a06 # ("coredump: Also dump first pages of non-executable ELF # libraries") (in v5.18). 
dump_size = page_size else: dump_size = 0 if ( segments and segments[-1].end == start and segments[-1].p_flags == p_flags and ( dump_size == 0 or segments[-1].dump_size == segments[-1].end - segments[-1].start ) ): segments[-1] = Segment( start=segments[-1].start, end=end, p_flags=p_flags, dump_size=segments[-1].dump_size + dump_size, ) else: segments.append( Segment( start=start, end=end, p_flags=p_flags, dump_size=dump_size, ) ) if file: path = d_path(file.f_path) offset = vma.vm_pgoff.value_() * page_size if ( mapped_files and mapped_files[-1].path == path and mapped_files[-1].end == start and mapped_files[-1].offset + (mapped_files[-1].end - mapped_files[-1].start) == offset ): mapped_files[-1] = MappedFile( path=path, offset=mapped_files[-1].offset, start=mapped_files[-1].start, end=end, ) else: mapped_files.append( MappedFile( path=path, offset=offset, start=start, end=end, ) ) for vma in for_each_vma(task.mm): add_vma(vma) add_vma(gate_vma) return segments, mapped_files def _nt_pids(task: Object) -> Tuple[int, int, int, int]: return ( task.pid.value_(), # pid task.real_parent.pid.value_(), # ppid task.tgid.value_(), # pgrp task.signal.pids[prog["PIDTYPE_SID"]].numbers[0].nr.value_(), # sid ) def nt_prstatus(task: Object) -> bytes: return struct.pack( "3IH2Q4I8Q27QI4x", 0, # info.si_signo 0, # info.si_code 0, # info.si_errno # TODO: can we get some of these? 0, # cursig 0, # sigpend 0, # sighold *_nt_pids(task), 0, 0, # utime 0, 0, # stime 0, 0, # cutime 0, 0, # cstime # reg *struct.unpack( "21Q", prog.read(task.stack.value_() + (4096 << 2) - 21 * 8, 21 * 8), ), 0, 0, 0, 0, 0, 0, # TODO: floating point registers. 
0, # fpvalid ) MAX_NICE = 19 MIN_NICE = -20 NICE_WIDTH = MAX_NICE - MIN_NICE + 1 MAX_RT_PRIO = 100 DEFAULT_PRIO = MAX_RT_PRIO + NICE_WIDTH // 2 def PRIO_TO_NICE(prio: int) -> int: return prio - DEFAULT_PRIO def nt_prpsinfo(task: Object, use_procfs: bool) -> bytes: fname_len = 16 ELF_PRARGSZ = 80 try: state: int = task.__state.value_() except AttributeError: state = task.state.value_() if state: state = (state & -state).bit_length() sname = ord(".") if state > 5 else b"RSDTZW"[state] cred = task.real_cred.read_() uid = cred.uid.val.value_() gid = cred.gid.val.value_() pids = _nt_pids(task) if use_procfs: psargs = ( Path(f"/proc/{pids[0]}/cmdline") .read_bytes() .rstrip(b"\0") .replace(b"\0", b" ") ) else: psargs = b" ".join(cmdline(task) or []) return struct.pack( f"4BQ6I{fname_len}s{ELF_PRARGSZ}s", state, sname, sname == ord("Z"), # zomb PRIO_TO_NICE(task.static_prio.value_()), # nice task.flags.value_(), # flag uid, gid, *_nt_pids(task), task.comm.string_(), # fname psargs, ) def nt_auxv(task: Object) -> bytes: auxv = task.mm.saved_auxv i = 0 while auxv[i]: i += 2 return prog.read(auxv.address_, auxv[i + 2].address_ - auxv.address_) def nt_file(mapped_files: Sequence[MappedFile], page_size: int) -> bytes: buf = bytearray(16 + 24 * len(mapped_files)) struct.pack_into("QQ", buf, 0, len(mapped_files), page_size) for i, mapped_file in enumerate(mapped_files): struct.pack_into( "QQQ", buf, 16 + 24 * i, mapped_file.start, mapped_file.end, mapped_file.offset // page_size, ) for mapped_file in mapped_files: buf.extend(mapped_file.path) buf.append(0) return buf def gen_notes( task: Object, mapped_files: Sequence[MappedFile], page_size: int, use_procfs: bool ) -> bytearray: notes = [] def add_nt_prstatus(t: Object) -> None: # This is obviously racy for the live kernel, but it's best effort. 
if t.on_cpu: print(f"skipping running thread {t.pid.value_()}", file=sys.stderr) else: notes.append( ( b"CORE", 1, # NT_PRSTATUS nt_prstatus(t), ) ) add_nt_prstatus(task) for t in list_for_each_entry( task.type_.type, task.signal.thread_head.address_of_(), "thread_node" ): if t != task: add_nt_prstatus(t) notes.append( ( b"CORE", 3, # NT_PRPSINFO nt_prpsinfo(task.group_leader.read_(), use_procfs), ) ) # No NT_SIGINFO since we have no signal. notes.append( ( b"CORE", 6, # NT_AUXV nt_auxv(task), ) ) notes.append( ( b"CORE", 0x46494C45, # NT_FILE nt_file(mapped_files, page_size), ) ) buf = bytearray() for name, type_, desc in notes: buf.extend(struct.pack("III", len(name) + 1, len(desc), type_)) buf.extend(name) buf.extend(bytes(4 - (len(name) & 3))) buf.extend(desc) buf.extend(bytes(-len(buf) & 3)) return buf def try_read_memory_procfs( page_size: int, mem_file: io.FileIO, address: int, size: int ) -> Iterator[Tuple[int, bytes]]: # An address may overflow a signed long, but we can still seek to it in # increments of sys.maxsize. whence = 0 offset = address while offset: seek = min(offset, sys.maxsize) try: mem_file.seek(seek, whence) except OSError: # The offset returned by the lseek() system call may be negative # when interpreted as an off_t, which makes Python think that there # was an error even though the seek succeeded. pass offset -= seek whence = 1 while size > 0: try: buf = mem_file.read(size) yield address, buf address += len(buf) size -= len(buf) except IOError: try: mem_file.seek(page_size, 1) except OSError: # See above. pass address += page_size size -= page_size def try_read_memory_remote( page_size: int, mm: Object, address: int, size: int ) -> Iterator[Tuple[int, bytes]]: # Reading page by page isn't very efficient, but it's foolproof. 
while size > 0: try: yield address, access_remote_vm(mm, address, page_size) except FaultError: pass address += page_size size -= page_size def main(prog: Program, argv: Sequence[str]) -> None: parser = argparse.ArgumentParser( description="Capture a process core dump without stopping it or from a kernel core dump (using drgn)" ) parser.add_argument( "--no-procfs", dest="use_procfs", action="store_false", help="don't use the proc filesystem to get information about the process even when the process is local; " "this will skip memory that is paged out and is slower, " "but it can be useful if the mmap lock is deadlocked", ) parser.add_argument("pid", type=int, help="PID of process to capture") parser.add_argument("core", type=str, help="output file") args = parser.parse_args(argv) args.use_procfs = args.use_procfs and ( prog.flags & (ProgramFlags.IS_LIVE | ProgramFlags.IS_LOCAL) == (ProgramFlags.IS_LIVE | ProgramFlags.IS_LOCAL) ) # TODO: these aren't necessarily the same as the kernel (e.g., when running # a 32-bit application on a 64-bit kernel). platform = prog.platform assert platform is not None ei_class = ELFCLASS64 if (platform.flags & PlatformFlags.IS_64_BIT) else ELFCLASS32 ei_data = ( ELFDATA2LSB if (platform.flags & PlatformFlags.IS_LITTLE_ENDIAN) else ELFDATA2MSB ) if platform.arch == Architecture.X86_64: e_machine = 62 # EM_X86_64 else: # TODO: there are assumptions that the host and target are x86-64 # throughout this script (in struct.pack() calls, note contents). 
sys.exit("only x86-64 is supported") page_size = prog["PAGE_SIZE"].value_() ehdr_struct = struct.Struct("16BHHIQQQIHHHHHH") phdr_struct = struct.Struct("IIQQQQQQ") task = find_task(prog, args.pid) if not task: sys.exit(f"PID {args.pid} not found") segments, mapped_files = vma_snapshot(page_size, task) notes = gen_notes(task, mapped_files, page_size, args.use_procfs) with contextlib.ExitStack() as exit_stack: if args.use_procfs: try_read_memory = functools.partial( try_read_memory_procfs, page_size, exit_stack.enter_context( open(f"/proc/{args.pid}/mem", "rb", buffering=0) ), ) else: try_read_memory = functools.partial( try_read_memory_remote, page_size, task.mm.read_() ) f = exit_stack.enter_context(open(args.core, "wb")) offset = f.seek(ehdr_struct.size) phdrs = [ Phdr( p_type=PT_NOTE, p_flags=0, p_offset=offset, p_vaddr=0, p_filesz=len(notes), p_memsz=0, p_align=0, ) ] f.write(notes) offset += len(notes) # Align up to a page. offset = f.seek(-offset % page_size, 1) for segment in segments: written_start_address = written_end_address = segment.start written_offset = offset for address, buf in try_read_memory(segment.start, segment.dump_size): if address == written_end_address: written_end_address += len(buf) else: phdrs.append( Phdr( p_type=PT_LOAD, p_flags=segment.p_flags, p_offset=written_offset, p_vaddr=written_start_address, p_filesz=written_end_address - written_start_address, p_memsz=address - written_start_address, p_align=page_size, ) ) written_start_address = address written_end_address = address + len(buf) written_offset = offset f.write(buf) offset += len(buf) phdrs.append( Phdr( p_type=PT_LOAD, p_flags=segment.p_flags, p_offset=written_offset, p_vaddr=written_start_address, p_filesz=written_end_address - written_start_address, p_memsz=segment.end - written_start_address, p_align=page_size, ) ) e_phoff = offset for phdr in phdrs: f.write( phdr_struct.pack( phdr.p_type, phdr.p_flags, phdr.p_offset, phdr.p_vaddr, 0, # p_paddr phdr.p_filesz, phdr.p_memsz, 
phdr.p_align, ) ) # TODO: >= 2**16 phdrs f.seek(0) f.write( ehdr_struct.pack( 0x7F, # ELFMAG0 ord("E"), # ELFMAG1 ord("L"), # ELFMAG2 ord("F"), # ELFMAG3 ei_class, ei_data, 1, # EI_VERSION = EV_CURRENT 0, # EI_OSABI = ELFOSABI_NONE 0, # EI_ABIVERSION 0, # EI_PAD 0, 0, 0, 0, 0, 0, 4, # e_type = ET_CORE e_machine, 1, # e_version = EV_CURRENT 0, # e_entry e_phoff, 0, # e_shoff 0, # e_flags ehdr_struct.size, # e_ehsize phdr_struct.size, # e_phentsize len(phdrs), # e_phnum 0, # e_shentsize, 0, # e_shnum, 0, # e_shstrndx ) ) if __name__ == "__main__": prog: Program main(prog, sys.argv[1:]) drgn-0.0.31/contrib/irq.py000066400000000000000000000340741477777462700153760ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) 2023, Oracle and/or its affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Script to dump irq stats using drgn""" from typing import Iterator from typing import Tuple from drgn import NULL from drgn import Object from drgn import Program from drgn.helpers.common.format import escape_ascii_string from drgn.helpers.linux.cpumask import for_each_present_cpu from drgn.helpers.linux.cpumask import cpumask_to_cpulist from drgn.helpers.linux.mapletree import mtree_load from drgn.helpers.linux.mapletree import mt_for_each from drgn.helpers.linux.percpu import per_cpu_ptr from drgn.helpers.linux.radixtree import radix_tree_for_each from drgn.helpers.linux.radixtree import radix_tree_lookup def _sparse_irq_supported(prog: Program) -> Tuple[bool, str]: try: # Since Linux kernel commit 721255b9826b ("genirq: Use a maple # tree for interrupt descriptor management") (in v6.5), sparse # irq descriptors are stored in a maple tree. _ = prog["sparse_irqs"] return True, "maple" except KeyError: # Before that, they are in radix tree. 
try: _ = prog["irq_desc_tree"] return True, "radix" except KeyError: return False, None def _kstat_irqs_cpu(prog: Program, irq: int, cpu: int) -> int: desc = irq_to_desc(prog, irq) if not desc: return 0 addr = per_cpu_ptr(desc.kstat_irqs, cpu) return Object(prog, "int", address=addr).value_() def irq_in_use(prog: Program, irq: int) -> bool: """ Check if a given irq number is in use or not. An irq number is considered to be in use by the kernel, if the kernel has allocated a irq descriptor for it. The irq may not yet have any registered irq handlers. :param prog: drgn program :param irq: irq number :return: True if irq is in use, False otherwise """ desc = irq_to_desc(prog, irq) # An irq number is in use if irq_desc object has been allocated for it return bool(desc) def irq_has_action(prog: Program, irq: int) -> bool: """ Check if a given irq has handler(s) registered or not. :param prog: drgn program :param irq: irq number :return: True if irq has registered handler(s), False otherwise """ desc = irq_to_desc(prog, irq) return bool(desc and desc.action) def for_each_irq(prog: Program) -> Iterator[int]: """ Iterate through all allocated irq numbers. :param prog: drgn program :return: Iterator of irq numbers """ _, tree_type = _sparse_irq_supported(prog) if tree_type == "radix": irq_desc_tree = prog["irq_desc_tree"].address_of_() for irq, _ in radix_tree_for_each(irq_desc_tree): yield irq elif tree_type == "maple": irq_desc_tree = prog["sparse_irqs"].address_of_() for irq, _, _ in mt_for_each(irq_desc_tree): yield irq else: count = len(prog["irq_desc"]) for irq_num in range(count): yield irq_num def for_each_irq_desc(prog: Program) -> Iterator[Object]: """ Iterate through all allocated irq descriptors. :param prog: drgn program :return: Iterator of ``struct irq_desc *`` objects. 
""" _, tree_type = _sparse_irq_supported(prog) if tree_type == "radix": irq_desc_tree = prog["irq_desc_tree"].address_of_() for _, addr in radix_tree_for_each(irq_desc_tree): irq_desc = Object(prog, "struct irq_desc", address=addr).address_of_() yield irq_desc elif tree_type == "maple": irq_desc_tree = prog["sparse_irqs"].address_of_() for _, _, addr in mt_for_each(irq_desc_tree): irq_desc = Object(prog, "struct irq_desc", address=addr).address_of_() yield irq_desc else: count = len(prog["irq_desc"]) for irq_num in range(count): yield (prog["irq_desc"][irq_num]).address_of_() def irq_name_to_desc(prog: Program, name: str) -> Object: """ Get ``struct irq_desc *`` for irq handler of given name :param prog: drgn program :param name: name of irq handler :return: ``struct irq_desc *`` object if irq descriptor is found. NULL otherwise """ for desc in for_each_irq_desc(prog): if desc.action and desc.action.name == name: return desc return NULL(prog, "void *") def irq_to_desc(prog: Program, irq: int) -> Object: """ Get ``struct irq_desc *`` for given irq number :param prog: drgn program :param irq: irq number :return: ``struct irq_desc *`` object if irq descriptor is found. NULL otherwise """ _, tree_type = _sparse_irq_supported(prog) if tree_type: if tree_type == "radix": addr = radix_tree_lookup(prog["irq_desc_tree"].address_of_(), irq) else: addr = mtree_load(prog["sparse_irqs"].address_of_(), irq) if addr: return Object(prog, "struct irq_desc", address=addr).address_of_() else: return NULL(prog, "void *") else: return (prog["irq_desc"][irq]).address_of_() def get_irq_affinity(prog: Program, irq: int) -> Object: """ Get ``struct cpumask`` for given irq's cpu affinity :param prog: drgn program :param irq: irq number :return: ``struct cpumask`` object if irq descriptor is found. 
NULL otherwise """ if not irq_in_use(prog, irq): print("IRQ not in use so affinity data is not reliable") irq_desc = irq_to_desc(prog, irq) # if CONFIG_CPUMASK_OFFSTACK is enabled then affinity is an array # of cpumask objects otherwise it is pointer to a cpumask object if hasattr(irq_desc, "irq_common_data"): try: _ = len(irq_desc.irq_common_data.affinity) addr = irq_desc.irq_common_data.affinity.address_ except TypeError: addr = irq_desc.irq_common_data.affinity.value_() elif hasattr(irq_desc, "irq_data"): try: _ = len(irq_desc.irq_data.affinity) addr = irq_desc.irq_data.affinity.address_ except TypeError: addr = irq_desc.irq_data.affinity.value_() else: return None return Object(prog, "struct cpumask", address=addr) def get_irq_affinity_list(prog: Program, irq: int) -> Object: """ Get affinity of a given cpu. :param prog: drgn program :param irq: irq number :return: range of cpus to which irq is affined to """ affinity = get_irq_affinity(prog, irq) if affinity is not None: return cpumask_to_cpulist(affinity) else: return None def show_irq_num_stats(prog: Program, irq: int) -> None: """ Show stats for a given irq number :param prog: drgn program :param irq: irq number :return: None """ if not irq_in_use(prog, irq): print(f"irq: {irq} is not in use") return if not irq_has_action(prog, irq): print(f"irq: {irq} has no handlers registered") return print_header = True total_count = 0 for cpu in for_each_present_cpu(prog): kstat_irqs = _kstat_irqs_cpu(prog, irq, cpu) if not kstat_irqs: continue desc = irq_to_desc(prog, irq) name = escape_ascii_string( desc.action.name.string_(), escape_backslash=True ) if print_header: print( f"irq: {irq} name: {name} ({desc.type_.type_name()})0x{desc.value_():x}" ) print_header = False total_count += kstat_irqs print(f" CPU: {cpu} \t count: {kstat_irqs}") print(f" Total: {total_count}") def show_irq_name_stats(prog: Program, irq_name: str) -> None: """ Show irq stats for irqs whose handler have specified name or for irqs whose handler 
names begin with specified string. :param prog: drgn program :param irq_name: name or beginning of name of irq handler :return: None """ found = False for irq in for_each_irq(prog): if irq_in_use(prog, irq) and irq_has_action(prog, irq): desc = irq_to_desc(prog, irq) name = escape_ascii_string( desc.action.name.string_(), escape_backslash=True ) if name.startswith(irq_name): found = True show_irq_num_stats(prog, irq) if not found: print( f"Found no irq with name: {irq_name} or with name starting with: {irq_name}" ) def show_irq_stats(prog: Program) -> None: """ Show stats for all irqs. :param prog: drgn program :return: None """ for irq in for_each_irq(prog): if irq_in_use(prog, irq) and irq_has_action(prog, irq): show_irq_num_stats(prog, irq) def show_cpu_irq_num_stats(prog: Program, cpu: int, irq: int) -> None: """ Show irq stats of a cpu for a given irq number :param prog: drgn program :param cpu: cpu index :param irq: irq number :return: None """ if not irq_in_use(prog, irq): print(f"irq: {irq} is not in use") return if not irq_has_action(prog, irq): print(f"irq: {irq} has no handlers registered") return print(f"IRQ stats for cpu: {cpu}") desc = irq_to_desc(prog, irq) name = escape_ascii_string( desc.action.name.string_(), escape_backslash=True ) kstat_irqs = _kstat_irqs_cpu(prog, irq, cpu) print( f" irq: {irq} name: {name} ({desc.type_.type_name()})0x{desc.value_():x} count: {kstat_irqs}" ) def show_cpu_irq_name_stats(prog: Program, cpu: int, irq_name: str) -> None: """ Show irq stats of a cpu for irqs whose handler have specified name or for irqs whose handler names begin with specified string. 
:param prog: drgn program :param cpu: cpu index :param irq_name: name or beginning of name of irq handler :return: None """ found = False total_irqs_on_cpu = 0 print(f"IRQ stats for cpu: {cpu}") for irq in for_each_irq(prog): if irq_in_use(prog, irq) and irq_has_action(prog, irq): desc = irq_to_desc(prog, irq) name = escape_ascii_string( desc.action.name.string_(), escape_backslash=True ) if name.startswith(irq_name): found = True kstat_irqs = _kstat_irqs_cpu(prog, irq, cpu) if not kstat_irqs: continue total_irqs_on_cpu += kstat_irqs print( f" irq: {irq} name: {name} ({desc.type_.type_name()})0x{desc.value_():x} count: {kstat_irqs}" ) if not found: print( f"Found no irq with name: {irq_name} or with name starting with: {irq_name}" ) else: print(f"Total: {total_irqs_on_cpu}") def show_cpu_irq_stats(prog: Program, cpu: int) -> None: """ Show irq stats for specified cpu. :param prog: drgn program :param cpu: cpu index :return: None """ total_irqs_on_cpu = 0 print(f"IRQ stats for cpu: {cpu}") for irq in for_each_irq(prog): if irq_in_use(prog, irq) and irq_has_action(prog, irq): kstat_irqs = _kstat_irqs_cpu(prog, irq, cpu) if not kstat_irqs: continue desc = irq_to_desc(prog, irq) name = escape_ascii_string( desc.action.name.string_(), escape_backslash=True ) print( f" irq: {irq} name: {name} ({desc.type_.type_name()})0x{desc.value_():x} count: {kstat_irqs}" ) total_irqs_on_cpu += kstat_irqs print(f"Total: {total_irqs_on_cpu}") def show_each_cpu_irq_stats(prog: Program) -> None: """ Show irq stats for each cpu. :param prog: drgn program :return: None """ for cpu in for_each_present_cpu(prog): show_cpu_irq_stats(prog, cpu) print("\n") def print_irq_affinity(prog: Program, irq: int) -> None: """ Print cpu affinity of specified irq. 
:param prog: drgn program :param irq: irq number :return: None """ if not irq_in_use(prog, irq): print(f"irq: {irq} is not in use") return if not irq_has_action(prog, irq): print(f"irq: {irq} has no handlers registered") return desc = irq_to_desc(prog, irq) name = escape_ascii_string( desc.action.name.string_(), escape_backslash=True ) affinity = get_irq_affinity_list(prog, irq) print(f"irq: {irq} name: {name} affinity: {affinity}") def print_irqs_affinities(prog: Program) -> None: """ Print cpu affinities for all irqs in use. :param prog: drgn program :return: None """ for irq in for_each_irq(prog): if irq_in_use(prog, irq) and irq_has_action(prog, irq): print_irq_affinity(prog, irq) def print_all_irqs(prog: Program) -> None: """ Print number, name, ``struct irq_desc *`` and ``struct irqaction *`` for all irqs in use. :param prog: drgn program :return: None """ for irq in for_each_irq(prog): if irq_in_use(prog, irq) and irq_has_action(prog, irq): desc = irq_to_desc(prog, irq) name = escape_ascii_string( desc.action.name.string_(), escape_backslash=True ) print( f"irq: {irq} name: {name} ({desc.type_.type_name()})0x{desc.value_():x} ({desc.action.type_.type_name()})0x{desc.action.value_():x}" ) print("###################################################") print("List of IRQs") print("###################################################") print_all_irqs(prog) print("\n") print("###################################################") print("IRQ affinities") print("###################################################") print_irqs_affinities(prog) print("\n") print("###################################################") print("IRQ stats") print("###################################################") show_irq_stats(prog) print("\n") print("###################################################") print("cpuwise IRQ stats") print("###################################################") show_each_cpu_irq_stats(prog) print("\n") 
drgn-0.0.31/contrib/kcore_list.py000077500000000000000000000010171477777462700167330ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """Dump the list of memory regions exposed by /proc/kcore.""" from drgn import cast from drgn.helpers.linux.list import list_for_each_entry kcore_type = prog.type("enum kcore_type") for entry in list_for_each_entry( "struct kcore_list", prog["kclist_head"].address_of_(), "list" ): print( f"{cast(kcore_type, entry.type).format_(type_name=False)} {hex(entry.addr)} {hex(entry.size)}" ) drgn-0.0.31/contrib/kernel_sys.py000077500000000000000000000024461477777462700167620ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) SUSE Linux. # SPDX-License-Identifier: LGPL-2.1-or-later """Display system information and configuration data.""" from datetime import datetime from datetime import timedelta from drgn.helpers.common.format import number_in_binary_units from drgn.helpers.linux import for_each_online_cpu from drgn.helpers.linux.mm import totalram_pages from drgn.helpers.linux.pid import for_each_task from drgn.helpers.linux.sched import loadavg def print_line(key, value): print(f"{key:<16} {value}") uts = prog["init_uts_ns"].name timekeeper = prog["shadow_timekeeper"] date = datetime.fromtimestamp(timekeeper.xtime_sec).strftime("%c") uptime = timedelta(seconds=timekeeper.ktime_sec.value_()) load = ", ".join([f"{v:.2f}" for v in loadavg(prog)]) totalram = (prog['PAGE_SIZE'] * totalram_pages(prog)).value_() print_line("CPUS", len(list(for_each_online_cpu(prog)))) print_line("DATE", date) print_line("UPTIME", uptime) print_line("LOAD AVERAGE", load) print_line("TASKS", len(list(for_each_task(prog)))) print_line("NODENAME", uts.nodename.string_().decode()) print_line("RELEASE", uts.release.string_().decode()) print_line("VERSION", uts.version.string_().decode()) print_line("MACHINE", uts.machine.string_().decode()) print_line("MEMORY", 
number_in_binary_units(totalram)) drgn-0.0.31/contrib/lsmod.py000077500000000000000000000027601477777462700157210ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """An implementation of lsmod(8) using drgn""" from drgn.helpers.linux.list import list_for_each_entry def module_total_size(mod): # Since Linux kernel commit ac3b43283923 ("module: replace module_layout # with module_memory") (in v6.4), the memory sizes are in the struct # module::mem array. Before that, they are in struct module::init_layout # and struct module::core_layout. try: num_types = mod.prog_["MOD_MEM_NUM_TYPES"] except KeyError: return (mod.init_layout.size + mod.core_layout.size).value_() else: return sum( mod.mem[type].size.value_() for type in range(num_types) ) print("Module Size Used by") config_module_unload = prog.type("struct module").has_member("refcnt") for mod in list_for_each_entry("struct module", prog["modules"].address_of_(), "list"): name = mod.name.string_().decode() if config_module_unload: refcnt = mod.refcnt.counter.value_() - 1 used_by = [ use.source.name.string_().decode() for use in list_for_each_entry( "struct module_use", mod.source_list.address_of_(), "source_list" ) ] else: refcnt = "-" used_by = [] used = ",".join(used_by) if used: used = " " + used print(f"{name:19} {module_total_size(mod):>8} {refcnt}{used}") drgn-0.0.31/contrib/mount.py000077500000000000000000000010211477777462700157320ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) SUSE Linux. 
# SPDX-License-Identifier: LGPL-2.1-or-later """A simplified implementation of mount(1) using drgn""" from drgn.helpers.linux.fs import for_each_mount, mount_dst, mount_fstype, mount_src print("Mount Type Devname Dirname") for mount in for_each_mount(prog): maddr = mount.value_() src = mount_src(mount).decode() dst = mount_dst(mount).decode() type_ = mount_fstype(mount).decode() print(f"{maddr:<16x} {type_:<12} {src:<12} {dst}") drgn-0.0.31/contrib/negdentdelete.py000066400000000000000000000113011477777462700173760ustar00rootroot00000000000000# Copyright (c) 2024, Oracle and/or its affiliates. """ negdentdelete - remove negative dentries from a directory Normally, there aren't many good ways to get rid of negative dentries. You could: 1. Delete the entire directory containing them. This works, but if there are real files in it, then it's a lot of work. 2. Use drop_caches. This option is unfortunately global and so you can't target specific files. 3. Wait until your memory fills up and the LRU starts to handle it... This script can help you target a specific directory and remove all negative dentries within it. Note that while the script is reasonably safe, it can't be 100% reliable: iterating over a frequently changing linked list in the kernel may go awry. The script also creates and unlinks files within the target directory, so the user must have permission to do so. If the script runs as root, then you should ensure that root-owned files inside the directory won't cause any problems. 
""" import argparse import os import time from typing import Iterator, List from drgn import Object, Program from drgn.helpers.linux.fs import path_lookup from drgn.helpers.linux.list import hlist_for_each_entry, list_for_each_entry def for_each_child_dentry(dentry: Object) -> Iterator[Object]: try: # da549bdd15c29 ("dentry: switch the lists of children to hlist") return hlist_for_each_entry( "struct dentry", dentry.d_children.address_of_(), "d_sib", ) except AttributeError: return list_for_each_entry( "struct dentry", dentry.d_subdirs.address_of_(), "d_child" ) def yield_negative_dentries( prog: Program, dir_: str, chunk_size: int = 10000 ) -> Iterator[List[bytes]]: parent = path_lookup(prog, dir_).dentry negdent_names = [] for child in for_each_child_dentry(parent): # Do this at the top of the loop so that there's less of a chance of the # current child being freed out from under us. It's sort of like # list_for_each_entry_safe(). Of course, there's no guarantee that this # dentry will still be here when we come back (we're not holding a # reference, after all). But at least we're not actively freeing the # dentry we're currently looking at. if len(negdent_names) >= chunk_size: yield negdent_names negdent_names = [] if not child.d_inode: negdent_names.append(child.d_name.name.string_()) if negdent_names: yield negdent_names def remove_negative_dentries( dir_: str, names: List[bytes], verbose: bool = False ) -> None: dir_fd = os.open(dir_, os.O_PATH) try: for name in names: # When a file is open and it is unlinked, its associated dentry cannot # remain a part of the dentry cache (since a new file of the same name # could be created). So, it is removed from the dentry hash table so it # can no longer be looked up (see d_delete() in fs/dcache.c). # # When the file is closed, dput() will find that the dentry is # unhashed, and so it will immediately free it. 
Thus, creating a file # with the same name as a negative dentry, unlinking, and then closing # it, is a sneaky way of removing the cached negative dentry. While # this isn't ideal (creating the file does result in some I/O), it is # still remarkably quick. fd = os.open(name, os.O_RDONLY | os.O_CREAT, dir_fd=dir_fd) os.unlink(name, dir_fd=dir_fd) os.close(fd) if verbose: print(name.decode(), fd) finally: os.close(dir_fd) def main(prog: Program): parser = argparse.ArgumentParser( description="remove negative dentries from a directory" ) parser.add_argument( "directory", help="directory to clear negative dentries from", ) parser.add_argument( "--verbose", "-v", action="store_true", help="print each dentry we delete (much slower!)", ) parser.add_argument( "--chunk-size", type=int, default=10000, help="number of negative dentries to read at a time", ) args = parser.parse_args() directory = os.path.abspath(args.directory) removed = 0 start = time.time() for batch in yield_negative_dentries(prog, directory, args.chunk_size): remove_negative_dentries(directory, batch, verbose=args.verbose) removed += len(batch) total = time.time() - start dps = removed / total print(f"removed {removed} negative dentries in {total:.2f}s ({dps:.2f}/s)") if __name__ == "__main__": prog: Program main(prog) drgn-0.0.31/contrib/platform_drivers.py000066400000000000000000000015331477777462700201570ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: LGPL-2.1-or-later """Print registered platform drivers.""" from drgn import NULL, container_of from drgn.helpers.linux.list import list_for_each_entry def bus_to_subsys(bus): for sp in list_for_each_entry( "struct subsys_private", prog["bus_kset"].list.address_of_(), "subsys.kobj.entry", ): if sp.bus == bus: return sp return NULL(bus.prog_, "struct subsys_private *") sp = bus_to_subsys(prog["platform_bus_type"].address_of_()) for priv in list_for_each_entry( "struct driver_private", sp.drivers_kset.list.address_of_(), "kobj.entry" ): driver = priv.driver print(driver.name.string_().decode()) platform_driver = container_of(driver, "struct platform_driver", "driver") print(platform_driver) drgn-0.0.31/contrib/ps.py000077500000000000000000000215331477777462700152240ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """An implementation of ps(1) using drgn""" import sys from argparse import ArgumentParser from collections import OrderedDict from drgn import ProgramFlags from drgn.helpers.linux.cpumask import for_each_online_cpu from drgn.helpers.linux.list import list_for_each_entry, list_count_nodes from drgn.helpers.linux.mm import cmdline, totalram_pages from drgn.helpers.linux.percpu import per_cpu, percpu_counter_sum from drgn.helpers.linux.pid import for_each_task from drgn.helpers.linux.sched import task_cpu, task_state_to_char PAGE_SIZE = prog["PAGE_SIZE"].value_() def get_number_of_children(task): """ Returns number of children of a given task """ return list_count_nodes(task.children.address_of_()) def get_cmd(task): """ Return the commandline arguments of a given task """ return b" ".join(cmdline(task)).decode() def parse_cmdline_args(args): """ Command line argument parser """ parser = ArgumentParser(prog="drgn ps", description="Report process status infromation") group = parser.add_mutually_exclusive_group() group.add_argument("-a", 
"--active", action="store_true", default=False, help="Print active thread on each CPU " "(data may be inconsistent for live kernel)") group.add_argument("-c", "--children", nargs="*", default=None, type=int, help="Print data about children of the given process(es)") group.add_argument("--cpus", nargs="*", default=None, type=int, help="Ready/running processes for given CPUs") parser.add_argument("-d", "--detailed", action="store_true", default=False, help="Print additional details about the threads") group.add_argument("--hierarchy", metavar="PID", nargs="+", default=None, help="Print parent hierarchy") group.add_argument("-k", "--kthread", action='store_true', default=False, help="Print kernel threads only") group.add_argument("-t", "--threads", nargs="+", default=None, type=int, help="Print detailed information of a given threads") group.add_argument("-u", "--uthread", action="store_true", default=False, help="Print userspace threads only") cmd_opts = parser.parse_args(args) return cmd_opts def get_rss(task): """ Returns the Resident Set Size """ try: return PAGE_SIZE * sum([percpu_counter_sum(x) for x in task.mm.rss_stat]) except (AttributeError, TypeError): return PAGE_SIZE * sum([x.counter for x in task.mm.rss_stat.count]) def print_active_tasks(cmd_opts): """ Function to print active task on each CPU """ if prog.flags & ProgramFlags.IS_LIVE: print("Running on live kernel - The data may be inconsistent\n") runqueues = prog["runqueues"] print_hdr = True for cpu in for_each_online_cpu(prog): task = per_cpu(runqueues, cpu).curr print_std(task, print_hdr, cmd_opts) print_hdr = False def print_cpu_tasks(cmd_opts): """ Print running and runnable tasks on a given CPU """ for cpu in for_each_online_cpu(prog): if cmd_opts.cpus: if cpu not in cmd_opts.cpus: continue print_hdr = True for task in for_each_task(prog): if task_cpu(task) == cpu: print_std(task, print_hdr, cmd_opts) print_hdr = False def hierarchy(cmd_opts): """ Print information of all parent processes """ 
pids = cmd_opts.hierarchy tasks = [] for task in for_each_task(prog): if str(task.pid.value_()) not in pids: continue pid = task.pid.value_() pids.remove(str(pid)) while (pid > 1): pid = task.pid.value_() tasks.append(task) task = task.parent print_hdr = True while len(tasks) != 0: print_std(tasks.pop(), print_hdr, cmd_opts) print_hdr = False print("\n") if len(pids) != 0: print("the following pids are invalid: {0}".format(pids)) def thread_details(cmd_opts): """ Prints details of all the threads including kernel and userspace both. """ print_hdr = True for task in for_each_task(prog): if cmd_opts.threads: if task.pid.value_() in cmd_opts.threads: cmd_opts.kthread = 0 cmd_opts.uthread = 0 else: continue if cmd_opts.kthread: if task.mm: continue elif cmd_opts.uthread: if not task.mm: continue print_std(task, print_hdr, cmd_opts) print_hdr = False def process_child_task(cmd_opts): """ Print all child tasks of the given parent tasks """ for task in for_each_task(prog): if cmd_opts.children: if task.pid not in cmd_opts.children: continue print("Parent Task:") print_hdr = True print_std(task, print_hdr, cmd_opts) print("Child Tasks:") head = task.children.address_of_() for c_task in list_for_each_entry("struct task_struct", head, "sibling"): print_std(c_task, print_hdr, cmd_opts) print_hdr = False # No child found if print_hdr: print("NA") print("\n") def get_task_memory_info(task): """ Return RSS (Resident Set Size) memory and VMS (Virtual Memory Size) for a given task. Return None if the task is a kernel thread. """ if not task.mm: return None vms = PAGE_SIZE * task.mm.total_vm.value_() # Since Linux kernel commit f1a7941243c102a44e ("mm: convert mm's rss # stats into percpu_counter") (in v6.2), rss_stat is percpu counter. 
def get_task_memory_info(task):
    """
    Return a ``(vms, rss)`` tuple for a task, both in bytes.

    VMS is the Virtual Memory Size and RSS the Resident Set Size.
    Returns ``None`` if the task is a kernel thread (it has no mm).
    """
    if not task.mm:
        return None
    vms = PAGE_SIZE * task.mm.total_vm.value_()
    # Reuse get_rss(), which already handles both rss_stat layouts: the
    # percpu_counter array introduced by Linux kernel commit f1a7941243c1
    # ("mm: convert mm's rss stats into percpu_counter") (in v6.2) and the
    # older atomic counter struct.
    return (vms, get_rss(task))
def main():
    """Entry point: parse the command line and dispatch to the chosen report."""
    opts = parse_cmdline_args(sys.argv[1:])
    if opts.active:
        print_active_tasks(opts)
        return
    if isinstance(opts.children, list):
        process_child_task(opts)
        return
    if isinstance(opts.cpus, list):
        print_cpu_tasks(opts)
        return
    if opts.hierarchy:
        hierarchy(opts)
        return
    # Default report: per-thread details.
    thread_details(opts)
Requires: "pip install ptpython" which brings in pygments and prompt_toolkit """ import functools import os import shutil from typing import Any, Dict, Set from prompt_toolkit.completion import Completer from prompt_toolkit.formatted_text import PygmentsTokens from ptpython import embed from ptpython.repl import run_config from pygments.lexers.c_cpp import CLexer import drgn import drgn.cli class DummyForRepr: """ A dummy class to pass back to _format_result_output() that pretends to have the given repr() """ def __init__(self, s): self.s = s def __repr__(self): return self.s class DummyForPtRepr: """A similar dummy class for the __pt_repr__() method.""" def __init__(self, s): self.s = s def __pt_repr__(self): return self.s def _maybe_c_format(s): """Given a string, try to use pygments to highlight it it as a C string.""" try: tokens = CLexer().get_tokens_unprocessed(s) formatted = PygmentsTokens([(tokentype, value) for index, tokentype, value in tokens]) to_format = DummyForPtRepr(formatted) except Exception as e: to_format = DummyForRepr(s) return to_format @functools.lru_cache(maxsize=1) def _object_fields() -> Set[str]: return set(dir(drgn.Object)) class ReorderDrgnObjectCompleter(Completer): """A completer which puts Object member fields above Object defaults""" def __init__(self, c: Completer): self.c = c def get_completions(self, document, complete_event): completions = list(self.c.get_completions(document, complete_event)) if not completions: return completions text = completions[0].text member_fields = [] # If the first completion is "absent_", it is *very likely* that we are # now looking at the completion of on Object. 
def interact(local: Dict[str, Any], banner: str):
    """
    Start a ptpython REPL seeded with drgn's globals.

    Mirrors the signature of drgn.cli's interact hook: prints the banner,
    then embeds ptpython with our configure() customizations and a
    dedicated history file.
    """
    print(banner)
    history = os.path.expanduser("~/.drgn_history.ptpython")
    embed(globals=local, history_filename=history, title="drgn", configure=configure)
def for_each_kmem_cache_node(slab_cache: Object) -> Iterator[Object]:
    """
    Yield the per-NUMA-node state of a slab cache.

    :param slab_cache: ``struct kmem_cache *``
    :return: Iterator of ``struct kmem_cache_node *`` objects, one per
        possible node id.
    """
    nr_nodes = prog["nr_node_ids"].value_()
    for node_id in range(nr_nodes):
        yield slab_cache.node[node_id]
= sys.stdin.buffer.read() else: needle = bytes.fromhex(args.bytes) search_memory(prog, needle) drgn-0.0.31/contrib/slabinfo.py000077500000000000000000000057021477777462700163770ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Canonical Ltd. # SPDX-License-Identifier: LGPL-2.1-or-later """ Script to dump slabinfo status using drgn""" from typing import Iterator, Optional from drgn import Object from drgn.helpers.common.format import escape_ascii_string from drgn.helpers.linux.list import list_for_each_entry, list_for_each_entry_reverse from drgn.helpers.linux.slab import for_each_slab_cache MAX_PARTIAL_TO_SCAN = 10000 OO_SHIFT = 16 OO_MASK = (1 << OO_SHIFT) - 1 def for_each_kmem_cache_node(slab_cache: Object) -> Iterator[Object]: """ Iterate over all kmem_cache_node of specific slab cache. :return: Iterator of ``struct kmem_cache_node *`` objects. """ for nid in range(0, prog["nr_node_ids"].value_()): yield slab_cache.node[nid] def count_partial_free_approx(kmem_cache_node: Object) -> Optional[Object]: x = Object(prog, "unsigned long", 0) n = kmem_cache_node if n.nr_partial <= MAX_PARTIAL_TO_SCAN: for slab in list_for_each_entry( "struct slab", n.partial.address_of_(), "slab_list" ): x += slab.objects - slab.inuse else: scanned = 0 for slab in list_for_each_entry( "struct slab", n.partial.address_of_(), "slab_list" ): x += slab.objects - slab.inuse scanned += 1 if scanned == MAX_PARTIAL_TO_SCAN / 2: break for slab in list_for_each_entry_reverse( "struct slab", n.partial.address_of_(), "slab_list" ): x += slab.objects - slab.inuse scanned += 1 if scanned == MAX_PARTIAL_TO_SCAN / 2: break x = x * n.nr_partial / scanned x = min(x, n.total_objects) return x def oo_objects(kmem_cache_order_objects: Object) -> Optional[Object]: return kmem_cache_order_objects.x & OO_MASK def oo_order(kmem_cache_order_objects: Object) -> Optional[Object]: return kmem_cache_order_objects.x >> OO_SHIFT print( f"{'struct kmem_cache *':^20} | {'name':^20} | {'active_objs':^12} 
def _print_sk(sk):
    """Print one line for a TCP socket: state, endpoints, and cgroup path."""
    inet = inet_sk(sk)
    tcp_state = TcpState(sk_tcpstate(sk))

    family = sk.__sk_common.skc_family
    if family == socket.AF_INET:
        src_ip = _ipv4(sk.__sk_common.skc_rcv_saddr)
        dst_ip = _ipv4(sk.__sk_common.skc_daddr)
    elif family == socket.AF_INET6:
        src_ip = _ipv6(sk.__sk_common.skc_v6_rcv_saddr)
        dst_ip = _ipv6(sk.__sk_common.skc_v6_daddr)
    else:
        # Not an IPv4/IPv6 socket; nothing to report.
        return

    src_port = socket.ntohs(inet.inet_sport)
    dst_port = socket.ntohs(sk.__sk_common.skc_dport)

    # Only full sockets carry cgroup data (not request/timewait minisocks).
    cgrp_path = ""
    if sk_fullsock(sk):
        cgrp = sock_cgroup_ptr(sk.sk_cgrp_data)
        cgrp_path = cgroup_path(cgrp).decode()

    print(
        "{:<12} {} {} {}".format(
            tcp_state.name,
            _ip_port(src_ip, src_port),
            _ip_port(dst_ip, dst_port),
            cgrp_path,
        )
    )
    # Uncomment to print whole struct:
    # print(sk)
    # print(inet)
    # print(cgrp)
try: for ilb in tcp_hashinfo.listening_hash: for sk in hlist_nulls_for_each_entry( "struct sock", ilb.nulls_head, "__sk_common.skc_node" ): _print_sk(sk) except AttributeError: for i in range(tcp_hashinfo.lhash2_mask + 1): head = tcp_hashinfo.lhash2[i].nulls_head if hlist_nulls_empty(head): continue for sk in sk_nulls_for_each(head): _print_sk(sk) # 2. And all other TCP sockets. for i in range(tcp_hashinfo.ehash_mask + 1): head = tcp_hashinfo.ehash[i].chain if hlist_nulls_empty(head): continue for sk in sk_nulls_for_each(head): _print_sk(sk) drgn-0.0.31/contrib/vmallocinfo.py000077500000000000000000000045521477777462700171150ustar00rootroot00000000000000#!/usr/bin/env drgn # Copyright (c) Canonical Ltd. # SPDX-License-Identifier: LGPL-2.1-or-later """ Script to dump vmallocinfo status using drgn""" import re from typing import Optional from drgn import IntegerLike, Program from drgn.helpers.linux.mm import for_each_vmap_area VMAP_RAM = 0x1 # indicates vm_map_ram area VM_IOREMAP = 0x00000001 # ioremap() and friends VM_ALLOC = 0x00000002 # vmalloc() VM_MAP = 0x00000004 # vmap()ed pages VM_USERMAP = 0x00000008 # suitable for remap_vmalloc_range VM_DMA_COHERENT = 0x00000010 # dma_alloc_coherent VM_SPARSE = 0x00001000 # sparse vm_area. not all pages are present. 
def is_vmalloc_addr(prog: Program, addr: IntegerLike) -> Optional[bool]:
    """
    Report whether an address falls inside the kernel's vmalloc range.

    The range bounds are parsed from the VMCOREINFO note so this works
    without debugging symbols.

    :param prog: Program being debugged.
    :param addr: Virtual address to test.
    :return: ``True``/``False`` if both bounds are present in VMCOREINFO,
        ``None`` if either bound is missing.
    """
    vmcoreinfo = prog["VMCOREINFO"].string_()
    start_match = re.search(
        rb"^NUMBER\(VMALLOC_START\)=(0x[0-9a-f]+)$", vmcoreinfo, flags=re.M
    )
    end_match = re.search(
        rb"^NUMBER\(VMALLOC_END\)=(0x[0-9a-f]+)$", vmcoreinfo, flags=re.M
    )
    # The original left VMALLOC_START/VMALLOC_END undefined when a regex
    # did not match and relied on a bare "except:" swallowing the resulting
    # NameError; check for missing bounds explicitly instead.
    if start_match is None or end_match is None:
        return None
    vmalloc_start = int(start_match.group(1), 16)
    vmalloc_end = int(end_match.group(1), 16)
    return vmalloc_start <= addr < vmalloc_end
def print_event_line(event, counter):
    """Print one aligned row: event name left-justified, count right-justified."""
    name = event.name
    value = counter.value_()
    print(f"{name:<36} {value:>16}")
if "vm_zone_stat" in prog: print("VM_ZONE_STAT:") vm_zone_stat = prog["vm_zone_stat"] for event in prog.type("enum zone_stat_item").enumerators[:-1]: print_event_line(event, vm_zone_stat[event.value].counter) print() # 2) vm_node_stat statistics are there since v4.8. if "vm_node_stat" in prog: print("VM_NODE_STAT:") vm_node_stat = prog["vm_node_stat"] for event in prog.type("enum node_stat_item").enumerators[:-1]: print_event_line(event, vm_node_stat[event.value].counter) print() # 3) vm_numa_event statistics are there since v5.14. They are only populated if # CONFIG_NUMA is enabled. if "node_subsys" in prog and "vm_numa_event" in prog: print("VM_NUMA_EVENT:") vm_numa_event = prog["vm_numa_event"] for event in prog.type("enum numa_stat_item").enumerators[:-1]: print_event_line(event, vm_numa_event[event.value].counter) print() # 4) vm_event_states statistics (uses per-CPU counters) print("VM_EVENT_STATES:") vm_event_states = prog["vm_event_states"] cpulist = list(for_each_online_cpu(prog)) for event in prog.type("enum vm_event_item").enumerators[:-1]: count = sum([per_cpu(vm_event_states, cpu).event[event.value] for cpu in cpulist]) print_event_line(event, count) drgn-0.0.31/docs/000077500000000000000000000000001477777462700135115ustar00rootroot00000000000000drgn-0.0.31/docs/_static/000077500000000000000000000000001477777462700151375ustar00rootroot00000000000000drgn-0.0.31/docs/_static/custom.css000066400000000000000000000011741477777462700171660ustar00rootroot00000000000000div.sphinxsidebar p.caption { font-weight: 300; font-size: 1.4rem; } details { margin-block-start: 1em; margin-block-end: 1em; } div.admonition { padding-bottom: 0; } div.admonition p.admonition-title { font-size: 17px; font-weight: bold; } div.tip { background-color: #DFD; border-color: #ACA; } div.scroll-y pre { max-height: 20em; overflow-y: auto; } div.tutorial pre { border-left: 5px solid #5A5; } @media screen and (min-width: 875px) { div.document { width: 100%; } } @media screen and 
(min-width: 1095px) { div.document { width: 1095px; } } drgn-0.0.31/docs/_static/logo.png000066400000000000000000000023531477777462700166100ustar00rootroot00000000000000PNG  IHDR` n!iCCPICC profile(}=H@_?";tR,8jP! :\MGbYWWAqquRtZxp܏wwT38e ![B #*1S<=||,s^%o2'2ݰ77->q$x̠ ?r]vsa?ό*uE^xwwgoi>lr1bKGDC pHYs.#.#x?vtIME 03W=IDATxܱAhceVJT'\%(D[++lAU@$p"rU@턀VL 2هef68Uf\yV(@X^J&EMG.@GV]d'W@ yfթ1]?9(j*@ "9LAQ-@"#6nރ Ђ-)h2DSJ4$9q>4pQZ ,[ 1em$"riک =h4RZ ,[ Jz*J#wrk_COr:Z 6PN\:ʑKG)H 6Z*ԝRNG9).zIhA0)HZȥ7&]5M{:9~<*@ )h:7rvJ}5-ZqaʎR\-9m&@@˙eG,FT4u/壔J;*@ HAEѼ՝vT Bv?vT7_*@ )h6gNyrߠ-N.5ͭz*@ "EMA7_H] ~!}ֱw.H )h4Jgן?9L|L IAZ HA S-oS@Cң7KZIENDB`drgn-0.0.31/docs/advanced_usage.rst000066400000000000000000000360611477777462700172020ustar00rootroot00000000000000Advanced Usage ============== .. highlight:: pycon The :doc:`user_guide` covers basic usage of drgn, but drgn also supports more advanced use cases which are covered here. .. _advanced-modules: Modules and Debugging Symbols ----------------------------- drgn tries to determine what executable, libraries, etc. a program uses and load debugging symbols automatically. As long as :doc:`debugging symbols are installed `, this should work out of the box on standard setups. For non-standard scenarios, drgn allows overriding the defaults with different levels of control and complexity. Loading Debugging Symbols From Non-Standard Locations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. program:: drgn drgn searches standard locations for debugging symbols. If you have debugging symbols available in a non-standard location, you can provide it to the CLI with the :option:`-s`/:option:`--symbols` option: .. 
code-block:: console $ drgn -s ./libfoo.so -s /usr/lib/libbar.so.debug Or with the :meth:`drgn.Program.load_debug_info()` method:: >>> prog.load_debug_info(["./libfoo.so", "/usr/lib/libbar.so.debug"]) Loading Debugging Symbols For Specific Modules ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :option:`-s` and :meth:`~drgn.Program.load_debug_info()` try the given files against all of the modules loaded in the program based on build IDs. You can also :ref:`look up ` a specific module and try a given file for just that module with :meth:`drgn.Module.try_file()`:: >>> prog.main_module().try_file("build/vmlinux") Loading Additional Debugging Symbols ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ :option:`-s` and :meth:`~drgn.Program.load_debug_info()` ignore files that don't correspond to a loaded module. To load debugging symbols from an arbitrary file, pass :option:`--extra-symbols` to the CLI: .. code-block:: console $ drgn --extra-symbols ./my_extra_symbols.debug Or create a :class:`drgn.ExtraModule`:: >>> module = prog.extra_module("my_extra_symbols", create=True) >>> module.try_file("./my_extra_symbols.debug") Listing Modules ^^^^^^^^^^^^^^^ By default, drgn creates a module for everything loaded in the program. You can disable this in the CLI with :option:`--no-default-symbols`. You can find or create the loaded modules programmatically with :meth:`drgn.Program.loaded_modules()`:: >>> for module, new in prog.loaded_modules(): ... print("Created" if new else "Found", module) You can see all of the created modules with :meth:`drgn.Program.modules()`. Overriding Modules ^^^^^^^^^^^^^^^^^^ You can create modules with the :ref:`module factory functions `. You can also modify various attributes of the :class:`drgn.Module` class. .. _debugging-information-finders-example: Debugging Information Finders ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ A callback for automatically finding debugging symbols for a set of modules can be registered with :meth:`drgn.Program.register_debug_info_finder()`. 
Here is an example for getting debugging symbols on Fedora Linux using DNF: .. code-block:: python3 import subprocess import drgn # Install debugging symbols using the DNF debuginfo-install plugin. Note that # this is mainly for demonstration purposes; debuginfod, which drgn supports # out of the box, is more reliable. def dnf_debug_info_finder(modules: list[drgn.Module]) -> None: # Determine all of the packages for the given modules. packages = set() for module in modules: if not module.wants_debug_file(): continue if not module.name.startswith("/"): continue proc = subprocess.run( ["rpm", "--query", "--file", module.name], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, ) if proc.returncode == 0: packages.add(proc.stdout.rstrip("\n")) # Try installing their debug info. subprocess.call( ["sudo", "dnf", "debuginfo-install", "--skip-broken", "--"] + sorted(packages) ) # Now that it's installed, try the standard locations. Other finders may # need to try specific files for specific modules with module.try_file() # instead. modules[0].prog.find_standard_debug_info(modules) prog.register_debug_info_finder("dnf", dnf_debug_info_finder, enable_index=-1) Custom debugging information finders can even be configured automatically through the :ref:`plugin system `. .. _writing-plugins: Writing Plugins --------------- In order for drgn to load a plugin automatically, it must be registered as an `entry point `_ for the ``drgn.plugins`` group. Here is a minimal example. First: .. code-block:: console $ mkdir drgn_plugin_example $ cd drgn_plugin_example Then, create ``pyproject.toml`` with the following contents: .. code-block:: toml :caption: pyproject.toml :emphasize-lines: 5-6 [project] name = 'drgn_plugin_example' version = '0.0.1' [project.entry-points.'drgn.plugins'] example = 'drgn_plugin_example' See the `Python Packaging User Guide `_ for a complete description of ``pyproject.toml``. 
We are most interested in the last two lines, which define the entry point. In ``example = 'drgn_plugin_example'``, ``example`` is the plugin name, and ``drgn_plugin_example`` is the plugin module. Create ``drgn_plugin_example.py`` with the following contents: .. code-block:: python3 :caption: drgn_plugin_example.py import drgn def example_debug_info_finder(modules: list[drgn.Module]) -> None: for module in modules: if isinstance(module, drgn.MainModule): module.try_file("/my/vmlinux") def drgn_prog_set(prog: drgn.Program) -> None: if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: prog.register_debug_info_finder( "example", example_debug_info_finder, enable_index=-1 ) # Optional; the default is 50; drgn_prog_set.drgn_priority = 100 This is a typical usage of the :func:`drgn_prog_set()` hook to register finders. See :ref:`plugins` for more details. After creating the above files, the plugin can be installed with ``pip install .``. Library ------- In addition to the CLI, drgn is also available as a library. :func:`drgn.program_from_core_dump()`, :func:`drgn.program_from_kernel()`, and :func:`drgn.program_from_pid()` correspond to the :option:`-c`, :option:`-k`, and :option:`-p` command line options, respectively; they return a :class:`drgn.Program` that can be used just like the one initialized by the CLI:: >>> import drgn >>> prog = drgn.program_from_kernel() C Library --------- The core functionality of drgn is implemented in C and is available as a C library, ``libdrgn``. See |drgn.h|_. .. |drgn.h| replace:: ``drgn.h`` .. _drgn.h: https://github.com/osandov/drgn/blob/main/libdrgn/drgn.h Full documentation can be generated by running ``doxygen`` in the ``libdrgn`` directory of the source code. Note that the API and ABI are not yet stable. Custom Programs --------------- The main components of a :class:`drgn.Program` are the program memory, types, and objects. The CLI and equivalent library interfaces automatically determine these. 
However, it is also possible to create a "blank" ``Program`` and plug in the main components. The :func:`drgn.cli.run_interactive()` function allows you to run the same drgn CLI once you've created a :class:`drgn.Program`, so it's easy to make a custom program which allows interactive debugging. :meth:`drgn.Program.add_memory_segment()` defines a range of memory and how to read that memory. The following example uses a Btrfs filesystem image as the program "memory": .. code-block:: python3 import os import sys import drgn from drgn.cli import run_interactive def btrfs_debugger(dev): file = open(dev, "rb") size = file.seek(0, 2) def read_file(address, count, offset, physical): file.seek(offset) return file.read(count) platform = drgn.Platform( drgn.Architecture.UNKNOWN, drgn.PlatformFlags.IS_LITTLE_ENDIAN ) prog = drgn.Program(platform) prog.add_memory_segment(0, size, read_file) module = prog.extra_module("btrfs", create=True) module.try_file(f"/lib/modules/{os.uname().release}/kernel/fs/btrfs/btrfs.ko") return prog prog = btrfs_debugger(sys.argv[1] if len(sys.argv) >= 2 else "/dev/sda") print(drgn.Object(prog, "struct btrfs_super_block", address=65536)) run_interactive(prog, banner_func=lambda _: "BTRFS debugger") :meth:`drgn.Program.register_type_finder()` and :meth:`drgn.Program.register_object_finder()` are the equivalent methods for plugging in types and objects. Environment Variables --------------------- Some of drgn's behavior can be modified through environment variables: .. envvar:: DRGN_DISABLE_PLUGINS Comma-separated list of plugins to disable. Each item is a glob pattern matching plugin entry point names. .. envvar:: DRGN_PLUGINS Comma-separated list of plugins to enable. Each item is either a plugin entry point name, a file path, or a module name. Empty items are ignored. An item not containing ``=`` is interpreted as a plugin entry point name. This takes precedence over :envvar:`DRGN_DISABLE_PLUGINS`. 
An item containing ``=`` is interpreted as an extra plugin to load manually instead of via an entry point. The string before ``=`` is the plugin name. The string after ``=`` is the value. If the value contains a ``/``, it is the file path of a Python module. Otherwise, it is a module name. So, ``DRGN_DISABLE_PLUGINS=* DRGN_PLUGINS=foo,bar=/hello/world.py,baz=my.module`` results in three plugins being loaded: the entry point ``foo``, the file ``/hello/world.py`` as ``bar``, and the module ``my.module`` as ``baz``. All other plugins are disabled. .. envvar:: DRGN_MAX_DEBUG_INFO_ERRORS The maximum number of warnings about missing debugging information to log on CLI startup or from :meth:`drgn.Program.load_debug_info()`. Any additional errors are truncated. The default is 5; -1 is unlimited. .. envvar:: DRGN_PREFER_ORC_UNWINDER Whether to prefer using `ORC `_ over DWARF for stack unwinding (0 or 1). The default is 0. Note that drgn will always fall back to ORC for functions lacking DWARF call frame information and vice versa. This environment variable is mainly intended for testing and may be ignored in the future. .. envvar:: DRGN_USE_LIBKDUMPFILE_FOR_ELF Whether drgn should use libkdumpfile for ELF vmcores (0 or 1). The default is 0. This functionality will be removed in the future. .. envvar:: DRGN_USE_SYS_MODULE Whether drgn should use ``/sys/module`` to find information about loaded kernel modules for the running kernel instead of getting them from the core dump (0 or 1). The default is 1. This environment variable is mainly intended for testing and may be ignored in the future. .. envvar:: PYTHON_BASIC_REPL If non-empty, don't try to use the `new interactive REPL `_ added in Python 3.13. drgn makes use of the new REPL through internal implementation details since there is `not yet `_ a public API for it. If it breaks, this may be used as an escape hatch. .. 
_kernel-special-objects: Linux Kernel Special Objects ---------------------------- When debugging the Linux kernel, there are some special :class:`drgn.Object`\ s accessible with :meth:`drgn.Program.object()` and :meth:`drgn.Program[] `. Some of these are available even without debugging information, thanks to metadata called "vmcoreinfo" which is present in kernel core dumps. These special objects include: ``UTS_RELEASE`` Object type: ``const char []`` This corresponds to the ``UTS_RELEASE`` macro in the Linux kernel source code. This is the exact kernel release (i.e., the output of ``uname -r``). To use this as a Python string, you must convert it:: >>> release = prog["UTS_RELEASE"].string_().decode("ascii") This is available without debugging information. ``PAGE_SIZE`` Object type: ``unsigned long`` ``PAGE_SHIFT`` Object type: ``unsigned int`` ``PAGE_MASK`` Object type: ``unsigned long`` These correspond to the macros of the same name in the Linux kernel source code. The page size is the smallest contiguous unit of physical memory which can be allocated or mapped by the kernel. >>> prog['PAGE_SIZE'] (unsigned long)4096 >>> prog['PAGE_SHIFT'] (int)12 >>> prog['PAGE_MASK'] (unsigned long)18446744073709547520 >>> 1 << prog['PAGE_SHIFT'] == prog['PAGE_SIZE'] True >>> ~(prog['PAGE_SIZE'] - 1) == prog['PAGE_MASK'] True These are available without debugging information. ``jiffies`` Object type: ``volatile unsigned long`` This is a counter of timer ticks. It is actually an alias of ``jiffies_64`` on 64-bit architectures, or the least significant 32 bits of ``jiffies_64`` on 32-bit architectures. Since this alias is defined via the linker, drgn handles it specially. This is *not* available without debugging information. ``vmemmap`` Object type: ``struct page *`` This is a pointer to the "virtual memory map", an array of ``struct page`` for each physical page of memory. 
While the purpose and implementation details of this array are beyond the scope of this documentation, it is enough to say that it is represented in the kernel source in an architecture-dependent way, frequently as a macro or constant. The definition provided by drgn ensures that users can access it without resorting to architecture-specific logic. This is *not* available without debugging information. ``VMCOREINFO`` Object type: ``const char []`` This is the data contained in the vmcoreinfo note, which is present either as an ELF note in ``/proc/kcore`` or ELF vmcores, or as a special data section in kdump-formatted vmcores. The vmcoreinfo note contains critical data necessary for interpreting the kernel image, such as KASLR offsets and data structure locations. In the Linux kernel, this data is normally stored in a variable called ``vmcoreinfo_data``. However, drgn reads this information from ELF note or from the diskdump header. It is possible (in rare cases, usually with vmcores created by hypervisors) for a vmcore to contain vmcoreinfo which differs from the data in ``vmcoreinfo_data``, so it is important to distinguish the contents. For that reason, we use the name ``VMCOREINFO`` to distinguish it from the kernel variable ``vmcoreinfo_data``. This is available without debugging information. drgn-0.0.31/docs/api_reference.rst000066400000000000000000000155461477777462700170450ustar00rootroot00000000000000API Reference ============= .. module:: drgn Programs -------- .. drgndoc:: Program :exclude: (void|int|bool|float|struct|union|class|enum|typedef|pointer|array|function)_type|(main|shared_library|vdso|relocatable|linux_kernel_loadable|extra)_module .. drgndoc:: ProgramFlags .. drgndoc:: FindObjectFlags .. drgndoc:: DebugInfoOptions .. drgndoc:: KmodSearchMethod .. drgndoc:: Thread .. 
_api-filenames: Filenames ^^^^^^^^^ The :meth:`Program.type()`, :meth:`Program.object()`, :meth:`Program.variable()`, :meth:`Program.constant()`, and :meth:`Program.function()` methods all take a *filename* parameter to distinguish between multiple definitions with the same name. The filename refers to the source code file that contains the definition. It is matched with :func:`filename_matches()`. If multiple definitions match, one is returned arbitrarily. .. drgndoc:: filename_matches .. _api-program-constructors: Program Constructors ^^^^^^^^^^^^^^^^^^^^ The drgn command line interface automatically creates a :class:`Program` named ``prog``. However, drgn may also be used as a library without the CLI, in which case a ``Program`` must be created manually. .. drgndoc:: program_from_core_dump .. drgndoc:: program_from_kernel .. drgndoc:: program_from_pid .. _default-program: Default Program ^^^^^^^^^^^^^^^ Most functions that take a :class:`Program` can be called without the *prog* argument. In that case, the *default program argument* is used, which is determined by the rules below. .. note:: In the drgn CLI, you probably don't need to care about these details. Simply omit *prog*: .. code-block:: python3 # Equivalent in the CLI. find_task(pid) find_task(prog, pid) find_task(prog["init_pid_ns"].address_of_(), pid) 1. If *prog* is given explicitly, either as a positional or keyword argument, then it is used. 2. Otherwise, if the first argument is an :class:`Object`, then :attr:`Object.prog_` is used. 3. Otherwise, the *default program* is used. The default program is set automatically in the CLI. Library users can get and set it manually. The default program is a per-thread setting. See `Thread Safety`_. .. drgndoc:: get_default_prog .. drgndoc:: set_default_prog .. drgndoc:: NoDefaultProgramError For helpers, it is recommended to use the decorators from the :mod:`drgn.helpers.common.prog` module instead. Platforms ^^^^^^^^^ .. drgndoc:: Platform .. 
drgndoc:: Architecture .. drgndoc:: PlatformFlags .. drgndoc:: Register .. drgndoc:: host_platform Languages ^^^^^^^^^ .. drgndoc:: Language Objects ------- .. drgndoc:: Object .. drgndoc:: AbsenceReason .. drgndoc:: NULL .. drgndoc:: cast .. drgndoc:: implicit_convert .. drgndoc:: reinterpret .. drgndoc:: container_of Symbols ------- .. drgndoc:: Symbol .. drgndoc:: SymbolBinding .. drgndoc:: SymbolKind .. drgndoc:: SymbolIndex Stack Traces ------------ Stack traces are retrieved with :func:`stack_trace()`, :meth:`Program.stack_trace()`, or :meth:`Thread.stack_trace()`. .. drgndoc:: stack_trace .. drgndoc:: StackTrace .. drgndoc:: StackFrame .. _api-reference-types: Types ----- .. drgndoc:: Type .. drgndoc:: TypeMember .. drgndoc:: TypeEnumerator .. drgndoc:: TypeParameter .. drgndoc:: TypeTemplateParameter .. drgndoc:: TypeKind .. drgndoc:: TypeKindSet .. drgndoc:: PrimitiveType .. drgndoc:: Qualifiers .. drgndoc:: alignof .. drgndoc:: offsetof .. _api-type-constructors: Type Constructors ^^^^^^^^^^^^^^^^^ Custom drgn types can be created with the following factory functions. These can be used just like types obtained from :meth:`Program.type()`. .. drgndoc:: Program.void_type .. drgndoc:: Program.int_type .. drgndoc:: Program.bool_type .. drgndoc:: Program.float_type .. drgndoc:: Program.struct_type .. drgndoc:: Program.union_type .. drgndoc:: Program.class_type .. drgndoc:: Program.enum_type .. drgndoc:: Program.typedef_type .. drgndoc:: Program.pointer_type .. drgndoc:: Program.array_type .. drgndoc:: Program.function_type Modules ------- .. drgndoc:: Module .. drgndoc:: MainModule .. drgndoc:: SharedLibraryModule .. drgndoc:: VdsoModule .. drgndoc:: RelocatableModule .. drgndoc:: ExtraModule .. drgndoc:: ModuleFileStatus .. drgndoc:: WantedSupplementaryFile .. drgndoc:: SupplementaryFileKind .. 
_api-module-constructors: Module Lookups/Constructors ^^^^^^^^^^^^^^^^^^^^^^^^^^^ For each module type, there is a corresponding method to create a module of that type or find one that was previously created:: >>> prog.extra_module("foo", 1234) Traceback (most recent call last): ... LookupError: module not found >>> prog.extra_module("foo", 1234, create=True) prog.extra_module(name='foo', id=0x4d2) >>> prog.extra_module("foo", 1234) prog.extra_module(name='foo', id=0x4d2) .. drgndoc:: Program.main_module .. drgndoc:: Program.shared_library_module .. drgndoc:: Program.vdso_module .. drgndoc:: Program.relocatable_module .. drgndoc:: Program.linux_kernel_loadable_module .. drgndoc:: Program.extra_module Miscellaneous ------------- .. drgndoc:: sizeof .. drgndoc:: execscript .. drgndoc:: IntegerLike .. drgndoc:: Path Exceptions ---------- .. drgndoc:: FaultError .. drgndoc:: MissingDebugInfoError .. drgndoc:: ObjectAbsentError .. drgndoc:: OutOfBoundsError CLI --- .. drgndoc:: cli .. _plugins: Plugins ------- drgn can be extended with plugins. A drgn plugin is a Python module defining one or more hook functions that are called at specific times. By default, drgn loads installed modules registered as :ref:`entry points ` for the ``drgn.plugins`` group. The :envvar:`DRGN_PLUGINS` and :envvar:`DRGN_DISABLE_PLUGINS` environment variables can be used to configure this. The following hooks are currently defined: .. py:currentmodule:: None .. function:: drgn_prog_set(prog: drgn.Program) -> None Called after the program target has been set (e.g., one of :meth:`drgn.Program.set_core_dump()`, :meth:`drgn.Program.set_kernel()`, or :meth:`drgn.Program.set_pid()` has been called). A ``drgn_priority`` integer attribute can be assigned to a hook function to define when it is called relative to other plugins. Hook functions with lower ``drgn_priority`` values are called earlier. Functions with equal ``drgn_priority`` values are called in an unspecified order. 
The default if not defined is 50. See :ref:`writing-plugins` for an example. Logging ------- drgn logs using the standard :mod:`logging` module to a logger named ``"drgn"``. drgn will also display progress bars on standard error if standard error is a terminal, the ``"drgn"`` logger has a :class:`~logging.StreamHandler` for ``stderr``, and its log level is less than or equal to ``WARNING``. Thread Safety ------------- Only one thread at a time should access the same :class:`Program` (including :class:`Object`, :class:`Type`, :class:`StackTrace`, etc. from that program). It is safe to use different :class:`Program`\ s from concurrent threads. drgn-0.0.31/docs/case_studies.rst000066400000000000000000000002751477777462700167220ustar00rootroot00000000000000Case Studies ============ These are writeups of real-world problems solved with drgn. .. toctree:: :maxdepth: 1 case_studies/dm_crypt_key.rst case_studies/kyber_stack_trace.rst drgn-0.0.31/docs/case_studies/000077500000000000000000000000001477777462700161645ustar00rootroot00000000000000drgn-0.0.31/docs/case_studies/dm_crypt_key.rst000066400000000000000000000553021477777462700214140ustar00rootroot00000000000000Recovering a dm-crypt Encryption Key ==================================== | Author: Omar Sandoval | Date: January 11th, 2024 .. highlight:: pycon .. linuxversion:: v6.7 `dm-crypt `_ is the Linux kernel's transparent disk encryption subsystem. I recently had to recover the master key for an encrypted disk where the passphrase was no longer known, but the dm-crypt device was still open. Normally, the key is stored in kernel space and cannot be accessed by user space. However, with drgn, we can traverse kernel data structures to recover the key. This is a great example of how to jump between kernel code and drgn to navigate a subsystem. .. warning:: The dm-crypt master key is obviously very sensitive information that shouldn't be exposed carelessly. 
As a disclaimer for anyone concerned about the security implications: everything is working as intended here. Debugging the live kernel with drgn requires ``root``, and ``root`` has many other ways to access sensitive information (loading kernel modules, triggering a kernel core dump, etc.). Solutions like `inline encryption `_ and :manpage:`kernel_lockdown(7)` can be used for defense in depth if necessary. Setup ----- For this writeup, I'm going to set up dm-crypt in a virtual machine running Linux 6.7. .. code-block:: console # uname -r 6.7.0 # cryptsetup luksFormat /dev/vdb WARNING! ======== This will overwrite data on /dev/vdb irrevocably. Are you sure? (Type 'yes' in capital letters): YES Enter passphrase for /dev/vdb: hello Verify passphrase: hello # cryptsetup open /dev/vdb mycrypt Enter passphrase for /dev/vdb: hello The default configuration is `AES `_ in `XTS `_ mode with a 512-bit key: .. code-block:: console # cryptsetup status mycrypt /dev/mapper/mycrypt is active. type: LUKS2 cipher: aes-xts-plain64 keysize: 512 bits key location: keyring device: /dev/vdb sector size: 512 offset: 32768 sectors size: 33521664 sectors mode: read/write The new device is ``dm-0``: .. code-block:: console # realpath /dev/mapper/mycrypt /dev/dm-0 Getting from Device Mapper to the Crypto API -------------------------------------------- The `dm-crypt documentation `_ tells us that "Device-mapper is infrastructure in the Linux kernel that provides a generic way to create virtual layers of block devices. Device-mapper crypt target provides transparent encryption of block devices using the kernel crypto API." Our first goal is therefore to get to whatever context is used by the crypto API, which likely includes the encryption key. To do that, we're going to have to navigate through the device mapper code. 
To start, let's find the virtual disk for our dm-crypt target in drgn using the :meth:`~drgn.helpers.linux.block.for_each_disk()` and :meth:`~drgn.helpers.linux.block.disk_name()` helpers: >>> for disk in for_each_disk(): ... if disk_name(disk) == b"dm-0": ... print(disk) ... break ... *(struct gendisk *)0xffffa3b9421b2c00 = { ... } ``struct gendisk`` has a function table, ``fops``, with callbacks to the disk driver. Specifically, the ``submit_bio`` callback intercepts disk reads and writes:: >>> disk.fops.submit_bio (void (*)(struct bio *))dm_submit_bio+0x0 = 0xffffffffc05761e0 Let's take a look at :linux:`dm_submit_bio() `: .. code-block:: c static void dm_submit_bio(struct bio *bio) { struct mapped_device *md = bio->bi_bdev->bd_disk->private_data; int srcu_idx; struct dm_table *map; map = dm_get_live_table(md, &srcu_idx); ... dm_split_and_process_bio(md, map, bio); ... } So the disk's private data is a ``struct mapped_device``. Let's get it in drgn:: >>> md = cast("struct mapped_device *", disk.private_data) :linux:`dm_get_live_table() ` gets the device mapper table: .. code-block:: c struct dm_table *dm_get_live_table(struct mapped_device *md, int *srcu_idx) __acquires(md->io_barrier) { *srcu_idx = srcu_read_lock(&md->io_barrier); return srcu_dereference(md->map, &md->io_barrier); } `SRCU `_ is a synchronization mechanism which we can blithely ignore:: >>> map = cast("struct dm_table *", md.map) ``dm_submit_bio()`` then calls :linux:`dm_split_and_process_bio() `, which calls :linux:`__split_and_process_bio() `: .. code-block:: c static blk_status_t __split_and_process_bio(struct clone_info *ci) { struct bio *clone; struct dm_target *ti; unsigned int len; ti = dm_table_find_target(ci->map, ci->sector); ... __map_bio(clone); } :linux:`dm_table_find_target() ` finds the appropriate device mapper target in a table: .. code-block:: c struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) { ... 
return &t->targets[(KEYS_PER_NODE * n) + k]; } Our simple case only has one target:: >>> map.num_targets (unsigned int)1 >>> ti = map.targets ``__split_and_process_bio()`` then calls :linux:`__map_bio() `: .. code-block:: c static void __map_bio(struct bio *clone) { struct dm_target_io *tio = clone_to_tio(clone); struct dm_target *ti = tio->ti; struct dm_io *io = tio->io; struct mapped_device *md = io->md; int r; ... if (likely(ti->type->map == linear_map)) r = linear_map(ti, clone); else if (ti->type->map == stripe_map) r = stripe_map(ti, clone); else r = ti->type->map(ti, clone); ... } So we need to look at another callback:: >>> ti.type.map (dm_map_fn)crypt_map+0x0 = 0xffffffffc08a03f0 :linux:`crypt_map() ` is part of dm-crypt, so we've finally made it out of generic device mapper: .. code-block:: c static int crypt_map(struct dm_target *ti, struct bio *bio) { struct dm_crypt_io *io; struct crypt_config *cc = ti->private; ... And we have the dm-crypt configuration:: >>> cc = cast("struct crypt_config *", ti.private) Dumping it out reveals some crypto API context! .. code-block:: pycon >>> cc *(struct crypt_config *)0xffffa3b9421b2400 = { ... .cipher_tfm = (union ){ .tfms = (struct crypto_skcipher **)0xffffa3b9438667c0, ... }, .tfms_count = (unsigned int)1, ... } >>> tfm = cc.cipher_tfm.tfms[0] Descending Down the Crypto API ------------------------------ The Linux kernel crypto API is very generic and is implemented with a lot of runtime polymorphism. Our next goal is to traverse through the crypto API data structures to find the key. The crypto API refers to cryptographic ciphers as `"transformations" `_. Transformations can be combined and nested in various ways. 
The ``tfm`` variable we found is a `"transformation object" `_, which is an instance of a transformation:: >>> tfm *(struct crypto_skcipher *)0xffffa3b948218c00 = { .reqsize = (unsigned int)160, .base = (struct crypto_tfm){ .refcnt = (refcount_t){ .refs = (atomic_t){ .counter = (int)1, }, }, .crt_flags = (u32)0, .node = (int)-1, .exit = (void (*)(struct crypto_tfm *))crypto_skcipher_exit_tfm+0x0 = 0xffffffffb77d2600, .__crt_alg = (struct crypto_alg *)0xffffa3b943dab448, .__crt_ctx = (void *[]){}, }, } >>> tfm.base.__crt_alg *(struct crypto_alg *)0xffffa3b943dab448 = { ... .cra_name = (char [128])"xts(aes)", ... } This is an ``skcipher``, or a symmetric key cipher. It is using the ``xts(aes)`` algorithm as expected. ``__crt_ctx`` is an opaque context, which is promising if we can figure out how to interpret it. The ``exit`` callback looks like a cleanup function. That seems like a good way for us to figure out how ``__crt_ctx`` is used. Here are :linux:`crypto_skcipher_exit_tfm() ` and the :linux:`crypto_skcipher_alg() ` and :linux:`crypto_skcipher_tfm() ` getters it uses: .. code-block:: c static void crypto_skcipher_exit_tfm(struct crypto_tfm *tfm) { struct crypto_skcipher *skcipher = __crypto_skcipher_cast(tfm); struct skcipher_alg *alg = crypto_skcipher_alg(skcipher); alg->exit(skcipher); } static inline struct skcipher_alg *crypto_skcipher_alg( struct crypto_skcipher *tfm) { return container_of(crypto_skcipher_tfm(tfm)->__crt_alg, struct skcipher_alg, base); } static inline struct crypto_tfm *crypto_skcipher_tfm( struct crypto_skcipher *tfm) { return &tfm->base; } We can emulate the getters in drgn to find the underlying implementation:: >>> def crypto_skcipher_alg(tfm): ... return container_of(tfm.base.__crt_alg, "struct skcipher_alg", "base") ... >>> crypto_skcipher_alg(tfm).exit (void (*)(struct crypto_skcipher *))simd_skcipher_exit+0x0 = 0xffffffffc058b1f0 My machine supports the `AES-NI `_ x86 extension. 
The kernel cannot use SIMD instructions like AES-NI in some contexts, so it has an :linuxt:`extra layer of indirection ` to go through an asynchronous daemon when necessary. This involves a couple of wrapper transformation objects. :linux:`simd_skcipher_exit() ` shows us how to unwrap the first one: .. code-block:: c static void simd_skcipher_exit(struct crypto_skcipher *tfm) { struct simd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); cryptd_free_skcipher(ctx->cryptd_tfm); } We just need one more getter in drgn, :linux:`crypto_skcipher_ctx() `:: >>> def crypto_skcipher_ctx(tfm): ... return cast("void *", tfm.base.__crt_ctx) ... >>> simd_ctx = cast("struct simd_skcipher_ctx *", crypto_skcipher_ctx(tfm)) >>> cryptd_tfm = simd_ctx.cryptd_tfm >>> cryptd_tfm *(struct cryptd_skcipher *)0xffffa3b94b5e4cc0 = { .base = (struct crypto_skcipher){ .reqsize = (unsigned int)80, .base = (struct crypto_tfm){ .refcnt = (refcount_t){ .refs = (atomic_t){ .counter = (int)1, }, }, .crt_flags = (u32)0, .node = (int)-1, .exit = (void (*)(struct crypto_tfm *))crypto_skcipher_exit_tfm+0x0 = 0xffffffffb77d2600, .__crt_alg = (struct crypto_alg *)0xffffa3b9421b2848, .__crt_ctx = (void *[]){}, }, }, } We saw ``crypto_skcipher_exit_tfm()`` earlier, so we know where to look next:: >>> crypto_skcipher_alg(cryptd_tfm.base).exit (void (*)(struct crypto_skcipher *))cryptd_skcipher_exit_tfm+0x0 = 0xffffffffc04d6210 :linux:`cryptd_skcipher_exit_tfm() ` shows us how to unwrap this transformation object: .. 
code-block:: c static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm) { struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); crypto_free_skcipher(ctx->child); } Now we can get the actual cipher transformation object:: >>> cryptd_ctx = cast("struct cryptd_skcipher_ctx *", crypto_skcipher_ctx(cryptd_tfm.base)) >>> child_tfm = cryptd_ctx.child >>> child_tfm *(struct crypto_skcipher *)0xffffa3b945dc4000 = { .reqsize = (unsigned int)0, .base = (struct crypto_tfm){ .refcnt = (refcount_t){ .refs = (atomic_t){ .counter = (int)1, }, }, .crt_flags = (u32)0, .node = (int)-1, .exit = (void (*)(struct crypto_tfm *))0x0, .__crt_alg = (struct crypto_alg *)0xffffffffc05e7d80, .__crt_ctx = (void *[]){}, }, } This one doesn't have an exit callback, so let's look at the algorithm:: >>> crypto_skcipher_alg(child_tfm) *(struct skcipher_alg *)0xffffffffc05e7d40 = { .setkey = (int (*)(struct crypto_skcipher *, const u8 *, unsigned int))xts_aesni_setkey+0x0 = 0xffffffffc059efb0, ... } :linux:`xts_aesni_setkey() ` is very enlightening: .. code-block:: c static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { struct aesni_xts_ctx *ctx = aes_xts_ctx(tfm); int err; err = xts_verify_key(tfm, key, keylen); if (err) return err; keylen /= 2; /* first half of xts-key is for crypt */ err = aes_set_key_common(&ctx->crypt_ctx, key, keylen); if (err) return err; /* second half of xts-key is for tweak */ return aes_set_key_common(&ctx->tweak_ctx, key + keylen, keylen); } XTS splits the provided key into two keys: one for data and one for a "tweak". They are stored in ``ctx->crypt_ctx`` and ``ctx->tweak_ctx``, respectively. To reach ``ctx``, we need one more getter, :linux:`aes_xts_ctx() `: .. code-block:: c static inline struct aesni_xts_ctx *aes_xts_ctx(struct crypto_skcipher *tfm) { return aes_align_addr(crypto_skcipher_ctx(tfm)); } Which uses :linux:`aes_align_addr() `: .. 
code-block:: c #define AESNI_ALIGN 16 static inline void *aes_align_addr(void *addr) { if (crypto_tfm_ctx_alignment() >= AESNI_ALIGN) return addr; return PTR_ALIGN(addr, AESNI_ALIGN); } Implementing that in drgn gets us the key material! .. code-block:: pycon >>> def aes_xts_ctx(tfm): ... AESNI_ALIGN = 16 ... mask = AESNI_ALIGN - 1 ... ctx = cast("unsigned long", crypto_skcipher_ctx(tfm)) ... return cast("struct aesni_xts_ctx *", (ctx + mask) & ~mask) ... >>> xts_ctx = aes_xts_ctx(cryptd_ctx.child) >>> xts_ctx *(struct aesni_xts_ctx *)0xffffa3b945dc4030 = { .tweak_ctx = (struct crypto_aes_ctx){ .key_enc = (u32 [60]){ 4053857025, 2535432618, 3497512106, 429624542, 190965574, 620881567, 2728140233, 1574816406, 1642869364, 4143158238, 646209396, 1059050410, 2124513770, 1537238901, 4181490364, 2766254122, 2225457809, 1918261583, 1423050299, 1808651665, 18645611, 1522328862, 2743115682, 123809672, 1080042880, 842431695, 1726249716, 220835685, 3602512678, 2349145656, 797278618, 686075410, 2304003180, 3143774371, 3716565591, 3501188402, 2797609477, 717569085, 88128935, 765727669, 1552680193, 3891148194, 979927029, 3938949831, 554080963, 197371646, 243473241, 589760748, 2460666129, 1967455411, 1328317254, 2783648129, 669994703, 741140529, 581956456, 25754500, 3453357406, 3096637933, 4156453547, 1381329706, }, .key_dec = (u32 [60]){ 3453357406, 3096637933, 4156453547, 1381329706, 1691590497, 1611861415, 2033812690, 3535200077, 1503779265, 1400120959, 2713205381, 402136101, 2278736107, 79729350, 422218101, 2878299039, 3072023845, 181796798, 4073463034, 3057657504, 2722800653, 2199015981, 501881779, 2997211882, 893456792, 3184435867, 4162446148, 1150040666, 3430456984, 559478304, 2667071902, 2941241689, 2504843709, 2291118851, 1171735007, 3163937054, 4210330224, 3978324152, 3214983102, 834109639, 179351664, 499339966, 3445158620, 4181891265, 4283462504, 399827656, 1384175366, 2383888249, 3581021031, 393470670, 3499860066, 874146333, 3319833674, 3901002144, 1163146702, 
3700942975, 4053857025, 2535432618, 3497512106, 429624542, }, .key_length = (u32)32, }, .crypt_ctx = (struct crypto_aes_ctx){ .key_enc = (u32 [60]){ 91118336, 1683438947, 280915620, 1674463119, 3416529787, 95371281, 156839573, 539041733, 2748950209, 3348011938, 3610309894, 3036590729, 1176448220, 1135635661, 1256800856, 1791516061, 4259008143, 978703661, 3982827563, 1503367842, 2366333926, 3468365611, 2219986291, 4003074286, 3589535297, 4020642668, 46334791, 1532531173, 3026313791, 2061167892, 4270366823, 269660297, 1916354478, 2644450498, 2673614725, 3288632928, 2828270575, 3528005371, 750892700, 1020462613, 735205841, 3058517267, 689003158, 3977630966, 4257919917, 797156694, 54662090, 1066472927, 3047676072, 65707451, 721143597, 3354268635, 1004719636, 341928770, 388200584, 682782039, 4002672596, 3984159343, 3347232066, 7120537, }, .key_dec = (u32 [60]){ 4002672596, 3984159343, 3347232066, 7120537, 2275767381, 3582792214, 728749911, 250810445, 2145441323, 3415330885, 1171250799, 717236012, 72947820, 1378379331, 4276274497, 631031578, 3286455042, 3027306094, 2388528682, 1863317827, 1027747936, 1450278447, 2898961154, 3682468443, 2929020077, 2006078828, 976160836, 3780245353, 3002856629, 1798524495, 4206615853, 2008326489, 523503039, 3641121217, 1304255784, 3682533165, 3583917429, 3653810938, 2441646946, 2366602356, 2101484483, 3325238398, 2495235305, 2529403397, 1276800912, 206997391, 1212164504, 478670614, 2260253082, 3144746941, 1384732823, 41543404, 2858181789, 1078781983, 1142337047, 1422378638, 91118336, 1683438947, 280915620, 1674463119, }, .key_length = (u32)32, }, } Extracting the AES Key ---------------------- Since we have a 512-bit key, XTS uses two 256-bit AES keys. You'll notice that the ``key_enc`` fields above are much larger than that. This is because AES expands the key into a number of "round keys" using a `"key schedule" `_. Luckily, the first few round keys are copied directly from the original key. 
With that information, we can finally recover the original key:: >>> def aes_key_from_ctx(ctx): ... words = ctx.key_enc.value_()[:ctx.key_length / 4] ... return b"".join(word.to_bytes(4, "little") for word in words) ... >>> aes_key_from_ctx(xts_ctx.crypt_ctx).hex() '005b6e05633d5764a46ebe108f47ce637b1ba4cb1140af05952e5909c51f2120' >>> aes_key_from_ctx(xts_ctx.tweak_ctx).hex() '01f3a0f1aaa11f97aacc77d0de8c9b1946e7610b9fe60125c91d9ca296cadd5d' Which we can double check with cryptsetup: .. code-block:: console :emphasize-lines: 17-20 # cryptsetup luksDump --dump-master-key /dev/vdb WARNING! ======== The header dump with volume key is sensitive information that allows access to encrypted partition without a passphrase. This dump should be stored encrypted in a safe place. Are you sure? (Type 'yes' in capital letters): YES Enter passphrase for /dev/vdb: hello LUKS header information for /dev/vdb Cipher name: aes Cipher mode: xts-plain64 Payload offset: 32768 UUID: b43cba2c-532b-4491-bbb9-763b55bd7f03 MK bits: 512 MK dump: 00 5b 6e 05 63 3d 57 64 a4 6e be 10 8f 47 ce 63 7b 1b a4 cb 11 40 af 05 95 2e 59 09 c5 1f 21 20 01 f3 a0 f1 aa a1 1f 97 aa cc 77 d0 de 8c 9b 19 46 e7 61 0b 9f e6 01 25 c9 1d 9c a2 96 ca dd 5d Conclusion ---------- Before this, I had almost no knowledge of device mapper or crypto API internals. drgn makes it easy to explore the kernel and learn how it works. Note that different system configurations will have different representations in the crypto API. For example, different ciphers modes will obviously have different transformations. Even the lack of AES-NI with the same cipher mode results in different transformation objects. I converted this case study to the :contrib:`dm_crypt_key.py` script in drgn's ``contrib`` directory. It could be extended to cover other ciphers in the future. 
drgn-0.0.31/docs/case_studies/kyber_stack_trace.rst000066400000000000000000000122541477777462700224010ustar00rootroot00000000000000Using Stack Trace Variables to Find a Kyber Bug =============================================== | Author: Omar Sandoval | Date: June 9th, 2021 .. highlight:: pycon Jakub Kicinski reported a crash in the :linuxt:`Kyber I/O scheduler ` when he was testing Linux 5.12. He captured a core dump and asked me to debug it. This is a quick writeup of that investigation. First, we can get the task that crashed:: >>> task = per_cpu(prog["runqueues"], prog["crashing_cpu"]).curr Then, we can get its stack trace:: >>> trace = prog.stack_trace(task) >>> trace #0 queued_spin_lock_slowpath (../kernel/locking/qspinlock.c:471:3) #1 queued_spin_lock (../include/asm-generic/qspinlock.h:85:2) #2 do_raw_spin_lock (../kernel/locking/spinlock_debug.c:113:2) #3 spin_lock (../include/linux/spinlock.h:354:2) #4 kyber_bio_merge (../block/kyber-iosched.c:573:2) #5 blk_mq_sched_bio_merge (../block/blk-mq-sched.h:37:9) #6 blk_mq_submit_bio (../block/blk-mq.c:2182:6) #7 __submit_bio_noacct_mq (../block/blk-core.c:1015:9) #8 submit_bio_noacct (../block/blk-core.c:1048:10) #9 submit_bio (../block/blk-core.c:1125:9) #10 submit_stripe_bio (../fs/btrfs/volumes.c:6553:2) #11 btrfs_map_bio (../fs/btrfs/volumes.c:6642:3) #12 btrfs_submit_data_bio (../fs/btrfs/inode.c:2440:8) #13 submit_one_bio (../fs/btrfs/extent_io.c:175:9) #14 submit_extent_page (../fs/btrfs/extent_io.c:3229:10) #15 __extent_writepage_io (../fs/btrfs/extent_io.c:3793:9) #16 __extent_writepage (../fs/btrfs/extent_io.c:3872:8) #17 extent_write_cache_pages (../fs/btrfs/extent_io.c:4514:10) #18 extent_writepages (../fs/btrfs/extent_io.c:4635:8) #19 do_writepages (../mm/page-writeback.c:2352:10) #20 __writeback_single_inode (../fs/fs-writeback.c:1467:8) #21 writeback_sb_inodes (../fs/fs-writeback.c:1732:3) #22 __writeback_inodes_wb (../fs/fs-writeback.c:1801:12) #23 wb_writeback (../fs/fs-writeback.c:1907:15) #24 
wb_check_background_flush (../fs/fs-writeback.c:1975:10) #25 wb_do_writeback (../fs/fs-writeback.c:2063:11) #26 wb_workfn (../fs/fs-writeback.c:2091:20) #27 process_one_work (../kernel/workqueue.c:2275:2) #28 worker_thread (../kernel/workqueue.c:2421:4) #29 kthread (../kernel/kthread.c:292:9) #30 ret_from_fork+0x1f/0x2a (../arch/x86/entry/entry_64.S:294) It looks like ``kyber_bio_merge()`` tried to lock an invalid spinlock. For reference, this is the source code of ``kyber_bio_merge()``: .. code-block:: c :lineno-start: 563 static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio, unsigned int nr_segs) { struct kyber_hctx_data *khd = hctx->sched_data; struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue); struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]]; unsigned int sched_domain = kyber_sched_domain(bio->bi_opf); struct list_head *rq_list = &kcq->rq_list[sched_domain]; bool merged; spin_lock(&kcq->lock); merged = blk_bio_list_merge(hctx->queue, rq_list, bio, nr_segs); spin_unlock(&kcq->lock); return merged; } When printed, the ``kcq`` structure containing the spinlock indeed looks like garbage (omitted for brevity). A crash course on the Linux kernel block layer: for each block device, there is a "software queue" (``struct blk_mq_ctx *ctx``) for each CPU and a "hardware queue" (``struct blk_mq_hw_ctx *hctx``) for each I/O queue provided by the device. Each hardware queue has one or more software queues assigned to it. Kyber keeps additional data per hardware queue (``struct kyber_hctx_data *khd``) and per software queue (``struct kyber_ctx_queue *kcq``). Let's try to figure out where the bad ``kcq`` came from. 
It should be an element of the ``khd->kcqs`` array (``khd`` is optimized out, but we can recover it from ``hctx->sched_data``):: >>> trace[4]["khd"] (struct kyber_hctx_data *) >>> hctx = trace[4]["hctx"] >>> khd = cast("struct kyber_hctx_data *", hctx.sched_data) >>> trace[4]["kcq"] - khd.kcqs (ptrdiff_t)1 >>> hctx.nr_ctx (unsigned short)1 So the ``kcq`` is for the second software queue, but the hardware queue is only supposed to have one software queue. Let's see which CPU was assigned to the hardware queue:: >>> hctx.ctxs[0].cpu (unsigned int)6 Here's the problem: we're not running on CPU 6, we're running on CPU 19:: >>> prog["crashing_cpu"] (int)19 And CPU 19 is assigned to a different hardware queue that actually does have two software queues:: >>> ctx = per_cpu_ptr(hctx.queue.queue_ctx, 19) >>> other_hctx = ctx.hctxs[hctx.type] >>> other_hctx == hctx False >>> other_hctx.nr_ctx (unsigned short)2 The bug is that the caller gets the ``hctx`` for the current CPU, then ``kyber_bio_merge()`` gets the ``ctx`` for the current CPU, and if the thread is migrated to another CPU in between, they won't match. The fix is to get a consistent view of the ``hctx`` and ``ctx``. The commit that fixes this is `here `_. 
drgn-0.0.31/docs/conf.py000066400000000000000000000021771477777462700150170ustar00rootroot00000000000000# Sphinx configuration for the drgn documentation build.
import os.path
import sys

# Make the repository root (for the drgn package itself) and the local
# docs/exts directory (custom Sphinx extensions) importable.
sys.path.append(os.path.abspath(".."))
sys.path.append(os.path.abspath("exts"))

# Root document of the documentation tree.
master_doc = "index"

# Man pages generated by the "man" builder: (source, name, description,
# authors, section).
man_pages = [
    ("man/drgn", "drgn", "programmable debugger", "", "1"),
]
option_emphasise_placeholders = True

# Extensions: the first four are local extensions from docs/exts (see
# "exts" on sys.path above); the rest are bundled with Sphinx.
extensions = [
    "details",
    "drgndoc.ext",
    "linuxsrc",
    "setuptools_config",
    "sphinx.ext.extlinks",
    "sphinx.ext.graphviz",
    "sphinx.ext.intersphinx",
]

# Inputs for the drgndoc extension: the Python package and the C
# extension's stub file.
drgndoc_paths = ["../drgn", "../_drgn.pyi"]
# Rewrite references to the internal _drgn module to the public name.
drgndoc_substitutions = [
    (r"^_drgn\b", "drgn"),
]

# :contrib:`foo.py` links to a script in the contrib/ directory on GitHub.
extlinks = {
    "contrib": (
        "https://github.com/osandov/drgn/blob/main/contrib/%s",
        "%s",
    ),
}
intersphinx_mapping = {
    "python": ("https://docs.python.org/3", None),
}
# Template for :manpage: references.
manpages_url = "http://man7.org/linux/man-pages/man{section}/{page}.{section}.html"

# HTML output settings (alabaster theme with project branding).
html_static_path = ["_static"]
html_theme = "alabaster"
html_theme_options = {
    "description": "Programmable debugger",
    "logo": "logo.png",
    "logo_name": True,
    "logo_text_align": "center",
    "github_user": "osandov",
    "github_repo": "drgn",
    "github_button": True,
    "github_type": "star",
}
html_favicon = "favicon.ico"
drgn-0.0.31/docs/exts/000077500000000000000000000000001477777462700144745ustar00rootroot00000000000000drgn-0.0.31/docs/exts/details.py000066400000000000000000000050211477777462700164710ustar00rootroot00000000000000# Copyright 2017-2019 by Takeshi KOMIYA
# SPDX-License-Identifier: Apache-2.0
# From https://pypi.org/project/sphinxcontrib-details-directive/, patched to
# use the proper name for the :class: option.
from docutils import nodes from docutils.parsers.rst import Directive, directives from sphinx.transforms.post_transforms import SphinxPostTransform from sphinx.util.nodes import NodeMatcher class details(nodes.Element, nodes.General): pass class summary(nodes.TextElement, nodes.General): pass def visit_details(self, node): if node.get('opened'): self.body.append(self.starttag(node, 'details', open="open")) else: self.body.append(self.starttag(node, 'details')) def depart_details(self, node): self.body.append('') def visit_summary(self, node): self.body.append(self.starttag(node, 'summary')) def depart_summary(self, node): self.body.append('') class DetailsDirective(Directive): required_arguments = 1 final_argument_whitespace = True has_content = True option_spec = { 'class': directives.class_option, 'name': directives.unchanged, 'open': directives.flag, } def run(self): admonition = nodes.container('', classes=self.options.get('class', []), opened='open' in self.options, type='details') textnodes, messages = self.state.inline_text(self.arguments[0], self.lineno) admonition += nodes.paragraph(self.arguments[0], '', *textnodes) admonition += messages self.state.nested_parse(self.content, self.content_offset, admonition) self.add_name(admonition) return [admonition] class DetailsTransform(SphinxPostTransform): default_priority = 200 builders = ('html',) def run(self): matcher = NodeMatcher(nodes.container, type='details') for node in self.document.traverse(matcher): newnode = details(**node.attributes) newnode += summary('', '', *node[0]) newnode.extend(node[1:]) node.replace_self(newnode) def setup(app): app.add_node(details, html=(visit_details, depart_details)) app.add_node(summary, html=(visit_summary, depart_summary)) app.add_directive('details', DetailsDirective) app.add_post_transform(DetailsTransform) return { 'parallel_read_safe': True, 'parallel_write_safe': True, } 
drgn-0.0.31/docs/exts/drgndoc/000077500000000000000000000000001477777462700161145ustar00rootroot00000000000000drgn-0.0.31/docs/exts/drgndoc/__init__.py000066400000000000000000000000001477777462700202130ustar00rootroot00000000000000drgn-0.0.31/docs/exts/drgndoc/docstrings.py000066400000000000000000000075171477777462700206570ustar00rootroot00000000000000#!/usr/bin/env python3 # Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later import argparse import functools import sys from typing import cast from drgndoc.format import Formatter from drgndoc.namespace import Namespace, ResolvedNode from drgndoc.parse import Class, DocumentedNode, Node, parse_paths from drgndoc.util import dot_join escapes = [] for c in range(256): if c == 0: e = r"\0" elif c == 7: e = r"\a" elif c == 8: e = r"\b" elif c == 9: e = r"\t" elif c == 10: e = r"\n" elif c == 11: e = r"\v" elif c == 12: e = r"\f" elif c == 13: e = r"\r" elif c == 34: e = r"\"" elif c == 92: e = r"\\" elif 32 <= c <= 126: e = chr(c) else: e = f"\\x{c:02x}" escapes.append(e) def escape_string(s: str) -> str: return "".join([escapes[c] for c in s.encode("utf-8")]) if __name__ == "__main__": parser = argparse.ArgumentParser( description="generate docstring definitions for a C extension from Python source code/stub files" ) parser.add_argument( "--header", "-H", action="store_true", help="generate header file" ) parser.add_argument( "-m", "--module", dest="modules", metavar="MODULE[:NAME]", action="append", help="generate docstrings for the given module instead of all modules " "(may be given multiple times); " "an alternate name to use for the generated variables may also be given", ) parser.add_argument( "paths", metavar="PATH", nargs="+", help="module or package path" ) args = parser.parse_args() modules = parse_paths(args.paths, functools.partial(print, file=sys.stderr)) namespace = Namespace(modules) formatter = Formatter(namespace) output_file = sys.stdout if args.header: 
output_file.write( """\ /* * Generated by drgndoc.docstrings -H. * * Before Python 3.7, various docstring fields were defined as char * (see * https://bugs.python.org/issue28761). We still want the strings to be * read-only, so just cast away the const. */ """ ) else: output_file.write("/* Generated by drgndoc.docstrings. */\n\n") def aux(resolved: ResolvedNode[Node], name: str) -> None: node = resolved.node if node.has_docstring(): var_name = name.replace(".", "_") + "_DOC" if args.header: output_file.write("extern ") output_file.write(f"const char {var_name}[]") if not args.header: output_file.write(" =") lines = formatter.format( cast(ResolvedNode[DocumentedNode], resolved), name.rpartition(".")[2], rst=False, ) if lines: for i, line in enumerate(lines): output_file.write(f'\n\t"{escape_string(line)}') if i != len(lines) - 1: output_file.write("\\n") output_file.write('"') else: output_file.write(' ""') output_file.write(";\n") if args.header: output_file.write(f"#define {var_name} (char *){var_name}\n") for attr in resolved.attrs(): if isinstance(node, Class) and attr.name == "__init__": continue aux(attr, dot_join(name, attr.name)) for module in args.modules or namespace.modules.keys(): module, _, name = module.partition(":") resolved = namespace.resolve_global_name(module) if isinstance(resolved, ResolvedNode): aux(resolved, name or module) else: sys.exit(f"name {module} not found") drgn-0.0.31/docs/exts/drgndoc/ext.py000066400000000000000000000252521477777462700172740ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ drgn consists of a core C extension and supporting Python code. It also makes use of type hints. As a result, its documentation generation has a few requirements: 1. It must work without compiling the C extension, which can't be done on Read the Docs because of missing dependencies. 2. 
It must support generating documentation from type hints (ideally with proper markup rather than by including the raw type annotations). 3. It must support type hint stub files. 4. It must support classes/functions/etc. which are defined in one module but should canonically be documented in another. This is common for C extensions that are wrapped by a higher-level Python module. The main existing solutions are ruled out by these requirements: 1. sphinx.ext.autodoc (and other solutions based on runtime introspection) require excluding the C extension (e.g., with autodoc_mock_imports) and providing the documentation for it elsewhere. Additionally, type hints from stub files are not available at runtime, so extensions like sphinx-autodoc-typehints and sphinx.ext.autodoc.typehints won't work. 2. sphinx.ext.autoapi doesn't generate markup for type hints and doesn't have any support for objects which should documented under a different name than they were defined. It also only supports documenting directory trees, not individual files. This extension addresses these requirements. In the future, it may be worthwhile to make it a standalone package, as I imagine other projects that make heavy use of C extensions have encountered similar issues. Overall, it works by parsing Python source code and stub files (drgndoc.parse), building a tree representing the namespace (drgndoc.namespace), and using that namespace to resolve definitions and type annotations to generate markup (drgndoc.format). This also provides a script that can generate docstring definitions from a stub file for the C extension itself (drgndoc.docstrings). 
""" import ast import os.path import re from typing import Any, Dict, Optional, cast import docutils.nodes import docutils.parsers.rst.directives import docutils.statemachine import sphinx.addnodes import sphinx.application import sphinx.environment import sphinx.util.docutils import sphinx.util.logging import sphinx.util.nodes from drgndoc.format import Formatter from drgndoc.namespace import Namespace, ResolvedNode from drgndoc.parse import ( Class, DocumentedNode, Import, ImportFrom, Module, Node, Variable, parse_paths, ) from drgndoc.util import dot_join logger = sphinx.util.logging.getLogger(__name__) # Needed for type checking. class DrgnDocBuildEnvironment(sphinx.environment.BuildEnvironment): drgndoc_namespace: Namespace drgndoc_formatter: Formatter def drgndoc_init(app: sphinx.application.Sphinx) -> None: env = cast(DrgnDocBuildEnvironment, app.env) paths = [os.path.join(app.confdir, path) for path in app.config.drgndoc_paths] env.drgndoc_namespace = Namespace(parse_paths(paths, logger.warning)) env.drgndoc_formatter = Formatter( env.drgndoc_namespace, [ (re.compile(pattern), repl) for pattern, repl in app.config.drgndoc_substitutions ], ) # Sphinx looks up type annotations as py:class references. This doesn't work # for type aliases, which are py:data. See # https://github.com/sphinx-doc/sphinx/issues/10785. This hack intercepts # missing py:class references, and if they resolve to a variable annotated as # TypeAlias, retries them as py:data. 
def missing_reference( app: sphinx.application.Sphinx, env: DrgnDocBuildEnvironment, node: sphinx.addnodes.pending_xref, contnode: docutils.nodes.Element, ) -> Optional[docutils.nodes.Element]: if node.get("refdomain") == "py": reftarget = node.get("reftarget") if reftarget and node.get("reftype") == "class": resolved = env.drgndoc_namespace.resolve_global_name(reftarget) if ( isinstance(resolved, ResolvedNode) and isinstance(resolved.node, Variable) and isinstance(resolved.node.annotation, ast.Name) and resolved.node.annotation.id == "TypeAlias" ): node.attributes["reftype"] = "data" return env.domains["py"].resolve_xref( env, node.get("refdoc"), app.builder, "data", reftarget, node, contnode, ) return None class DrgnDocDirective(sphinx.util.docutils.SphinxDirective): env: DrgnDocBuildEnvironment required_arguments = 1 optional_arguments = 0 option_spec = { "exclude": docutils.parsers.rst.directives.unchanged, } def run(self) -> Any: parts = [] py_module = self.env.ref_context.get("py:module") if py_module: parts.append(py_module) py_classes = self.env.ref_context.get("py:classes", []) if py_classes: parts.extend(py_classes) parts.append(self.arguments[0]) name = ".".join(parts) resolved = self.env.drgndoc_namespace.resolve_global_name(name) if not isinstance(resolved, ResolvedNode): logger.warning("name %r not found", resolved) return [] if not resolved.node.has_docstring(): logger.warning("name %r is not documented", resolved.qualified_name()) return [] docnode = docutils.nodes.section() self._run(name, "", self.arguments[0], resolved, docnode) return docnode.children def _run( self, top_name: str, attr_name: str, name: str, resolved: ResolvedNode[Node], docnode: docutils.nodes.Node, ) -> None: exclude_pattern = self.options.get("exclude") if exclude_pattern is not None and re.fullmatch(exclude_pattern, attr_name): return if isinstance(resolved.node, (Import, ImportFrom)): # Only include imports that are explicitly aliased (i.e., import # ... as ... or from ... 
import ... as ...). # TODO: we should also include imports listed in __all__. if not resolved.node.aliased: return imported = self.env.drgndoc_namespace.resolve_name_in_scope( resolved.modules, resolved.classes, resolved.name ) if not isinstance(imported, ResolvedNode): return resolved = imported resolved = cast(ResolvedNode[DocumentedNode], resolved) if isinstance(resolved.node, Module): return self._run_module( top_name, attr_name, cast(ResolvedNode[Module], resolved), docnode ) lines = self.env.drgndoc_formatter.format( resolved, name, self.env.ref_context.get("py:module", ""), ".".join(self.env.ref_context.get("py:classes", ())), ) if not lines: # Not documented. Ignore it. return sourcename = "" if resolved.modules and resolved.modules[-1].node.path: sourcename = resolved.modules[-1].node.path if sourcename: self.env.note_dependency(sourcename) contents = docutils.statemachine.StringList(lines, sourcename) contents.append("", sourcename) self.state.nested_parse(contents, 0, docnode) if isinstance(resolved.node, Class): for desc in reversed(docnode.children): if isinstance(desc, sphinx.addnodes.desc): break else: logger.warning("desc node not found") return for desc_content in reversed(desc.children): if isinstance(desc_content, sphinx.addnodes.desc_content): break else: logger.warning("desc_content node not found") return py_classes = self.env.ref_context.setdefault("py:classes", []) py_classes.append(resolved.name) self.env.ref_context["py:class"] = resolved.name for member in resolved.attrs(): if member.name != "__init__": self._run( top_name, dot_join(attr_name, member.name), member.name, member, desc_content, ) py_classes.pop() self.env.ref_context["py:class"] = py_classes[-1] if py_classes else None def _run_module( self, top_name: str, attr_name: str, resolved: ResolvedNode[Module], docnode: docutils.nodes.Node, ) -> None: node = resolved.node if node.docstring is None: # Not documented. Ignore it. 
return try: old_py_module = self.env.ref_context["py:module"] have_old_py_module = True except KeyError: have_old_py_module = False sourcename = node.path or "" if sourcename: self.env.note_dependency(sourcename) contents = docutils.statemachine.StringList( [ ".. py:module:: " + dot_join(top_name, attr_name), "", *node.docstring.splitlines(), ], sourcename, ) sphinx.util.nodes.nested_parse_with_titles(self.state, contents, docnode) # If the module docstring defines any sections, then the contents # should go inside of the last one. section = docnode for child in reversed(docnode.children): if isinstance(child, docutils.nodes.section): section = child break for attr in resolved.attrs(): self._run( top_name, dot_join(attr_name, attr.name), attr.name, attr, section ) if have_old_py_module: self.env.ref_context["py:module"] = old_py_module else: del self.env.ref_context["py:module"] def setup(app: sphinx.application.Sphinx) -> Dict[str, Any]: app.connect("builder-inited", drgndoc_init) app.connect("missing-reference", missing_reference) # List of modules or packages. app.add_config_value("drgndoc_paths", [], "env") # List of (regex pattern, substitution) to apply to resolved names. app.add_config_value("drgndoc_substitutions", [], "env") app.add_directive("drgndoc", DrgnDocDirective) return {"env_version": 1, "parallel_read_safe": True, "parallel_write_safe": True} drgn-0.0.31/docs/exts/drgndoc/format.py000066400000000000000000000524501477777462700177640ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: LGPL-2.1-or-later import ast from typing import Any, List, Optional, Pattern, Sequence, Tuple, cast from drgndoc.namespace import BoundNode, Namespace, ResolvedNode from drgndoc.parse import ( Class, DocumentedNode, Function, FunctionSignature, Module, Variable, ) from drgndoc.visitor import NodeVisitor def _is_name_constant(node: ast.Constant) -> bool: return node.value is None or node.value is True or node.value is False class _FormatVisitor(NodeVisitor): def __init__( self, namespace: Namespace, substitutions: Sequence[Tuple[Pattern[str], Any]], modules: Sequence[BoundNode[Module]], classes: Sequence[BoundNode[Class]], context_module: Optional[str], context_class: Optional[str], ) -> None: self._namespace = namespace self._substitutions = substitutions self._modules = modules self._classes = classes self._context_module = context_module self._context_class = context_class self._parts: List[str] = [] def visit( # type: ignore[override] # This is intentionally incompatible with the supertype. 
self, node: ast.AST, *, rst: bool, qualify_typing: bool, qualify_typeshed: bool, ) -> str: self._rst = rst self._qualify_typing = qualify_typing self._qualify_typeshed = qualify_typeshed super().visit(node) ret = "".join(self._parts) self._parts.clear() return ret def generic_visit(self, node: ast.AST) -> None: raise NotImplementedError( f"{node.__class__.__name__} formatting is not implemented" ) @staticmethod def _check_ctx_is_load(node: Any) -> None: if not isinstance(node.ctx, ast.Load): raise NotImplementedError( f"{node.ctx.__class__.__name__} formatting is not implemented" ) def visit_Constant( self, node: ast.Constant, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> None: if node.value is ...: self._parts.append("...") else: obj = self._rst and _is_name_constant(node) quote = self._rst and not isinstance(node.value, (int, float)) if obj: self._parts.append(":py:obj:`") elif quote: self._parts.append("``") self._parts.append(repr(node.value)) if obj: self._parts.append("`") elif quote: self._parts.append("``") def _append_resolved_name(self, name: str) -> None: if self._rst: self._parts.append(":py:obj:`") resolved = self._namespace.resolve_name_in_scope( self._modules, self._classes, name ) if isinstance(resolved, ResolvedNode): target = resolved.qualified_name() else: target = resolved for pattern, repl in self._substitutions: target, num_subs = pattern.subn(repl, target) if num_subs: break title = target if not self._qualify_typing and title.startswith("typing."): title = title[len("typing.") :] elif not self._qualify_typeshed and title.startswith("_typeshed."): title = title[len("_typeshed.") :] elif self._context_module and title.startswith(self._context_module + "."): title = title[len(self._context_module) + 1 :] if self._context_class and title.startswith(self._context_class + "."): title = title[len(self._context_class) + 1 :] self._parts.append(title) if self._rst: if title != target: self._parts.append(" <") self._parts.append(target) 
self._parts.append(">") self._parts.append("`") def visit_Name( self, node: ast.Name, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> None: self._check_ctx_is_load(node) self._append_resolved_name(node.id) def visit_Attribute( self, node: ast.Attribute, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> None: self._check_ctx_is_load(node) name_stack = [node.attr] while True: value = node.value if isinstance(value, ast.Attribute): name_stack.append(value.attr) node = value continue elif isinstance(value, ast.Name): name_stack.append(value.id) name_stack.reverse() self._append_resolved_name(".".join(name_stack)) elif isinstance(value, ast.Constant) and _is_name_constant(value): name_stack.append(repr(value.value)) name_stack.reverse() self._append_resolved_name(".".join(name_stack)) elif isinstance(value, ast.Constant) and not isinstance( value.value, (type(...), int, float) ): name_stack.append(repr(value.value)) name_stack.reverse() if self._rst: self._parts.append("``") self._parts.append(".".join(name_stack)) if self._rst: self._parts.append("``") else: self._visit(value, node, None) name_stack.append("") name_stack.reverse() if isinstance(value, ast.Constant) and isinstance(value.value, int): # "1.foo()" is a syntax error without parentheses or an # extra space. self._parts.append(" ") elif self._rst: # Make sure the "``" doesn't get squashed into a previous # special character. 
self._parts.append("\\ ") if self._rst: self._parts.append("``") self._parts.append(".".join(name_stack)) if self._rst: self._parts.append("``") break def visit_Subscript( self, node: ast.Subscript, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> None: self._check_ctx_is_load(node) self._visit(node.value, node, None) if self._rst: self._parts.append("\\") self._parts.append("[") self._visit(node.slice, node, None) if self._rst: self._parts.append("\\") self._parts.append("]") def visit_Tuple( self, node: ast.Tuple, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> None: self._check_ctx_is_load(node) parens = ( len(node.elts) == 0 or not isinstance(parent, ast.Subscript) or node is not parent.slice ) if parens: self._parts.append("(") for i, elt in enumerate(node.elts): if i > 0: self._parts.append(", ") self._visit(elt, node, node.elts[i + 1] if i < len(node.elts) - 1 else None) if len(node.elts) == 1: self._parts.append(",") if parens: self._parts.append(")") def visit_List( self, node: ast.List, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> None: self._check_ctx_is_load(node) if self._rst: self._parts.append("\\") self._parts.append("[") for i, elt in enumerate(node.elts): if i > 0: self._parts.append(", ") self._visit(elt, node, node.elts[i + 1] if i < len(node.elts) - 1 else None) if self._rst: self._parts.append("\\") self._parts.append("]") def visit_UnaryOp( self, node: ast.UnaryOp, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> None: if isinstance(node.op, ast.UAdd): self._parts.append("+") elif isinstance(node.op, ast.USub): self._parts.append("-") elif isinstance(node.op, ast.Not): self._parts.append("not ") elif isinstance(node.op, ast.Invert): self._parts.append("~") else: raise NotImplementedError( f"{node.op.__class__.__name__} formatting is not implemented" ) parens = not isinstance(node.operand, (ast.Constant, ast.Name)) if parens: self._parts.append("(") self._visit(node.operand, node, None) if parens: 
self._parts.append(")") class Formatter: def __init__( self, namespace: Namespace, substitutions: Sequence[Tuple[Pattern[str], Any]] = (), ) -> None: self._namespace = namespace self._substitutions = substitutions def _format_function_signature( self, node: FunctionSignature, modules: Sequence[BoundNode[Module]], classes: Sequence[BoundNode[Class]], context_module: Optional[str], context_class: Optional[str], rst: bool, want_rtype: bool, ) -> Tuple[str, List[str]]: visitor = _FormatVisitor( self._namespace, self._substitutions, modules, classes, context_module, context_class, ) assert node.docstring is not None lines = node.docstring.splitlines() if rst: lines = [" " + line for line in lines] signature = ["("] need_comma = False need_blank_line = bool(lines) def visit_arg( arg: ast.arg, default: Optional[ast.expr] = None, name: Optional[str] = None ) -> None: nonlocal need_comma, need_blank_line if need_comma: signature.append(", ") signature.append(arg.arg if name is None else name) default_sep = "=" if arg.annotation: signature.append(": ") signature.append( visitor.visit( arg.annotation, rst=False, qualify_typing=rst, qualify_typeshed=False, ) ) default_sep = " = " if default: signature.append(default_sep) signature.append( visitor.visit( default, rst=False, qualify_typing=True, qualify_typeshed=True ) ) need_comma = True try: posargs = node.args.posonlyargs + node.args.args num_posonlyargs = len(node.args.posonlyargs) except AttributeError: posargs = node.args.args num_posonlyargs = 0 # Type checkers treat parameters with names that begin but don't end # with __ as positional-only: # https://typing.readthedocs.io/en/latest/spec/historical.html#positional-only-parameters # We translate those to the PEP 570 syntax. 
def _is_posonly(arg: ast.arg) -> bool: return arg.arg.startswith("__") and not arg.arg.endswith("__") num_pep_570_posonlyargs = num_posonlyargs if ( num_posonlyargs == 0 and classes and not node.has_decorator("staticmethod") and len(posargs) > 1 and _is_posonly(posargs[1]) ): num_posonlyargs = 2 while num_posonlyargs < len(posargs) and _is_posonly(posargs[num_posonlyargs]): num_posonlyargs += 1 for i, arg in enumerate(posargs): default: Optional[ast.expr] if i >= len(posargs) - len(node.args.defaults): default = node.args.defaults[ i - (len(posargs) - len(node.args.defaults)) ] else: default = None if i == 0 and classes and not node.has_decorator("staticmethod"): # Skip self for methods and cls for class methods. continue visit_arg( arg, default, name=( arg.arg[2:] if num_pep_570_posonlyargs <= i < num_posonlyargs else arg.arg ), ) if i == num_posonlyargs - 1: signature.append(", /") if node.args.vararg: visit_arg(node.args.vararg, name="*" + node.args.vararg.arg) if node.args.kwonlyargs: if not node.args.vararg: if need_comma: signature.append(", ") signature.append("*") need_comma = True for i, arg in enumerate(node.args.kwonlyargs): visit_arg(arg, node.args.kw_defaults[i]) if node.args.kwarg: visit_arg(node.args.kwarg, name="**" + node.args.kwarg.arg) signature.append(")") if want_rtype and node.returns: signature.append(" -> ") signature.append( visitor.visit( node.returns, rst=False, qualify_typing=rst, qualify_typeshed=False ) ) return "".join(signature), lines def _format_class( self, resolved: ResolvedNode[Class], name: str, context_module: Optional[str] = None, context_class: Optional[str] = None, rst: bool = True, ) -> List[str]: node = resolved.node init_signatures: List[FunctionSignature] = [] try: init = resolved.attr("__init__") except KeyError: pass else: if isinstance(init.node, Function): init_signatures = [ signature for signature in init.node.signatures if signature.docstring is not None ] init_context_class = resolved.name if context_class: 
init_context_class = context_class + "." + init_context_class lines = [] if rst and len(init_signatures) == 1 and node.docstring is None: class_signature, class_docstring_lines = self._format_function_signature( init_signatures[0], init.modules, init.classes, context_module, init_context_class, rst, False, ) del init_signatures[0] else: class_signature = "" class_docstring_lines = ( node.docstring.splitlines() if node.docstring else [] ) if rst: lines.append(f".. py:class:: {name}{class_signature}") if node.bases: visitor = _FormatVisitor( self._namespace, self._substitutions, resolved.modules, resolved.classes, context_module, context_class, ) bases = [ visitor.visit( base, rst=rst, qualify_typing=False, qualify_typeshed=False ) for base in node.bases ] if lines: lines.append("") lines.append((" " if rst else "") + "Bases: " + ", ".join(bases)) if class_docstring_lines: if lines: lines.append("") if rst: for line in class_docstring_lines: lines.append(" " + line) else: lines.extend(class_docstring_lines) for i, signature_node in enumerate(init_signatures): if lines: lines.append("") signature, signature_lines = self._format_function_signature( signature_node, init.modules, init.classes, context_module, init_context_class, rst, False, ) if rst: lines.append(f" .. 
py:method:: {name}{signature}") lines.append(" :noindex:") elif signature: lines.append(f"{name}{signature}") lines.append("") if rst: for line in signature_lines: lines.append(" " + line) else: lines.extend(signature_lines) return lines def _format_function( self, resolved: ResolvedNode[Function], name: str, context_module: Optional[str] = None, context_class: Optional[str] = None, rst: bool = True, ) -> List[str]: node = resolved.node lines = [] for i, signature_node in enumerate( signature for signature in node.signatures if signature.docstring is not None ): if i > 0: lines.append("") signature, signature_lines = self._format_function_signature( signature_node, resolved.modules, resolved.classes, context_module, context_class, rst, True, ) if rst: directive = "py:method" if resolved.classes else "py:function" lines.append(f".. {directive}:: {name}{signature}") if i > 0: lines.append(" :noindex:") if node.async_: lines.append(" :async:") if signature_node.has_decorator("classmethod") or name in ( "__init_subclass__", "__class_getitem__", ): lines.append(" :classmethod:") if signature_node.has_decorator("staticmethod"): lines.append(" :staticmethod:") else: lines.append(f"{name}{signature}") if signature_lines: lines.append("") lines.extend(signature_lines) return lines def _format_variable( self, resolved: ResolvedNode[Variable], name: str, context_module: Optional[str], context_class: Optional[str], rst: bool, ) -> List[str]: node = resolved.node assert node.docstring is not None docstring_lines = node.docstring.splitlines() visitor = _FormatVisitor( self._namespace, self._substitutions, resolved.modules, resolved.classes, context_module, context_class, ) if rst: directive = "py:attribute" if resolved.classes else "py:data" lines = [f".. 
{directive}:: {name}"] if node.annotation: lines.append( " :type: " + visitor.visit( node.annotation, rst=False, qualify_typing=True, qualify_typeshed=False, ) ) if docstring_lines: lines.append("") for line in docstring_lines: lines.append(" " + line) return lines else: if node.annotation: if docstring_lines: docstring_lines.insert(0, "") docstring_lines.insert( 0, visitor.visit( node.annotation, rst=False, qualify_typing=False, qualify_typeshed=False, ), ) return docstring_lines def format( self, resolved: ResolvedNode[DocumentedNode], name: Optional[str] = None, context_module: Optional[str] = None, context_class: Optional[str] = None, rst: bool = True, ) -> List[str]: node = resolved.node if not node.has_docstring(): return [] if name is None: name = resolved.name if context_module is None and resolved.modules: context_module = ".".join([module.name for module in resolved.modules]) if context_class is None and resolved.classes: context_module = ".".join([class_.name for class_ in resolved.classes]) if isinstance(node, Class): return self._format_class( cast(ResolvedNode[Class], resolved), name, context_module, context_class, rst, ) elif isinstance(node, Function): return self._format_function( cast(ResolvedNode[Function], resolved), name, context_module, context_class, rst, ) elif isinstance(node, Variable): return self._format_variable( cast(ResolvedNode[Variable], resolved), name, context_module, context_class, rst, ) else: assert isinstance(node, Module) assert node.docstring is not None return node.docstring.splitlines() drgn-0.0.31/docs/exts/drgndoc/namespace.py000066400000000000000000000123051477777462700204230ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. 
# SPDX-License-Identifier: LGPL-2.1-or-later import itertools from typing import Generic, Iterator, List, Mapping, Sequence, TypeVar, Union from drgndoc.parse import ( Class, DocumentedNode, Function, Import, ImportFrom, Module, Node, Variable, ) NodeT_co = TypeVar("NodeT_co", bound=Node, covariant=True) class BoundNode(Generic[NodeT_co]): def __init__(self, name: str, node: NodeT_co) -> None: self.name = name self.node = node class ResolvedNode(Generic[NodeT_co]): def __init__( self, modules: Sequence[BoundNode[Module]], classes: Sequence[BoundNode[Class]], name: str, node: NodeT_co, ) -> None: self.modules = modules self.classes = classes self.name = name self.node = node def qualified_name(self) -> str: return ".".join( itertools.chain( (module.name for module in self.modules), (class_.name for class_ in self.classes), (self.name,), ) ) def attrs(self) -> Iterator["ResolvedNode[Node]"]: if isinstance(self.node, Module): modules = list(self.modules) modules.append(BoundNode(self.name, self.node)) for attr, node in self.node.attrs.items(): yield ResolvedNode(modules, self.classes, attr, node) elif isinstance(self.node, Class): classes = list(self.classes) classes.append(BoundNode(self.name, self.node)) for attr, node in self.node.attrs.items(): yield ResolvedNode(self.modules, classes, attr, node) def attr(self, attr: str) -> "ResolvedNode[Node]": if isinstance(self.node, Module): modules = list(self.modules) modules.append(BoundNode(self.name, self.node)) return ResolvedNode(modules, self.classes, attr, self.node.attrs[attr]) elif isinstance(self.node, Class): classes = list(self.classes) classes.append(BoundNode(self.name, self.node)) return ResolvedNode(self.modules, classes, attr, self.node.attrs[attr]) else: raise KeyError(attr) UnresolvedName = str class Namespace: def __init__(self, modules: Mapping[str, Module]) -> None: self.modules = modules # NB: this modifies the passed lists. 
    def _resolve_name(
        self,
        modules: List[BoundNode[Module]],
        classes: List[BoundNode[Class]],
        name_components: List[str],
    ) -> Union[ResolvedNode[DocumentedNode], UnresolvedName]:
        """
        Resolve dotted-name components relative to the given module/class
        stacks, following imports, returning either the resolved node or a
        best-effort qualified name string if resolution fails.
        """
        # Components are consumed from the end, so store them reversed.
        name_components.reverse()
        while name_components:
            # Look up the next component in the innermost visible scope: the
            # current class, else the current module, else top-level modules.
            attrs: Mapping[str, Node]
            if classes:
                attrs = classes[-1].node.attrs
            elif modules:
                attrs = modules[-1].node.attrs
            else:
                attrs = self.modules
            name = name_components.pop()
            try:
                node = attrs[name]
            except KeyError:
                break
            if isinstance(node, (Import, ImportFrom)):
                # Follow the import by rewriting the pending components and
                # adjusting the scope stacks.
                classes.clear()
                if isinstance(node, Import):
                    modules.clear()
                elif isinstance(node, ImportFrom):
                    if node.level >= len(modules):
                        # Relative import beyond top-level package. Bail.
                        break
                    # Absolute import is level 0, which clears the whole list.
                    del modules[-node.level :]
                    name_components.append(node.name)
                # For Import, node.module is always a str; for ImportFrom it
                # may be None (bare relative import).
                if node.module is not None:
                    name_components.extend(reversed(node.module.split(".")))
            elif name_components:
                # More components remain, so descend into the module or
                # class; other node kinds have no attributes.
                if isinstance(node, Module):
                    assert not classes
                    modules.append(BoundNode(name, node))
                elif isinstance(node, Class):
                    classes.append(BoundNode(name, node))
                else:
                    break
            else:
                # Last component: this is the resolved node.
                assert isinstance(node, (Module, Class, Function, Variable))
                return ResolvedNode(modules, classes, name, node)
        # Resolution failed: reassemble a qualified name from what was
        # resolved plus whatever components remain (still reversed).
        return ".".join(
            itertools.chain(
                (module.name for module in modules),
                (class_.name for class_ in classes),
                (name,),
                reversed(name_components),
            )
        )

    def resolve_global_name(
        self, name: str
    ) -> Union[ResolvedNode[DocumentedNode], UnresolvedName]:
        """Resolve a fully-qualified name starting from the top level."""
        return self._resolve_name([], [], name.split("."))

    def resolve_name_in_scope(
        self,
        modules: Sequence[BoundNode[Module]],
        classes: Sequence[BoundNode[Class]],
        name: str,
    ) -> Union[ResolvedNode[DocumentedNode], UnresolvedName]:
        """
        Resolve a (possibly dotted) name as seen from inside the given
        module/class scope. The first component must be an attribute of the
        innermost class or module; otherwise the name is returned unresolved.
        """
        name_components = name.split(".")
        attr = name_components[0]
        if classes and attr in classes[-1].node.attrs:
            classes = list(classes)
        elif modules and attr in modules[-1].node.attrs:
            classes = []
        else:
            return name
        modules = list(modules)
        return self._resolve_name(modules, classes, name_components)
drgn-0.0.31/docs/exts/drgndoc/parse.py000066400000000000000000000344731477777462700176130ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later import ast import inspect import operator import os.path import re import stat from typing import ( Any, Callable, Dict, Iterable, Mapping, Optional, Sequence, Tuple, Union, cast, overload, ) from drgndoc.visitor import NodeVisitor class _PreTransformer(ast.NodeTransformer): # Replace string forward references with the parsed expression. @overload def _visit_annotation(self, node: ast.expr) -> ast.expr: ... @overload def _visit_annotation(self, node: None) -> None: ... def _visit_annotation(self, node: Optional[ast.expr]) -> Optional[ast.expr]: if isinstance(node, ast.Constant) and isinstance(node.value, str): node = self.visit(ast.parse(node.value, "", "eval").body) return node def visit_arg(self, node: ast.arg) -> ast.arg: node = cast(ast.arg, self.generic_visit(node)) node.annotation = self._visit_annotation(node.annotation) return node def visit_FunctionDef(self, node: ast.FunctionDef) -> ast.FunctionDef: node = cast(ast.FunctionDef, self.generic_visit(node)) node.returns = self._visit_annotation(node.returns) return node def visit_AsyncFunctionDef( self, node: ast.AsyncFunctionDef ) -> ast.AsyncFunctionDef: node = cast(ast.AsyncFunctionDef, self.generic_visit(node)) node.returns = self._visit_annotation(node.returns) return node def visit_AnnAssign(self, node: ast.AnnAssign) -> ast.AnnAssign: node = cast(ast.AnnAssign, self.generic_visit(node)) node.annotation = self._visit_annotation(node.annotation) return node # Replace the old constant nodes produced by ast.parse() before Python 3.8 # with Constant. 
def visit_Num(self, node: Any) -> ast.Constant: return ast.copy_location(ast.Constant(node.n), node) def visit_Str(self, node: Any) -> ast.Constant: return ast.copy_location(ast.Constant(node.s), node) def visit_Bytes(self, node: Any) -> ast.Constant: return ast.copy_location(ast.Constant(node.s), node) def visit_Ellipsis(self, node: Any) -> ast.Constant: return ast.copy_location(ast.Constant(...), node) def visit_NameConstant(self, node: Any) -> ast.Constant: return ast.copy_location(ast.Constant(node.value), node) # Get rid of Index nodes, which are deprecated as of Python 3.9. def visit_Index(self, node: Any) -> Any: return self.visit(node.value) # Once we don't care about Python 3.6, we can replace all of this boilerplate # with dataclasses. class Module: def __init__( self, path: Optional[str], docstring: Optional[str], attrs: Mapping[str, "Node"] ) -> None: self.path = path self.docstring = docstring self.attrs = attrs def has_docstring(self) -> bool: return self.docstring is not None class Class: def __init__( self, bases: Sequence[ast.expr], docstring: Optional[str], attrs: Mapping[str, "NonModuleNode"], ) -> None: self.bases = bases self.docstring = docstring self.attrs = attrs def has_docstring(self) -> bool: if self.docstring is not None: return True init = self.attrs.get("__init__") return isinstance(init, Function) and init.has_docstring() class FunctionSignature: def __init__( self, args: ast.arguments, returns: Optional[ast.expr], decorator_list: Sequence[ast.expr], docstring: Optional[str], ) -> None: self.args = args self.returns = returns self.decorator_list = decorator_list self.docstring = docstring def has_decorator(self, name: str) -> bool: return any( isinstance(decorator, ast.Name) and decorator.id == name for decorator in self.decorator_list ) class Function: def __init__(self, async_: bool, signatures: Sequence[FunctionSignature]) -> None: self.async_ = async_ self.signatures = signatures def has_docstring(self) -> bool: return 
any(signature.docstring is not None for signature in self.signatures) class Variable: def __init__( self, annotation: Optional[ast.expr], docstring: Optional[str] ) -> None: self.annotation = annotation self.docstring = docstring def has_docstring(self) -> bool: return self.docstring is not None class Import: def __init__(self, module: str, aliased: bool) -> None: self.module = module self.aliased = aliased def has_docstring(self) -> bool: return False class ImportFrom: def __init__( self, name: str, module: Optional[str], level: int, aliased: bool ) -> None: self.name = name self.module = module self.level = level self.aliased = aliased def has_docstring(self) -> bool: return False Node = Union[Module, Class, Function, Variable, Import, ImportFrom] NonModuleNode = Union[Class, Function, Variable, Import, ImportFrom] DocumentedNode = Union[Module, Class, Function, Variable] def _docstring_from_node(node: Optional[ast.AST]) -> Optional[str]: if not isinstance(node, ast.Expr): return None node = node.value if isinstance(node, ast.Constant) and isinstance(node.value, str): text = node.value else: return None return inspect.cleandoc(text) def _transform_function(node: Function) -> Function: signature = node.signatures[-1] if ( signature.has_decorator("takes_program_or_default") and signature.docstring is not None ): match = re.search( r"^(\s*):(?:param|return|raises)", signature.docstring, flags=re.M ) if match: prefix = match.group(1) pos = match.start() else: prefix = "\n\n" pos = len(signature.docstring) signature.docstring = "".join( ( signature.docstring[:pos], prefix, ":param prog: Program, which :ref:`may be omitted to use the default program argument `.", signature.docstring[pos:], ) ) if signature.has_decorator("takes_object_or_program_or_default"): del signature.args.args[0] signature.args.args[0].annotation = ast.Subscript( value=ast.Name(id="Union", ctx=ast.Load()), slice=ast.Tuple( elts=[ ast.Name(id="Object", ctx=ast.Load()), ast.Name(id="Program", 
ctx=ast.Load()), ], ctx=ast.Load(), ), ctx=ast.Load(), ) return node class _ModuleVisitor(NodeVisitor): def visit(self, node: ast.AST) -> Tuple[Optional[str], Dict[str, NonModuleNode]]: self._attrs: Dict[str, NonModuleNode] = {} super().visit(node) docstring = self._docstring del self._docstring return docstring, self._attrs def visit_Module( self, node: ast.Module, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> None: self._docstring = ast.get_docstring(node) self.generic_visit(node) def visit_ClassDef( self, node: ast.ClassDef, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> None: attrs = self._attrs self._attrs = {} self.generic_visit(node) class_node = Class(node.bases, ast.get_docstring(node), self._attrs) self._attrs = attrs self._attrs[node.name] = class_node def _visit_function( self, node: Union[ast.FunctionDef, ast.AsyncFunctionDef], parent: Optional[ast.AST], sibling: Optional[ast.AST], ) -> None: signature = FunctionSignature( node.args, node.returns, node.decorator_list, ast.get_docstring(node) ) async_ = isinstance(node, ast.AsyncFunctionDef) func = self._attrs.get(node.name) # If we have a previous overload definition, we can add to it. # Otherwise, we replace it. if ( func and isinstance(func, Function) and func.async_ == async_ and func.signatures[-1].has_decorator("overload") ): signatures = list(func.signatures) signatures.append(signature) else: signatures = [signature] self._attrs[node.name] = _transform_function(Function(async_, signatures)) # NB: we intentionally don't visit the function body. visit_FunctionDef = _visit_function visit_AsyncFunctionDef = _visit_function def _add_assign( self, name: str, have_value: bool, annotation: Optional[ast.expr], docstring: Optional[str], ) -> None: try: var = self._attrs[name] except KeyError: pass else: # The name was previously defined. If it's a variable, add the # annotation and/or docstring. If this is an annotation without a # value, don't do anything. 
Otherwise, replace the previous # definition. if isinstance(var, Variable): if not annotation and docstring is None: return if not annotation: annotation = var.annotation if docstring is None: docstring = var.docstring elif not have_value: return self._attrs[name] = Variable(annotation, docstring) def visit_Assign( self, node: ast.Assign, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> None: if len(node.targets) == 1: docstring = _docstring_from_node(sibling) else: docstring = None for target in node.targets: if isinstance(target, ast.Name): self._add_assign(target.id, True, None, docstring) def visit_AnnAssign( self, node: ast.AnnAssign, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> None: if isinstance(node.target, ast.Name): self._add_assign( node.target.id, node.value is not None, node.annotation, _docstring_from_node(sibling), ) def visit_Import( self, node: ast.Import, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> None: for alias in node.names: if alias.asname is None: # We don't distinguish between "import foo" and "import # foo.bar"; they both add "foo" to the current scope. 
name = module_name = alias.name.partition(".")[0] else: name = alias.asname module_name = alias.name self._attrs[name] = Import(module_name, alias.asname is not None) def visit_ImportFrom( self, node: ast.ImportFrom, parent: Optional[ast.AST], sibling: Optional[ast.AST], ) -> None: for alias in node.names: name = alias.name if alias.asname is None else alias.asname self._attrs[name] = ImportFrom( alias.name, node.module, node.level, alias.asname is not None ) def parse_source( source: str, filename: str ) -> Tuple[Optional[str], Dict[str, NonModuleNode]]: node = ast.parse(source, filename) return _ModuleVisitor().visit(_PreTransformer().visit(node)) def _default_handle_err(e: Exception) -> None: raise e def parse_module( path: str, handle_err: Callable[[Exception], None] = _default_handle_err ) -> Optional[Tuple[Optional[str], Dict[str, NonModuleNode]]]: try: with open(path, "r") as f: source = f.read() except (OSError, UnicodeError) as e: handle_err(e) return None try: return parse_source(source, path) except SyntaxError as e: handle_err(e) return None def parse_package( path: str, handle_err: Callable[[Exception], None] = _default_handle_err ) -> Optional[Module]: module_path: Optional[str] = None docstring: Optional[str] = None attrs: Dict[str, Node] = {} init_path = os.path.join(path, "__init__.py") if os.path.isfile(init_path): module_path = init_path result = parse_module(init_path, handle_err) if result is not None: docstring = result[0] attrs = cast(Dict[str, Node], result[1]) try: entries = sorted(os.scandir(path), key=operator.attrgetter("name")) except OSError as e: handle_err(e) else: for entry in entries: try: is_dir = entry.is_dir() is_file = entry.is_file() except OSError as e: handle_err(e) continue if is_dir: subpackage = parse_package(entry.path, handle_err) if subpackage: attrs[entry.name] = subpackage elif is_file and entry.name != "__init__.py": root, ext = os.path.splitext(entry.name) if ext == ".py" or ext == ".pyi": result = 
parse_module(entry.path, handle_err) if result: attrs[root] = Module(entry.path, result[0], result[1]) if module_path is None and docstring is None and not attrs: return None return Module(module_path, docstring, attrs) def parse_paths( paths: Iterable[str], handle_err: Callable[[Exception], None] = _default_handle_err ) -> Mapping[str, Module]: modules = {} for path in paths: path = os.path.realpath(path) try: st = os.stat(path) except OSError as e: handle_err(e) continue if stat.S_ISDIR(st.st_mode): package = parse_package(path, handle_err) if package: modules[os.path.basename(path)] = package else: handle_err(Exception(f"{path}:Not a Python module or package")) else: result = parse_module(path, handle_err) if result: name = os.path.splitext(os.path.basename(path))[0] modules[name] = Module(path, result[0], result[1]) return modules drgn-0.0.31/docs/exts/drgndoc/util.py000066400000000000000000000003301477777462700174370ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later from typing import Optional def dot_join(*args: Optional[str]) -> str: return ".".join([s for s in args if s]) drgn-0.0.31/docs/exts/drgndoc/visitor.py000066400000000000000000000023621477777462700201700ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later import ast from typing import Any, Optional class NodeVisitor: """ Node visitor based on ast.NodeVisitor that also passes the parent node and (right) sibling node. 
""" def visit(self, node: ast.AST) -> Any: return self._visit(node, None, None) def _visit( self, node: ast.AST, parent: Optional[ast.AST], sibling: Optional[ast.AST] ) -> Any: method = "visit_" + node.__class__.__name__ visitor = getattr(self, method, None) if visitor is None: self.generic_visit(node) else: return visitor(node, parent, sibling) def generic_visit(self, node: ast.AST) -> None: for field, value in ast.iter_fields(node): if isinstance(value, list): prev = None for item in value: if isinstance(item, ast.AST): if prev: self._visit(prev, node, item) prev = item if prev: self._visit(prev, node, None) elif isinstance(value, ast.AST): self._visit(value, node, None) drgn-0.0.31/docs/exts/linuxsrc.py000066400000000000000000000061421477777462700167200ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: MIT """ Extension to reference Linux kernel code on git.kernel.org. The linux role links to a file in the Linux kernel code: :linux:`include/linux/list.h` Or a specific line in a file: :linux:`include/linux/list.h:100` By default, it links to Linus Torvald's master branch. This can be overriden for the rest of the document with the linuxversion directive: :linuxversion: v6.7 Or for a specific link: :linux:`include/linux/list.h@v6.6` :linux:`include/linux/list.h:600@v6.6` An explicit title can be given: :linux:`list_entry() ` The linuxt role is the same as the linux role except that it formats the title as inline text instead of inline code. 
""" import re from typing import Any, Dict, List, Tuple from docutils import nodes from docutils.nodes import Node, system_message import sphinx.application import sphinx.util.docutils class LinuxVersionDirective(sphinx.util.docutils.SphinxDirective): required_arguments = 1 optional_arguments = 0 def run(self) -> List[Node]: self.env.temp_data["linux_version"] = self.arguments[0] return [] class LinuxRole(sphinx.util.docutils.ReferenceRole): def __init__(self, code: bool) -> None: super().__init__() self._code = code def run(self) -> Tuple[List[Node], List[system_message]]: remainder, sep, head = self.target.rpartition("@") if not sep: remainder = head head = "" path, sep, line = remainder.rpartition(":") if not sep: path = line line = "" if not head: head = self.env.temp_data.get("linux_version", "master") if re.fullmatch(r"v[0-9]+\.[0-9]+.[0-9]+", head): base_url = ( "https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/tree/" ) else: base_url = "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/" url_parts = [base_url, path] if head != "master": url_parts.append("?h=") url_parts.append(head) if line: url_parts.append("#n") url_parts.append(line) url = "".join(url_parts) if self.has_explicit_title: title = self.title else: title_parts = [path] if line: title_parts.append(":") title_parts.append(line) title = "".join(title_parts) if self._code: reference = nodes.reference("", "", internal=False, refuri=url) reference += nodes.literal(title, title) else: reference = nodes.reference(title, title, internal=False, refuri=url) return [reference], [] def setup(app: sphinx.application.Sphinx) -> Dict[str, Any]: app.add_directive("linuxversion", LinuxVersionDirective) app.add_role("linux", LinuxRole(True)) app.add_role("linuxt", LinuxRole(False)) return {"env_version": 1, "parallel_read_safe": True, "parallel_write_safe": True} 
drgn-0.0.31/docs/exts/setuptools_config.py000066400000000000000000000022451477777462700206170ustar00rootroot00000000000000# Copyright Jason R. Coombs # SPDX-License-Identifier: MIT # Based on https://pypi.org/project/jaraco.packaging/. import os import subprocess import sys def setup(app): app.add_config_value("package_url", "", "") app.connect("config-inited", load_config_from_setup) app.connect("html-page-context", add_package_url) return {"parallel_read_safe": "True"} def load_config_from_setup(app, config): """ Replace values in config from package metadata """ # for now, assume project root is one level up root = os.path.join(app.confdir, "..") setup_script = os.path.join(root, "setup.py") fields = ["--name", "--version", "--url", "--author"] dist_info_cmd = [sys.executable, setup_script] + fields output = subprocess.check_output(dist_info_cmd, cwd=root, universal_newlines=True) outputs = output.strip().split("\n") project, version, url, author = outputs config.project = project config.version = config.release = version config.package_url = url config.author = config.copyright = author def add_package_url(app, pagename, templatename, context, doctree): context["package_url"] = app.config.package_url drgn-0.0.31/docs/favicon.ico000066400000000000000000000004761477777462700156410ustar00rootroot00000000000000(( 5R.0[Mh`!PAC4C330HA4C14C`0pgdrgn-0.0.31/docs/getting_debugging_symbols.rst000066400000000000000000000450311477777462700214720ustar00rootroot00000000000000Getting Debugging Symbols ========================= .. highlight:: console drgn needs debugging symbols in order to interpret the target program. If drgn prints a warning like:: $ drgn warning: missing debugging symbols for kernel 6.13.8-200.fc41.x86_64 critical: missing some debugging symbols; see https://drgn.readthedocs.io/en/latest/getting_debugging_symbols.html ... then you need to get debugging symbols. 
The method depends on whether the binary that is missing debugging symbols was built manually or is provided by your Linux distribution. Note that you only need debugging symbols for the binaries you're actually debugging. If the warnings are for modules, shared libraries, etc. that you don't care about, feel free to ignore them. Since drgn 0.0.31, you can run drgn with ``--log-level debug`` to get logs of where drgn looked for debugging symbols. Building With Debugging Symbols ------------------------------- If the binary that drgn warns about is one that you built yourself, then you need to rebuild it with debugging symbols. Here is a quick overview of how to do that in different build systems: .. list-table:: :header-rows: 1 * - Build System - Instructions * - Linux Kernel - Since Linux 5.18: In ``menuconfig``, set ``Kernel hacking -> Compile-time checks and compiler options -> Debug information`` to ``Rely on the toolchain's implicit default DWARF version``. Or, add ``CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y`` to :file:`.config`. Before Linux 5.18: In ``menuconfig``, enable ``Kernel hacking -> Compile-time checks and compiler options -> Compile the kernel with debug info``. Or, add ``CONFIG_DEBUG_INFO=y`` to :file:`.config`. * - `Meson `_ - Run ``meson setup --buildtype=debugoptimized $builddir`` or ``meson setup --buildtype=debug $builddir``. * - `CMake `_ - Run ``cmake --build $builddir -DCMAKE_BUILD_TYPE=RelWithDebInfo`` or ``cmake --build $builddir -DCMAKE_BUILD_TYPE=Debug``. Or, add ``set(CMAKE_BUILD_TYPE RelWithDebInfo)`` or ``set(CMAKE_BUILD_TYPE Debug)`` to :file:`CMakeLists.txt`. * - Autotools - Depends on the project, but usually ``CFLAGS="-Og -g" ./configure``. * - Make - Depends on the project, but usually ``CFLAGS="-Og -g" make``. * - None (GCC or Clang directly) - Pass ``-Og -g`` options. Consult your build system's documentation for details. 
Debugging Symbols for Linux Distribution Packages ------------------------------------------------- Most Linux distributions don't install debugging symbols for installed packages by default. If the binary that drgn warns about is part of your Linux distribution, then you have two options: manual installation through the package manager or automatic downloads using debuginfod. This section documents how to do both on common Linux distributions, including flow charts for recommended practices. .. contents:: Contents :depth: 1 :local: :backlinks: none Debuginfod ^^^^^^^^^^ `debuginfod `_ is a service providing debugging symbols via an HTTP API. Many Linux distributions run a debuginfod server for their packages, and some automatically enable it. Debugging symbols can be downloaded via debuginfod automatically, so it typically provides the best user experience. However, there are a few caveats, especially when debugging the Linux kernel: 1. Before drgn 0.0.31, drgn did not support using debuginfod for the Linux kernel. 2. Except on Fedora's debuginfod server, downloading debugging symbols for the Linux kernel is extremely slow due to `technical limitations that have been fixed upstream `_ but not yet deployed on other distributions. As a result, since drgn 0.0.31, when debugging the Linux kernel, drgn only uses debuginfod on Fedora. 3. Before drgn 0.0.31, while drgn is downloading from debuginfod, it can't be interrupted with :kbd:`Ctrl-C`, and it doesn't print a progress bar. .. _debuginfod-support: Since drgn 0.0.31, drgn includes whether it was built with debuginfod support in its version string (look for "with debuginfod"):: $ drgn --version drgn 0.0.31 (using Python 3.13.2, elfutils 0.192, with debuginfod (dlopen), with libkdumpfile) If you built drgn from source and the version string includes "without debuginfod", make sure you installed the :ref:`necessary dependencies ` and rebuild drgn. 
Before drgn 0.0.31, drgn doesn't need to be built specifically with debuginfod support. Fedora ^^^^^^ .. graphviz:: digraph { start [ label = "Need debugging\nsymbols on Fedora" style = filled fillcolor = lightpink ] drgn_version [ label = "What version\nof drgn?" shape = diamond style = filled fillcolor = khaki1 ] use_debuginfod [ label = "Use debuginfod\n(automatic)" style = filled fillcolor = palegreen ] use_dnf [ label = "Manually install with\ndnf debuginfo-install" style = filled fillcolor = palegreen ] start -> drgn_version drgn_version -> use_debuginfod [ label = ">= 0.0.31" ] drgn_version -> use_dnf [ label = "< 0.0.31" ] } Debuginfod """""""""" Fedora automatically enables debuginfod by default. Since drgn 0.0.31, drgn can even use debuginfod for Linux kernel debugging symbols. If debuginfod is not working, :ref:`make sure ` your build of drgn supports it and try running:: $ sudo dnf install elfutils-debuginfod-client $ source /etc/profile.d/debuginfod.sh Also see the `Fedora debuginfod documentation `_. Manual Installation """"""""""""""""""" Debugging symbols can also be installed manually on Fedora with ``sudo dnf debuginfo-install $package``. To install symbols for the running kernel:: $ sudo dnf debuginfo-install kernel-$(uname -r) To find out what package owns a binary, use ``rpm -qf``:: $ rpm -qf "$(command -v python3)" python3-3.13.2-1.fc41.x86_64 $ sudo dnf debuginfo-install python3 Also see the `Fedora documentation `_. CentOS Stream ^^^^^^^^^^^^^ .. graphviz:: digraph { start [ label = "Need debugging symbols\non CentOS Stream" style = filled fillcolor = lightpink ] drgn_version [ label = "What version\nof drgn?" shape = diamond style = filled fillcolor = khaki1 ] kernel [ label = "Are you\ndebugging the\nLinux kernel?" 
shape = diamond style = filled fillcolor = khaki1 ] use_debuginfod [ label = "Use debuginfod\n(automatic)" style = filled fillcolor = palegreen ] use_dnf [ label = "Manually install with\ndnf debuginfo-install" style = filled fillcolor = palegreen ] start -> drgn_version drgn_version -> kernel [ label = ">= 0.0.31" ] drgn_version -> use_dnf [ label = "< 0.0.31" ] kernel -> use_dnf [ label = "Yes" ] kernel -> use_debuginfod [ label = "No" ] } Debuginfod """""""""" CentOS Stream automatically enables debuginfod by default since CentOS Stream 9. drgn will not use it for Linux kernel debugging symbols by default. If debuginfod is not working, :ref:`make sure ` your build of drgn supports it and try running:: $ sudo dnf install elfutils-debuginfod-client $ source /etc/profile.d/debuginfod.sh Manual Installation """"""""""""""""""" Debugging symbols can be installed manually on CentOS Stream with ``sudo dnf debuginfo-install $package``. To install symbols for the running kernel:: $ sudo dnf debuginfo-install kernel-$(uname -r) To find out what package owns a binary, use ``rpm -qf``:: $ rpm -qf "$(command -v python3)" python3-3.12.9-1.el10.x86_64 $ sudo dnf debuginfo-install python3 Debian ^^^^^^ .. graphviz:: digraph { start [ label = "Need debugging\nsymbols on Debian" style = filled fillcolor = lightpink ] drgn_version [ label = "What version\nof drgn?" shape = diamond style = filled fillcolor = khaki1 ] kernel [ label = "Are you\ndebugging the\nLinux kernel?" 
shape = diamond style = filled fillcolor = khaki1 ] enable_debug_repos [ label = "Enable debug\nrepositories" shape = rectangle style = filled fillcolor = bisque ] use_apt [ label = "Manually install\nwith apt" style = filled fillcolor = palegreen ] enable_debuginfod [ label = "Enable debuginfod" shape = rectangle style = filled fillcolor = bisque ] use_debuginfod [ label = "Use debuginfod" style = filled fillcolor = palegreen ] start -> drgn_version drgn_version -> kernel [ label = ">= 0.0.31" ] drgn_version -> enable_debug_repos [ label = "< 0.0.31" ] kernel -> enable_debug_repos [ label = "Yes" ] enable_debug_repos -> use_apt kernel -> enable_debuginfod [ label = "No" ] enable_debuginfod -> use_debuginfod } Debuginfod """""""""" On Debian, debuginfod must be enabled manually:: $ sudo apt install libdebuginfod-common $ sudo ln -s /usr/share/libdebuginfod-common/debuginfod.sh /usr/share/libdebuginfod-common/debuginfod.csh /etc/profile.d $ source /etc/profile.d/debuginfod.sh drgn will not use it for Linux kernel debugging symbols by default. Also see the `Debian debuginfod documentation `_. Manual Installation """"""""""""""""""" On Debian, the debugging symbol repositories must be added manually:: $ sudo apt install lsb-release $ sudo tee /etc/apt/sources.list.d/debug.list << EOF deb http://deb.debian.org/debian-debug/ $(lsb_release -cs)-debug main deb http://deb.debian.org/debian-debug/ $(lsb_release -cs)-proposed-updates-debug main EOF $ sudo apt update Then, debugging symbol packages can be installed with ``sudo apt install``. 
To install symbols for the running kernel:: $ sudo apt install linux-image-$(uname -r)-dbg Some debugging symbol packages are named with a ``-dbg`` suffix and some are named with a ``-dbgsym`` suffix:: $ sudo apt install python3-dbg $ sudo apt install coreutils-dbgsym You can use the ``find-dbgsym-packages`` command from the ``debian-goodies`` package to find the correct name:: $ sudo apt install debian-goodies $ find-dbgsym-packages $(command -v python3) libc6-dbg libexpat1-dbgsym python3.11-dbg zlib1g-dbgsym $ find-dbgsym-packages $(command -v cat) coreutils-dbgsym libc6-dbg Also see the `Debian documentation `_. Ubuntu ^^^^^^ .. graphviz:: digraph { start [ label = "Need debugging\nsymbols on Ubuntu" style = filled fillcolor = lightpink ] drgn_version [ label = "What version\nof drgn?" shape = diamond style = filled fillcolor = khaki1 ] kernel [ label = "Are you\ndebugging the\nLinux kernel?" shape = diamond style = filled fillcolor = khaki1 ] enable_debug_repos [ label = "Enable debug\nrepositories" shape = rectangle style = filled fillcolor = bisque ] use_apt [ label = "Manually install\nwith apt" style = filled fillcolor = palegreen ] use_debuginfod [ label = "Use debuginfod\n(automatic)" style = filled fillcolor = palegreen ] start -> drgn_version drgn_version -> kernel [ label = ">= 0.0.31" ] drgn_version -> enable_debug_repos [ label = "< 0.0.31" ] kernel -> enable_debug_repos [ label = "Yes" ] enable_debug_repos -> use_apt kernel -> use_debuginfod [ label = "No" ] } Debuginfod """""""""" Ubuntu automatically enables debuginfod by default since Ubuntu 22.04 (Jammy Jellyfish). drgn will not use it for Linux kernel debugging symbols by default. If debuginfod is not working, :ref:`make sure ` your build of drgn supports it and try running:: $ sudo apt install libdebuginfod-common $ source /etc/profile.d/debuginfod.sh Also see the `Ubuntu debuginfod documentation `_. 
Manual Installation """"""""""""""""""" On Ubuntu, the debugging symbol archive signing key must be installed and the debugging symbol repositories must be added manually:: $ sudo apt install lsb-release ubuntu-dbgsym-keyring $ sudo tee /etc/apt/sources.list.d/debug.list << EOF deb http://ddebs.ubuntu.com $(lsb_release -cs) main restricted universe multiverse deb http://ddebs.ubuntu.com $(lsb_release -cs)-updates main restricted universe multiverse deb http://ddebs.ubuntu.com $(lsb_release -cs)-proposed main restricted universe multiverse EOF $ sudo apt update Then, debugging symbol packages can be installed with ``sudo apt install``. To install symbols for the running kernel:: $ sudo apt install linux-image-$(uname -r)-dbgsym Some debugging symbol packages are named with a ``-dbg`` suffix and some are named with a ``-dbgsym`` suffix:: $ sudo apt install python3-dbg $ sudo apt install coreutils-dbgsym You can use the ``find-dbgsym-packages`` command from the ``debian-goodies`` package to find the correct name:: $ sudo apt install debian-goodies $ find-dbgsym-packages $(command -v python3) libc6-dbg libexpat1-dbgsym python3.12-dbg zlib1g-dbgsym $ find-dbgsym-packages $(command -v cat) coreutils-dbgsym libc6-dbg Also see the `Ubuntu documentation `_. Arch Linux ^^^^^^^^^^ .. graphviz:: digraph { start [ label = "Need debugging symbols\non Arch Linux" style = filled fillcolor = lightpink ] kernel [ label = "Are you\ndebugging the\nLinux kernel?" shape = diamond style = filled fillcolor = khaki1 ] not_available [ label = "Debugging symbols\nare not available" style = filled fillcolor = lightpink ] use_debuginfod [ label = "Use debuginfod\n(automatic)" style = filled fillcolor = palegreen ] start -> kernel kernel -> not_available [ label = "Yes" ] kernel -> use_debuginfod [ label = "No" ] } Debuginfod """""""""" Arch Linux automatically enables debuginfod by default. However, debugging symbols are not available for the Linux kernel. 
If debuginfod is not working, :ref:`make sure ` your build of drgn supports it and try running:: $ sudo pacman -S --needed libelf $ source /etc/profile.d/debuginfod.sh Also see the `Arch Linux debuginfod documentation `_. Manual Installation """"""""""""""""""" Arch Linux does not provide debugging symbol packages. openSUSE ^^^^^^^^ .. graphviz:: digraph { start [ label = "Need debugging symbols\non openSUSE" style = filled fillcolor = lightpink ] distribution [ label = "Which\ndistribution?" shape = diamond style = filled fillcolor = khaki1 ] drgn_version [ label = "What version\nof drgn?" shape = diamond style = filled fillcolor = khaki1 ] kernel [ label = "Are you\ndebugging the\nLinux kernel?" shape = diamond style = filled fillcolor = khaki1 ] use_debuginfod [ label = "Use debuginfod\n(automatic)" style = filled fillcolor = palegreen ] use_zypper [ label = "Manually install\nwith zypper" style = filled fillcolor = palegreen ] start -> distribution distribution -> drgn_version [ label = "Tumbleweed" ] distribution -> use_zypper [ label = "Leap" ] drgn_version -> kernel [ label = ">= 0.0.31" ] drgn_version -> use_zypper [ label = "< 0.0.31" ] kernel -> use_zypper [ label = "Yes" ] kernel -> use_debuginfod [ label = "No" ] } Debuginfod """""""""" openSUSE Tumbleweed automatically enables debuginfod by default. drgn will not use it for Linux kernel debugging symbols by default. If debuginfod is not working, :ref:`make sure ` your build of drgn supports it and try running:: $ sudo zypper install debuginfod-client $ source /etc/profile.d/debuginfod.sh openSUSE Leap does not support debuginfod. 
Manual Installation """"""""""""""""""" Debugging symbols can be installed manually on openSUSE with:: $ sudo zypper --plus-content debug install "${package}-debuginfo" To install symbols for the running kernel:: $ zypper --plus-content debug install "$(rpm --qf '%{NAME}-debuginfo-%{VERSION}-%{RELEASE}.%{ARCH}' -qf /boot/vmlinuz-"$(uname -r)")" To find out what package owns a binary, use ``rpm -qf``:: $ rpm -qf "$(command -v python3)" python313-base-3.13.2-3.1.x86_64 $ sudo zypper --plus-content debug install python313-base-debuginfo Oracle Linux ^^^^^^^^^^^^ Oracle Linux provides documentation on installing debugging symbols for the Linux kernel. See the documentation for `Oracle Linux 9 `_ and `Oracle Linux 8 `_. drgn-0.0.31/docs/helpers.rst000066400000000000000000000000321477777462700157000ustar00rootroot00000000000000.. drgndoc:: drgn.helpers drgn-0.0.31/docs/index.rst000066400000000000000000000017121477777462700153530ustar00rootroot00000000000000drgn ==== .. include:: ../README.rst :start-after: start-introduction :end-before: end-introduction In addition to the main Python API, an experimental C library, ``libdrgn``, is also available. See the :doc:`installation` instructions. Then, start with the :doc:`user_guide`. .. include:: ../README.rst :start-after: start-for-index :end-before: end-for-index Acknowledgements ---------------- drgn is named after `this `_ because dragons eat `dwarves `_. Table of Contents ----------------- .. toctree:: :caption: Using drgn :maxdepth: 3 installation user_guide advanced_usage getting_debugging_symbols .. toctree:: :caption: Tutorials :maxdepth: 3 tutorials case_studies .. toctree:: :caption: Reference :maxdepth: 3 api_reference helpers Man Page support_matrix release_highlights drgn-0.0.31/docs/installation.rst000066400000000000000000000033361477777462700167510ustar00rootroot00000000000000Installation ============ There are several options for installing drgn. 
Dependencies ------------ drgn depends on: - `Python `_ 3.6 or newer - `elfutils `_ 0.165 or newer It optionally depends on: - `libkdumpfile `_ for `makedumpfile `_ compressed kernel core dump format support The build requires: - `GCC `_ - `GNU Make `_ - `pkgconf `_ - `setuptools `_ Running tests requires: - `check `_ 0.10.0 or newer Building from the Git repository (rather than a release tarball) additionally requires: - `autoconf `_ - `automake `_ - `libtool `_ .. include:: ../README.rst :start-after: start-installation :end-before: end-installation .. highlight:: console Virtual Environment ^^^^^^^^^^^^^^^^^^^ The above options all install drgn globally. You can also install drgn in a `virtual environment `_, either with pip:: $ python3 -m venv drgnenv $ source drgnenv/bin/activate (drgnenv) $ pip3 install drgn (drgnenv) $ drgn --help Or from source:: $ python3 -m venv drgnenv $ source drgnenv/bin/activate (drgnenv) $ python3 setup.py install (drgnenv) $ drgn --help Running Locally --------------- If you build drgn from source, you can also run it without installing it:: $ python3 setup.py build_ext -i $ python3 -m drgn --help drgn-0.0.31/docs/man/000077500000000000000000000000001477777462700142645ustar00rootroot00000000000000drgn-0.0.31/docs/man/drgn.rst000066400000000000000000000134741477777462700157610ustar00rootroot00000000000000drgn ==== Synopsis -------- | **drgn** [*OPTION*...] | **drgn** [*OPTION*...] *SCRIPT* [*ARGUMENT*...] | **drgn** [*OPTION*...] **-e** *CODE* [*ARGUMENT*...] Description ----------- :command:`drgn` (pronounced "dragon") is a debugger with an emphasis on programmability. It provides APIs for using the types, variables, and stack traces in a program or core dump from Python, allowing for easy, expressive scripting and more complex debugging. Full documentation is available online at https://drgn.readthedocs.io/. For in-program documentation, try ``help(drgn)``. Options ------- .. 
program:: drgn If no positional arguments are given (and :option:`-e` is not given), then drgn runs in *interactive mode*: commands are read from the terminal. Relevant helpers are automatically imported. An empty string is prepended to :py:data:`sys.path`. If positional arguments are given (and :option:`-e` is not given), then drgn runs in *script mode*: *SCRIPT* is executed with the given *ARGUMENT*\ s. Nothing is imported automatically. :py:data:`sys.argv[0] ` is set to *SCRIPT* and the remaining arguments are added to :py:data:`sys.argv`. The parent directory of *SCRIPT* is prepended to :py:data:`sys.path`. .. option:: -e {CODE} Evaluate the given code and exit. Relevant helpers are automatically imported. :py:data:`sys.argv[0] ` is set to *-e* and the remaining arguments are added to :py:data:`sys.argv`. An empty string is prepended to :py:data:`sys.path`. Program Selection ^^^^^^^^^^^^^^^^^ One of these options may be given to specify what program to debug. .. option:: -k, --kernel Debug the running kernel. This is the default. .. option:: -c, --core {PATH} Debug the given core dump. .. option:: -p, --pid {PID} Debug the running process with the given process ID. Debugging Symbols ^^^^^^^^^^^^^^^^^ .. option:: -s, --symbols {PATH} Load debugging symbols from the given file. If the file does not correspond to a loaded executable, library, or module, then a warning is printed and it is ignored; see :option:`--extra-symbols` for an alternative. This option may be given more than once. .. option:: --main-symbols Only load debugging symbols for the main executable and those added with :option:`-s` or :option:`--extra-symbols`. .. option:: --no-default-symbols Don't load any debugging symbols that were not explicitly added with :option:`-s` or :option:`--extra-symbols`. .. option:: --extra-symbols {PATH} Load additional debugging symbols from the given file, which is assumed not to correspond to a loaded executable, library, or module. 
This option may be given more than once. The following options correspond to :py:attr:`drgn.Program.debug_info_options` in the Python API. .. option:: --try-symbols-by {METHOD[,METHOD...]} Enable loading debugging symbols using the given methods. *METHOD* may be: * The name of a debugging information finder (``standard``, ``debuginfod``, or any added by plugins). * ``module-name``: if the name of a module looks like a filesystem path, try the file at that path. * ``build-id``: search by build ID. * ``debug-link``: search by debug link (e.g., ``.gnu_debuglink``). * ``procfs``: try :file:`/proc/{pid}/exe` or :file:`/proc/{pid}/map_files`. * ``embedded-vdso``: try vDSO data saved in a core dump. * ``reuse``: try reusing a previously used file. * ``supplementary``: try finding supplementary files (e.g., ``.gnu_debugaltlink``). * ``kmod=depmod``: search using *depmod* metadata. * ``kmod=walk``: search by walking kernel directories. * ``kmod=depmod-or-walk``: search using *depmod* metadata if it is available or by walking kernel directories if *depmod* metadata does not exist. * ``kmod=depmod-and-walk``: search using *depmod* metadata if it is available, then by walking kernel directories if *depmod* metadata does not exist or does not contain the desired module. Multiple methods may be enabled by passing a comma-separated list. This option may be given more than once, in which case the lists will be combined. .. option:: --no-symbols-by {METHOD[,METHOD...]} Disable loading debugging symbols using the given methods. *METHOD* may be the name of a debugging information finder, ``module-name``, ``build-id``, ``debug-link``, ``procfs``, ``embedded-vdso``, ``reuse``, ``supplementary``, or ``kmod``. Multiple methods may be disabled by passing a comma-separated list. This option may be given more than once, in which case the lists will be combined. .. option:: --debug-directory {PATH} Search for debugging symbols by build ID and debug link in the given directory. 
This option may be given more than once to search in multiple directories. .. option:: --no-default-debug-directories Don't search for debugging symbols by build ID and debug link in the standard directories or those added by plugins. .. option:: --kernel-directory {PATH} Search for the kernel image and loadable kernel modules in the given directory. This option may be given more than once to search in multiple directories. .. option:: --no-default-kernel-directories Don't search for the kernel image and loadable kernel modules in the standard directories or those added by plugins. Logging ^^^^^^^ .. option:: --log-level {\{debug,info,warning,error,critical,none\}} Log messages of at least the given level to standard error. The default is *warning*. .. option:: -q, --quiet Don't print any logs or download progress. This is equivalent to :option:`--log-level none <--log-level>`. Generic Information ^^^^^^^^^^^^^^^^^^^ .. option:: -h, --help Show a help message and exit. .. option:: --version Show :command:`drgn`'s version information and exit. drgn-0.0.31/docs/release_highlights.rst000066400000000000000000000007701477777462700201010ustar00rootroot00000000000000Release Highlights ================== These are highlights of each release of drgn focusing on a few exciting items from the full `release notes `_. .. toctree:: release_highlights/0.0.31.rst release_highlights/0.0.30.rst release_highlights/0.0.28.rst release_highlights/0.0.27.rst release_highlights/0.0.26.rst release_highlights/0.0.25.rst release_highlights/0.0.24.rst release_highlights/0.0.23.rst release_highlights/0.0.22.rst drgn-0.0.31/docs/release_highlights/000077500000000000000000000000001477777462700173435ustar00rootroot00000000000000drgn-0.0.31/docs/release_highlights/0.0.22.rst000066400000000000000000000303601477777462700206160ustar00rootroot000000000000000.0.22 (Released January 5th, 2023) =================================== These are some of the highlights of drgn 0.0.22. 
See the `GitHub release `_ for the full release notes, including more improvements and bug fixes. .. highlight:: pycon Listing Stack Frame Locals -------------------------- :meth:`drgn.StackFrame.locals()` returns the names of all arguments and local variables in the scope of a stack frame. This allows you to get a quick idea of what's going on in a function without needing to read the source code right away. Let's use the ``__schedule`` stack frame from the following trace as an example:: >>> trace = prog.stack_trace(1) >>> trace #0 context_switch (./kernel/sched/core.c:5209:2) #1 __schedule (./kernel/sched/core.c:6521:8) #2 schedule (./kernel/sched/core.c:6597:3) #3 do_wait (./kernel/exit.c:1562:4) #4 kernel_wait4 (./kernel/exit.c:1706:8) #5 __do_sys_wait4 (./kernel/exit.c:1734:13) #6 do_syscall_x64 (./arch/x86/entry/common.c:50:14) #7 do_syscall_64 (./arch/x86/entry/common.c:80:7) #8 entry_SYSCALL_64+0x9b/0x197 (./arch/x86/entry/entry_64.S:120) #9 0x7f6a34a00057 >>> trace[1].locals() ['sched_mode', 'prev', 'next', 'switch_count', 'prev_state', 'rf', 'rq', 'cpu'] >>> for name in trace[1].locals(): ... print(name, trace[1][name].format_(dereference=False)) ... sched_mode (unsigned int)0 prev (struct task_struct *)0xffffa3b601178000 next (struct task_struct *)0xffffa3b6026db800 switch_count (unsigned long *)0xffffa3b601178528 prev_state (unsigned long) rf (struct rq_flags){ .flags = (unsigned long)1, .cookie = (struct pin_cookie){}, .clock_update_flags = (unsigned int)4, } rq (struct rq *)0xffffa3b67fda9640 cpu (int) Compare this to the `kernel source code `_. Note that some of the variables have been optimized out by the compiler. This feature was contributed by Stephen Brennan. Merged Slab Caches ------------------ The Linux kernel slab allocator merges "similar" slab caches as an optimization, which often causes confusion. 
:func:`~drgn.helpers.linux.slab.slab_cache_is_merged()` (added back in 0.0.20) returns whether or not a slab cache has been merged, but not what it was merged with. In this release, Stephen Brennan added :func:`~drgn.helpers.linux.slab.get_slab_cache_aliases()`, which provides a mapping from a slab cache name to the name of the cache it was merged into:: >>> get_slab_cache_aliases(prog) {'io_kiocb': 'maple_node', 'ip_dst_cache': 'uid_cache', 'aio_kiocb': 'uid_cache', 'ip_fib_alias': 'Acpi-Parse', 'pid_namespace': 'pid', 'iommu_iova': 'vmap_area', 'fasync_cache': 'ftrace_event_field', 'dnotify_mark': 'Acpi-State', 'tcp_bind2_bucket': 'vmap_area', 'nsproxy': 'Acpi-Operand', 'shared_policy_node': 'ftrace_event_field', 'eventpoll_epi': 'pid', 'fib6_nodes': 'vmap_area', 'Acpi-Namespace': 'ftrace_event_field', 'posix_timers_cache': 'maple_node', 'inotify_inode_mark': 'Acpi-State', 'kernfs_iattrs_cache': 'trace_event_file', 'fs_cache': 'vmap_area', 'UDP-Lite': 'UDP', 'anon_vma_chain': 'vmap_area', 'ip6_dst_cache': 'maple_node', 'eventpoll_pwq': 'vmap_area', 'inet_peer_cache': 'uid_cache', 'fsnotify_mark_connector': 'numa_policy', 'ip_fib_trie': 'ftrace_event_field', 'filp': 'maple_node', 'dnotify_struct': 'numa_policy', 'UDPLITEv6': 'UDPv6', 'biovec-16': 'maple_node', 'PING': 'signal_cache', 'ep_head': 'blkdev_ioc', 'tcp_bind_bucket': 'pid', 'Acpi-ParseExt': 'Acpi-State', 'cred_jar': 'pid', 'ovl_aio_req': 'pid', 'pool_workqueue': 'maple_node', 'sigqueue': 'Acpi-State', 'file_lock_ctx': 'Acpi-Parse', 'kernfs_node_cache': 'pid'} This means that if you're looking for ``io_kiocb`` allocations, you actually need to look at the ``maple_node`` slab cache. 
Conversely, if you're looking at the ``maple_node`` slab cache, you need to be aware that it also contains allocations from all of the following slab caches:: >>> [merged for merged, canonical in get_slab_cache_aliases(prog).items() if canonical == "maple_node"] ['io_kiocb', 'posix_timers_cache', 'ip6_dst_cache', 'filp', 'biovec-16', 'pool_workqueue'] Slab Address Information ------------------------ This release extended :func:`~drgn.helpers.common.memory.identify_address()` to show additional information about slab allocations:: >>> ptr1 = 0xffffa3b601178438 >>> ptr2 = 0xffffa3b601176cc0 >>> identify_address(prog, ptr1) 'slab object: task_struct+0x438' >>> identify_address(prog, ptr2) 'free slab object: mm_struct+0x0' This means that ``ptr1`` is an address 0x438 bytes into an allocated object from the ``task_struct`` slab cache, and ``ptr2`` is a free object from the ``mm_struct`` slab cache. :func:`~drgn.helpers.linux.slab.slab_object_info()` provides the same information programmatically:: >>> slab_object_info(prog, ptr1) SlabObjectInfo(slab_cache=Object(prog, 'struct kmem_cache *', value=0xffffa3b601045500), slab=Object(prog, 'struct slab *', value=0xffffe80840045e00), address=0xffffa3b601178000, allocated=True) >>> slab_object_info(prog, ptr2) SlabObjectInfo(slab_cache=Object(prog, 'struct kmem_cache *', value=0xffffa3b601045900), slab=Object(prog, 'struct slab *', value=0xffffe80840045c00), address=0xffffa3b601176cc0, allocated=False) Annotated Stack Memory ---------------------- :func:`~drgn.helpers.common.stack.print_annotated_stack()` prints a stack trace and all of its memory, identifying anything that it can:: >>> print_annotated_stack(prog.stack_trace(1)) STACK POINTER VALUE [stack frame #0 at 0xffffffffaf8a68e9 (__schedule+0x429/0x488) in context_switch at ./kernel/sched/core.c:5209:2 (inlined)] [stack frame #1 at 0xffffffffaf8a68e9 (__schedule+0x429/0x488) in __schedule at ./kernel/sched/core.c:6521:8] ffffbb1ac0013d28: ffffffffaf4498f5 [function 
symbol: __flush_tlb_one_user+0x5] ffffbb1ac0013d30: 00000000af449feb ffffbb1ac0013d38: 0000000000000001 ffffbb1ac0013d40: 0000000000000004 ffffbb1ac0013d48: 25c5ff9539edc200 ffffbb1ac0013d50: ffffa3b601178000 [slab object: task_struct+0x0] ffffbb1ac0013d58: ffffa3b601178000 [slab object: task_struct+0x0] ffffbb1ac0013d60: ffffbb1ac0013e10 ffffbb1ac0013d68: ffffa3b601177ff0 [slab object: mm_struct+0x70] ffffbb1ac0013d70: ffffa3b601178000 [slab object: task_struct+0x0] ffffbb1ac0013d78: ffffa3b601178000 [slab object: task_struct+0x0] ffffbb1ac0013d80: ffffffffaf8a69d1 [function symbol: schedule+0x89] [stack frame #2 at 0xffffffffaf8a69d1 (schedule+0x89/0xc7) in schedule at ./kernel/sched/core.c:6597:3] ffffbb1ac0013d88: ffffbb1ac0013de8 ffffbb1ac0013d90: 0000000000000000 ffffbb1ac0013d98: ffffffffaf4595ee [function symbol: do_wait+0x231] [stack frame #3 at 0xffffffffaf4595ee (do_wait+0x231/0x2e3) in do_wait at ./kernel/exit.c:1562:4] ffffbb1ac0013da0: ffffa3b601178450 [slab object: task_struct+0x450] ffffbb1ac0013da8: ffffa3b601178000 [slab object: task_struct+0x0] ffffbb1ac0013db0: 0000000000000004 ffffbb1ac0013db8: 0000000000000000 ffffbb1ac0013dc0: 00007ffe0984a170 ffffbb1ac0013dc8: 0000000000000000 ffffbb1ac0013dd0: fffffffffffffffd ffffbb1ac0013dd8: 0000000000000004 ffffbb1ac0013de0: ffffffffaf45a42f [function symbol: kernel_wait4+0xc2] [stack frame #4 at 0xffffffffaf45a42f (kernel_wait4+0xc2/0x11b) in kernel_wait4 at ./kernel/exit.c:1706:8] ffffbb1ac0013de8: 0000000400000004 ffffbb1ac0013df0: 0000000000000000 ffffbb1ac0013df8: 0000000000000000 ffffbb1ac0013e00: 0000000000000000 ffffbb1ac0013e08: 0000000000000000 ffffbb1ac0013e10: ffffffff00000000 ffffbb1ac0013e18: ffffa3b601178000 [slab object: task_struct+0x0] ffffbb1ac0013e20: ffffffffaf45890c [function symbol: child_wait_callback+0x0] ffffbb1ac0013e28: ffffa3b601188028 [slab object: signal_cache+0x28] ffffbb1ac0013e30: ffffa3b601188028 [slab object: signal_cache+0x28] ffffbb1ac0013e38: 000055d500000000 
ffffbb1ac0013e40: 25c5ff9539edc200 ffffbb1ac0013e48: 0000000000000000 ffffbb1ac0013e50: ffffbb1ac0013f30 ffffbb1ac0013e58: ffffbb1ac0013f58 ffffbb1ac0013e60: 0000000000000000 ffffbb1ac0013e68: 0000000000000000 ffffbb1ac0013e70: 0000000000000000 ffffbb1ac0013e78: ffffffffaf45a4c0 [function symbol: __do_sys_wait4+0x38] [stack frame #5 at 0xffffffffaf45a4c0 (__do_sys_wait4+0x38/0x8c) in __do_sys_wait4 at ./kernel/exit.c:1734:13] ffffbb1ac0013e80: ffffffffaf8aaa21 [function symbol: _raw_spin_unlock_irq+0x10] ffffbb1ac0013e88: ffffffffaf46460c [function symbol: do_sigaction+0xf8] ffffbb1ac0013e90: ffffa3b601180020 [slab object: sighand_cache+0x20] ffffbb1ac0013e98: ffffa3b6028d02d0 [slab object: vm_area_struct+0x0] ffffbb1ac0013ea0: 25c5ff9539edc200 ffffbb1ac0013ea8: 0000000000000002 ffffbb1ac0013eb0: 00007ffe09849fb0 ffffbb1ac0013eb8: ffffbb1ac0013f58 ffffbb1ac0013ec0: 0000000000000000 ffffbb1ac0013ec8: 0000000000000000 ffffbb1ac0013ed0: 0000000000000046 ffffbb1ac0013ed8: ffffa3b601178000 [slab object: task_struct+0x0] ffffbb1ac0013ee0: ffffa3b601178000 [slab object: task_struct+0x0] ffffbb1ac0013ee8: ffffbb1ac0013f58 ffffbb1ac0013ef0: 0000000000000000 ffffbb1ac0013ef8: ffffffffaf426def [function symbol: fpregs_assert_state_consistent+0x1b] ffffbb1ac0013f00: 0000000000000000 ffffbb1ac0013f08: ffffffffaf4b2f53 [function symbol: exit_to_user_mode_prepare+0xa6] ffffbb1ac0013f10: 0000000000000000 ffffbb1ac0013f18: 25c5ff9539edc200 ffffbb1ac0013f20: ffffbb1ac0013f58 ffffbb1ac0013f28: 0000000000000000 ffffbb1ac0013f30: ffffbb1ac0013f48 ffffbb1ac0013f38: ffffffffaf8a1573 [function symbol: do_syscall_64+0x70] [stack frame #6 at 0xffffffffaf8a1573 (do_syscall_64+0x70/0x8a) in do_syscall_x64 at ./arch/x86/entry/common.c:50:14 (inlined)] [stack frame #7 at 0xffffffffaf8a1573 (do_syscall_64+0x70/0x8a) in do_syscall_64 at ./arch/x86/entry/common.c:80:7] ffffbb1ac0013f40: 0000000000000000 ffffbb1ac0013f48: 0000000000000000 ffffbb1ac0013f50: ffffffffafa0009b [symbol: 
entry_SYSCALL_64+0x9b] [stack frame #8 at 0xffffffffafa0009b (entry_SYSCALL_64+0x9b/0x197) at ./arch/x86/entry/entry_64.S:120] ffffbb1ac0013f58: 0000000000000000 [stack frame #9 at 0x7f6a34a00057] Like :meth:`drgn.StackFrame.locals()`, this provides a nice overview of everything happening in a function, which might include useful hints. Keep in mind that it may identify "stale" addresses for anything that a function hasn't reinitialized yet, and as always, be careful of slab cache merging. This was inspired by the crash ``bt -FF`` command. It was contributed by Nhat Pham. XArray Helpers -------------- XArrays were introduced in Linux 4.20 as a replacement for radix trees. drgn's radix tree helpers also support XArrays in some cases, but this is awkward, not obvious, and doesn't work for new, XArray-only functionality. This release added dedicated XArray helpers like :func:`~drgn.helpers.linux.xarray.xa_load()` and :func:`~drgn.helpers.linux.xarray.xa_for_each()`. s390x Support ------------- Sven Schnelle contributed s390x support for Linux kernel modules and stack traces. This is the state of architecture support in this release: .. list-table:: drgn 0.0.22 Architecture Support :header-rows: 1 * - Architecture - Linux Kernel Modules - Stack Traces - Virtual Address Translation * - x86-64 - ✓ - ✓ - ✓ * - AArch64 - ✓ - ✓ - ✓ * - ppc64 - ✓ - ✓ - * - s390x - ✓ - ✓ - * - i386 - ✓ - - * - Arm - ✓ - - * - RISC-V - ✓ - - Relicensing to LGPL ------------------- drgn was originally licensed as GPLv3+. In this release, it was changed to LGPLv2.1+. The motivation for this change was to enable the long term vision for drgn that more projects can use it as a library providing programmatic interfaces for debugger functionality. For example, `Object Introspection `_, a userspace memory profiler recently open sourced by Meta, uses drgn to parse debugging information. 
drgn-0.0.31/docs/release_highlights/0.0.23.rst000066400000000000000000000121001477777462700206070ustar00rootroot000000000000000.0.23 (Released June 28th, 2023) ================================= These are some of the highlights of drgn 0.0.23. See the `GitHub release `_ for the full release notes, including more improvements and bug fixes. .. highlight:: pycon Virtual Address Translation Helpers ----------------------------------- This release added several Linux kernel helpers for translating virtual addresses. :func:`~drgn.helpers.linux.mm.follow_phys()` translates a virtual address to a physical address in a given address space. For example, to get the physical address that virtual address 0x7f7fe46a4270 maps to in process 115:: >>> task = find_task(prog, 115) >>> address = 0x7f7fe46a4270 >>> print(hex(follow_phys(task.mm, address))) 0x4090270 :func:`~drgn.helpers.linux.mm.follow_page()` translates a virtual address to the ``struct page *`` that it maps to:: >>> follow_page(task.mm, address) *(struct page *)0xffffd20ac0102400 = { ... } :func:`~drgn.helpers.linux.mm.follow_pfn()` translates a virtual address to the page frame number (PFN) of the page that it maps to:: >>> follow_pfn(task.mm, address) (unsigned long)16528 These can be used to translate arbitrary kernel virtual addresses by passing ``prog["init_mm"].address_of_()``:: >>> print(hex(follow_phys(prog["init_mm"].address_of_(), 0xffffffffc0483000))) 0x2e4b000 Vmalloc/Vmap Address Translation Helpers ---------------------------------------- :func:`~drgn.helpers.linux.mm.vmalloc_to_page()` is a special case of :func:`~drgn.helpers.linux.mm.follow_page()` for vmalloc and vmap addresses:: >>> vmalloc_to_page(prog, 0xffffffffc0477000) *(struct page *)0xffffc902400b8980 = { ... 
} Likewise, :func:`~drgn.helpers.linux.mm.vmalloc_to_pfn()` is a special case of :func:`~drgn.helpers.linux.mm.follow_pfn()` for vmalloc and vmap addresses:: >>> vmalloc_to_pfn(prog, 0xffffffffc0477000) (unsigned long)11814 ``contrib`` Directory --------------------- Martin Liška, Boris Burkov, and Johannes Thumshirn added lots of new scripts to the ``contrib`` directory: - :contrib:`btrfs_tree.py`: work-in-progress helpers for Btrfs B-trees - :contrib:`btrfs_tree_mod_log.py`: simulator for the Btrfs tree modification log - :contrib:`dump_btrfs_bgs.py`: print block groups in a Btrfs filesystem - :contrib:`kcore_list.py`: print memory regions from ``/proc/kcore`` - :contrib:`kernel_sys.py`: print system information (similar to crash's ``sys`` command) - :contrib:`mount.py`: print a filesystem mount table - :contrib:`platform_drivers.py`: print registered `platform drivers `_ - :contrib:`vmmap.py`: print memory mappings in a process (similar to ``/proc/$pid/maps``) - :contrib:`vmstat.py`: print information about kernel memory usage Embedding Interactive Mode -------------------------- :meth:`drgn.cli.run_interactive()` runs drgn's interactive mode. It can be used to embed drgn in another application. For example, you could use it for a custom :class:`drgn.Program` that the standard drgn CLI can't set up: .. code-block:: python3 import drgn import drgn.cli prog = drgn.Program() prog.add_type_finder(...) prog.add_object_finder(...) prog.add_memory_segment(...) drgn.cli.run_interactive(prog) Full s390x Support ------------------ Sven Schnelle contributed s390x virtual address translation support. This is the state of architecture support in this release: .. 
list-table:: drgn 0.0.23 Architecture Support :header-rows: 1 * - Architecture - Linux Kernel Modules - Stack Traces - Virtual Address Translation * - x86-64 - ✓ - ✓ - ✓ * - AArch64 - ✓ - ✓ - ✓ * - ppc64 - ✓ - ✓ - * - s390x - ✓ - ✓ - ✓ * - i386 - ✓ - - * - Arm - ✓ - - * - RISC-V - ✓ - - Linux 6.3 & 6.4 Support ----------------------- Linux 6.3 and 6.4 had an unusual number of breaking changes for drgn. Here are some errors you might see with older versions of drgn that are fixed in this release. On startup (fixed by Ido Schimmel):: warning: could not get debugging information for: kernel modules (could not find loaded kernel modules: 'struct module' has no member 'core_size') From :meth:`drgn.Program.stack_trace()` and :meth:`drgn.Thread.stack_trace()`:: Exception: unknown ORC entry type 3 From :func:`~drgn.helpers.linux.mm.compound_order()` and :func:`~drgn.helpers.linux.mm.compound_nr()`:: AttributeError: 'struct page' has no member 'compound_order' From :func:`~drgn.helpers.linux.block.for_each_disk()` and :func:`~drgn.helpers.linux.block.for_each_partition()`:: AttributeError: 'struct class' has no member 'p' Python 3.12 Support ------------------- Python 3.12, currently in beta, changed an implementation detail that drgn depended on, which caused crashes like:: Py_SIZE: Assertion `ob->ob_type != &PyLong_Type' failed. Stephen Brennan fixed this. drgn-0.0.31/docs/release_highlights/0.0.24.rst000066400000000000000000000066431477777462700206270ustar00rootroot000000000000000.0.24 (Released September 8th, 2023) ===================================== These are some of the highlights of drgn 0.0.24. See the `GitHub release `_ for the full release notes, including more improvements and bug fixes. .. 
highlight:: pycon Linked List Length Helper ------------------------- This release added :func:`~drgn.helpers.linux.list.list_count_nodes()`, which returns the length of a Linux kernel linked list:: >>> list_count_nodes(prog["workqueues"].address_of_()) 29 Networking Helpers ------------------ This release added a couple of Linux kernel networking helpers requested by Jakub Kicinski. :func:`~drgn.helpers.linux.net.netdev_priv()` returns the private data of a network device:: >>> dev = netdev_get_by_name(prog, "wlp0s20f3") >>> netdev_priv(dev) (void *)0xffff9419c9dec9c0 >>> netdev_priv(dev, "struct ieee80211_sub_if_data") *(struct ieee80211_sub_if_data *)0xffff9419c9dec9c0 = { ... } :func:`~drgn.helpers.linux.net.skb_shinfo()` returns the shared info for a socket buffer. C++ Lookups ----------- This release added support for a few C++ features. Simple Type Specifiers ^^^^^^^^^^^^^^^^^^^^^^ Unlike C, C++ allows referring to ``class``, ``struct``, ``union``, and ``enum`` types without their respective keywords. For example: .. code-block:: c++ class Foo { ... }; Foo foo; // Equivalent to class Foo foo; Previously, drgn always required the keyword, so ``prog.type("class Foo")`` would succeed but ``prog.type("Foo")`` would fail with a :class:`LookupError`. This requirement was surprising to C++ developers, so it was removed. For C++ programs, ``prog.type("Foo")`` will now find a ``class``, ``struct``, ``union``, or ``enum`` type named ``Foo`` (for C programs, the keyword is still required). Nested Classes ^^^^^^^^^^^^^^ Again unlike C, C++ allows ``class``, ``struct``, and ``union`` types to be defined inside of other ``class``, ``struct``, and ``union`` types. For example: .. code-block:: c++ class Foo { public: class Bar { ... }; ... }; Foo::Bar bar; drgn can now find such types with ``prog.type("Foo::Bar")``. Member Functions ^^^^^^^^^^^^^^^^ C++ supports member functions (a.k.a. methods). For example: .. code-block:: c++ class Foo { int method() { ... 
} }; drgn can now find member functions with :meth:`drgn.Program.function()`, :meth:`drgn.Program.object()`, or :meth:`drgn.Program[] ` (e.g., ``prog.function("Foo::method")`` or ``prog["Foo::method"]``). Split DWARF ----------- drgn now supports split DWARF object (.dwo) files. This is enabled by the ``-gsplit-dwarf`` option in GCC and Clang or for the Linux kernel with ``CONFIG_DEBUG_INFO_SPLIT=y``. Split DWARF package (.dwp) file support is still in progress. Performance Improvements ------------------------ Thierry Treyer found a bug that made us search through much more debugging information than necessary when getting a stack trace. Fixing this made stack traces almost twice as fast. The C++ lookup and split DWARF support mentioned above require processing more information in drgn's debugging information indexing step, which it does on startup and whenever debugging information is manually loaded. This could've been a performance regression, but instead, indexing was reworked from the ground up in a way that's usually *faster* despite the added features. drgn-0.0.31/docs/release_highlights/0.0.25.rst000066400000000000000000000141471477777462700206260ustar00rootroot000000000000000.0.25 (Released December 1st, 2023) ==================================== These are some of the highlights of drgn 0.0.25. See the `GitHub release `_ for the full release notes, including more improvements and bug fixes. .. highlight:: pycon Omitting the ``prog`` Argument ------------------------------ As a usability improvement, ``prog`` can now be omitted from most function calls. For example, instead of :func:`find_task(prog, 1234) `, you can now simply write :func:`find_task(1234) `. Additionally, instead of :meth:`prog.stack_trace(1234) `, you can now write :func:`stack_trace(1234) `. (The old way will continue to be supported.) Most CLI users don't need to worry about how this works, but library users may want to understand the :ref:`default-program`. 
It's tricky balancing interactive convenience and sensible APIs for scripting, but we think this is a nice improvement overall! Running Without ``root`` ------------------------ drgn debugs the live Linux kernel via ``/proc/kcore``, which can only be accessed by the ``root`` user (or a user with the ``CAP_SYS_RAWIO`` capability, to be precise). However, it's not necessary (or ideal) for the rest of drgn to run as ``root``. Now when drgn is run against the live kernel as an unprivileged user, it will attempt to open ``/proc/kcore`` via :manpage:`sudo(8)`. The rest of drgn will then run without extra privileges. In other words, in order to debug the live kernel, all you need to do is :doc:`install debugging symbols ` and run: .. code-block:: console $ drgn This feature was contributed by Stephen Brennan. Maple Tree Helpers ------------------ `Maple trees `_ were introduced in Linux 6.1, initially to store virtual memory areas (VMAs). This release adds a couple of helpers for working with them. :func:`~drgn.helpers.linux.mapletree.mtree_load()` looks up an entry in a maple tree:: >>> mtree_load(task.mm.mm_mt.address_of_(), 0x55d65cfaa000) (void *)0xffff97ad82bfc930 :func:`~drgn.helpers.linux.mapletree.mt_for_each()` iterates over a maple tree:: >>> for first_index, last_index, entry in mt_for_each(task.mm.mm_mt.address_of_()): ... print(hex(first_index), hex(last_index), entry) ... 0x55d65cfaa000 0x55d65cfaafff (void *)0xffff97ad82bfc930 0x55d65cfab000 0x55d65cfabfff (void *)0xffff97ad82bfc0a8 0x55d65cfac000 0x55d65cfacfff (void *)0xffff97ad82bfc000 0x55d65cfad000 0x55d65cfadfff (void *)0xffff97ad82bfcb28 ... VMA Helpers ----------- This release also adds higher-level helpers specifically for VMAs. :func:`~drgn.helpers.linux.mm.vma_find()` looks up a VMA by address:: >>> vma_find(task.mm, 0x55d65cfaa000) *(struct vm_area_struct *)0xffff97ad82bfc930 = { ... 
} >>> vma_find(task.mm, 0x55d65cfa9fff) (struct vm_area_struct *)0 :func:`~drgn.helpers.linux.mm.for_each_vma()` iterates over every VMA in an address space:: >>> for vma in for_each_vma(task.mm): ... print(vma) ... *(struct vm_area_struct *)0xffff97ad82bfc930 = { ... } *(struct vm_area_struct *)0xffff97ad82bfc0a8 = { ... } ... These helpers also handle older kernels without maple trees. Wait Queue Helpers ------------------ Wait queues are a fundamental data structure and synchronization mechanism in the Linux kernel. Imran Khan contributed a few helpers for working with them. :func:`~drgn.helpers.linux.wait.waitqueue_active()` returns whether a wait queue has any waiters:: >>> wq *(wait_queue_head_t *)0xffff8da80d618e18 = { .lock = (spinlock_t){ .rlock = (struct raw_spinlock){ .raw_lock = (arch_spinlock_t){ .val = (atomic_t){ .counter = (int)0, }, .locked = (u8)0, .pending = (u8)0, .locked_pending = (u16)0, .tail = (u16)0, }, }, }, .head = (struct list_head){ .next = (struct list_head *)0xffffae44e3007ce8, .prev = (struct list_head *)0xffffae44e3007ce8, }, } >>> waitqueue_active(wq) True :func:`~drgn.helpers.linux.wait.waitqueue_for_each_entry()` iterates over each entry in a wait queue:: >>> for entry in waitqueue_for_each_entry(wq): ... print(entry) ... *(wait_queue_entry_t *)0xffffae44e3007cd0 = { .flags = (unsigned int)0, .private = (void *)0xffff8da7863ec000, .func = (wait_queue_func_t)woken_wake_function+0x0 = 0xffffffffa8181010, .entry = (struct list_head){ .next = (struct list_head *)0xffff8da80d618e20, .prev = (struct list_head *)0xffff8da80d618e20, }, } :func:`~drgn.helpers.linux.wait.waitqueue_for_each_task()` iterates over each task waiting on a wait queue (although note that this does not work for some special wait queues that don't store tasks):: >>> for task in waitqueue_for_each_task(wq): ... print(task.pid, task.comm) ... 
(pid_t)294708 (char [16])"zsh" ppc64 Radix MMU Support ----------------------- Sourabh Jain contributed ppc64 radix MMU virtual address translation support. This is the state of architecture support in this release: .. list-table:: drgn 0.0.25 Architecture Support :header-rows: 1 * - Architecture - Linux Kernel Modules - Stack Traces - Virtual Address Translation * - x86-64 - ✓ - ✓ - ✓ * - AArch64 - ✓ - ✓ - ✓ * - s390x - ✓ - ✓ - ✓ * - ppc64 - ✓ - ✓ - ✓ * - i386 - ✓ - - * - Arm - ✓ - - * - RISC-V - ✓ - - drgn-0.0.31/docs/release_highlights/0.0.26.rst000066400000000000000000000076411477777462700206300ustar00rootroot000000000000000.0.26 (Released March 11th, 2024) ==================================== These are some of the highlights of drgn 0.0.26. See the `GitHub release `_ for the full release notes, including more improvements and bug fixes. .. highlight:: pycon Miscellaneous Helpers --------------------- This release added several new Linux kernel helpers with no particular theme: - :func:`~drgn.helpers.linux.printk.print_dmesg()`, a shortcut for printing the kernel log buffer. - :func:`~drgn.helpers.linux.idr.idr_for_each_entry()`, a shortcut for iterating over an IDR and casting its entries to a specific type. - :func:`~drgn.helpers.linux.stackdepot.stack_depot_fetch()` for getting stack traces from the storage used by KASAN and other kernel debugging tools. This was contributed by Peter Collingbourne. - :func:`~drgn.helpers.linux.plist.plist_head_empty()`, :func:`~drgn.helpers.linux.plist.plist_node_empty()`, :func:`~drgn.helpers.linux.plist.plist_first_entry()`, :func:`~drgn.helpers.linux.plist.plist_last_entry()`, :func:`~drgn.helpers.linux.plist.plist_for_each()`, and :func:`~drgn.helpers.linux.plist.plist_for_each_entry()`, helpers for working with the kernel's priority-sorted lists. ``fsrefs.py`` Tool ------------------ The ``fsrefs.py`` tool was added to the ``tools`` directory. 
It prints information about everything that is referencing a file or filesystem. This is similar to :manpage:`fuser(1)` and :manpage:`lsof(8)`, but it can find more since it has access to kernel internals. .. code-block:: console $ ./tools/fsrefs.py --inode /dev/urandom pid 1349 (bluetoothd) fd 16 (struct file *)0xffff8881458cf000 pid 1368 (udisksd) fd 15 (struct file *)0xffff888145c13100 ... $ ./tools/fsrefs.py --super-block /run mount /run (struct mount *)0xffff8881015cc140 pid 1 (systemd) fd 256 (struct file *)0xffff8881012f3d00 /run/initctl pid 1 (systemd) fd 380 (struct file *)0xffff88810bf88800 /run/dmeventd-server pid 1 (systemd) fd 385 (struct file *)0xffff88810bf88f00 /run/dmeventd-client mount /run (mount namespace 4026532545) (struct mount *)0xffff8881474028c0 pid 2135770 (systemd-journal) vma 0x7f7d94f2a000-0x7f7d94f2b000 (struct file *)0xffff88813925bf00 /run/systemd/journal/kernel-seqnum pid 2135770 (systemd-journal) vma 0x7f7d94f2b000-0x7f7d94f2c000 (struct file *)0xffff88813925a100 /run/systemd/journal/seqnum ... ``fsrefs.py`` currently checks: - File descriptors - Task working directories - Task root directories - Memory mappings - Filesystem mounts - `binfmt_misc `_ - :manpage:`loop(4)` devices - Swap files - `uprobes `_ It will be extended to check more as the need arises, so feel free to report anything it missed. (Note that as opposed to the ``contrib`` directory, scripts in the ``tools`` directory are regularly maintained and tested.) DWARF Package Files ------------------- drgn now supports split DWARF package (.dwp) files. These are generated by the ``dwp`` and ``llvm-dwp`` tools. Linux 6.8 Support ----------------- Linux 6.8 changed some filesystem internals in a way that broke a couple of drgn helpers. Here are some errors you might see with older versions of drgn that are fixed in this release. 
From :func:`~drgn.helpers.linux.fs.path_lookup()` or :func:`~drgn.helpers.linux.fs.for_each_mount()` (fixed by Johannes Thumshirn):: AttributeError: 'struct mnt_namespace' has no member 'list' From :func:`~drgn.helpers.linux.fs.path_lookup()`:: AttributeError: 'struct dentry' has no member 'd_subdirs' Python 3.13 Support ------------------- Python 3.13, currently in alpha, removed or changed some private APIs (``_PyDict_GetItemIdWithError()``, ``_PyDict_SetItemId()``, and ``_PyLong_AsByteArray()``) that drgn depended on, which caused build failures. This was fixed by using public APIs instead. drgn-0.0.31/docs/release_highlights/0.0.27.rst000066400000000000000000000212061477777462700206220ustar00rootroot000000000000000.0.27 (Released July 1st, 2024) ==================================== These are some of the highlights of drgn 0.0.27. See the `GitHub release `_ for the full release notes, including more improvements and bug fixes. .. highlight:: pycon Finding the Type Member at an Offset ------------------------------------ This release added :func:`~drgn.helpers.common.type.member_at_offset()`, which returns the name of the member at an offset in a type:: >>> prog.type('struct list_head') struct list_head { struct list_head *next; struct list_head *prev; } >>> member_at_offset(prog.type('struct list_head'), 0) 'next' >>> member_at_offset(prog.type('struct list_head'), 8) 'prev' It also handles more complicated cases, like nested structures, arrays, unions, and padding. It is particularly useful in combination with :func:`~drgn.helpers.common.memory.identify_address()` or :func:`~drgn.helpers.linux.slab.slab_object_info()`:: >>> identify_address(0xffff984fc7cc6708) 'slab object: fuse_inode+0x188' >>> member_at_offset(prog.type("struct fuse_inode"), 0x188) 'inode.i_data.i_pages.xa_head' (Note that in some cases, the slab cache name isn't identical to the type name. Slab merging also complicates this; see :func:`~drgn.helpers.linux.slab.slab_cache_is_merged()`. 
In those cases, this trick requires some extra effort.) Identifying Memory ------------------ This release added :func:`~drgn.helpers.common.memory.print_annotated_memory()`, which dumps a range of memory, annotating values that can be identified:: >>> print_annotated_memory(0xffff985163300698, 64) ADDRESS VALUE ffff985163300698: ffff984f415456a0 [slab object: mnt_cache+0x20] ffff9851633006a0: ffff984f587b7840 [slab object: dentry+0x0] ffff9851633006a8: ffff984f404bfa38 [slab object: inode_cache+0x0] ffff9851633006b0: ffffffff8b4890c0 [object symbol: signalfd_fops+0x0] ffff9851633006b8: 0000000000000000 ffff9851633006c0: ffff984f9307c078 [slab object: lsm_file_cache+0x0] ffff9851633006c8: ffff984f8afe3980 [slab object: kmalloc-8+0x0] ffff9851633006d0: ffff984f414730f0 [slab object: ep_head+0x0] (This is similar to :func:`~drgn.helpers.common.stack.print_annotated_stack()` but for arbitrary memory ranges.) :func:`~drgn.helpers.common.memory.identify_address()` (used by :func:`~drgn.helpers.common.memory.print_annotated_memory()` and :func:`~drgn.helpers.common.stack.print_annotated_stack()`) can now also identify vmap addresses and vmap kernel stacks:: >>> print(identify_address(0xffffffffc0536540)) vmap: 0xffffffffc0536000-0xffffffffc0545000 caller load_module+0x811 >>> print(identify_address(0xffffbb88e2283f58)) vmap stack: 2220305 (python3) +0x3f58 Configurable Type and Object Finders ------------------------------------ .. currentmodule:: drgn drgn already supported registering custom callbacks that could satisfy type and object lookups: :meth:`Program.add_type_finder()` and :meth:`Program.add_object_finder()`. However, there was no way to disable previously added callbacks or control the order in which they are called. This release adds an interface for doing so. 
:meth:`Program.registered_object_finders()` returns the set of registered object finders:: >>> prog.registered_object_finders() {'dwarf', 'linux'} :meth:`Program.enabled_object_finders()` returns the list of enabled object finders in the order that they are called:: >>> prog.enabled_object_finders() ['linux', 'dwarf'] :meth:`Program.register_object_finder()` registers and optionally enables a finder:: >>> def my_object_finder(prog, name, flags, filename): ... ... ... >>> prog.register_object_finder("foo", my_object_finder) >>> prog.registered_object_finders() {'foo', 'dwarf', 'linux'} >>> prog.enabled_object_finders() ['linux', 'dwarf'] >>> def my_object_finder2(prog, name, flags, filename): ... ... ... >>> prog.register_object_finder("bar", my_object_finder2, enable_index=0) >>> prog.registered_object_finders() {'foo', 'dwarf', 'bar', 'linux'} >>> prog.enabled_object_finders() ['bar', 'linux', 'dwarf'] :meth:`Program.set_enabled_object_finders()` sets the list of enabled finders. This can enable, disable, and reorder finders. .. code:: >>> prog.set_enabled_object_finders(['dwarf', 'foo']) >>> prog.enabled_object_finders() ['dwarf', 'foo'] Type finders have equivalent methods: :meth:`Program.registered_type_finders`, :meth:`Program.enabled_type_finders`, :meth:`Program.register_type_finder`, and :meth:`Program.set_enabled_type_finders`. The old interface is now deprecated. Symbol Finders -------------- Previously, symbols could only be looked up using the ELF symbol table. In this release, Stephen Brennan added support for custom symbol finders: :meth:`Program.registered_symbol_finders`, :meth:`Program.enabled_symbol_finders`, :meth:`Program.register_symbol_finder`, and :meth:`Program.set_enabled_symbol_finders`. .. currentmodule:: None ``contrib`` Directory --------------------- A few new scripts were added to the ``contrib`` directory, and others were updated. 
``contrib/search_kernel_memory.py`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This script does a brute force search through kernel RAM for a given byte string and prints all of the addresses where it is found. It's useful as a last resort for finding what is referencing an object, for example. .. code:: >>> folio = stack_trace(task)[5]["folio"] >>> search_memory(prog, folio.value_().to_bytes(8, "little")) 0xffff8882f67539e8 vmap stack: 2232297 (io_thread) +0x39e8 0xffff8882f6753a18 vmap stack: 2232297 (io_thread) +0x3a18 0xffff8882f6753a60 vmap stack: 2232297 (io_thread) +0x3a60 0xffff8882f6753ac8 vmap stack: 2232297 (io_thread) +0x3ac8 0xffff888300405530 slab object: kmalloc-16+0x0 0xffff8883b8c6ca38 ``contrib/gcore.py`` ^^^^^^^^^^^^^^^^^^^^ This script creates a core dump of a live process. This works even if the process is stuck in D state (Uninterruptible Sleep), which normally causes debuggers attempting to attach to the process to hang, too. The generated core dump can be debugged with GDB, LLDB, or even drgn. By default, ``gcore.py`` reads the task's memory through ``/proc/$pid/mem``. However, if ``mmap_lock``/``mmap_sem`` is stuck, then this will also hang. If the ``--no-procfs`` flag is used, drgn bypasses this, too, by reading the process's page tables and reading the memory directly. This has a couple of big downsides: paged out memory will be skipped, and it's a lot slower. But if the task is badly stuck in memory management, ``--no-procfs`` is a great escape hatch. ``gcore.py`` can also extract userspace core dumps out of a kernel core dump, but note that `makedumpfile(8) `_ is normally configured to filter out userspace memory. ``contrib/negdentdelete.py`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ `Negative dentries `_ are a cache of failed filename lookups. They can take up a lot of memory, and it's difficult to get rid of them by normal means. Stephen Brennan contributed a script that can be used to get rid of negative dentries in a directory. 
``contrib/btrfs_tree.py`` ^^^^^^^^^^^^^^^^^^^^^^^^^ This script contains work-in-progress helpers for reading Btrfs metadata. It was added in drgn 0.0.23, but this release expanded and improved it. It will likely be adapted into proper helpers in a future release. This script was used to investigate a bug, culminating in Linux kernel commit `9d274c19a71b ("btrfs: fix crash on racing fsync and size-extending write into prealloc") `_. ``contrib/bpf_inspect.py`` ^^^^^^^^^^^^^^^^^^^^^^^^^^ Leon Hwang made many improvements to this script, including adding more detailed information, new commands, and updating it for recent kernels. Linux 6.9 and 6.10 Support -------------------------- Changes in Linux 6.9 and 6.10 broke a few drgn helpers. Here are some errors you might see with older versions of drgn that are fixed in this release. From :func:`~drgn.helpers.linux.stackdepot.stack_depot_fetch()`:: AttributeError: 'union handle_parts' has no member 'pool_index' From :func:`~drgn.helpers.linux.block.for_each_disk()`:: AttributeError: 'struct block_device' has no member 'bd_partno' Additionally, :func:`~drgn.helpers.linux.slab.slab_cache_for_each_allocated_object()`, :func:`~drgn.helpers.linux.slab.slab_object_info()`, and :func:`~drgn.helpers.linux.slab.find_containing_slab_cache()` may fail to find anything. drgn-0.0.31/docs/release_highlights/0.0.28.rst000066400000000000000000000161471477777462700206330ustar00rootroot000000000000000.0.28 & 0.0.29 (Released October 7th & 8th, 2024) ================================================== These are some of the highlights of drgn 0.0.28. See the `GitHub release `_ for the full release notes, including more improvements and bug fixes. drgn 0.0.29 was released shortly after 0.0.28 with a single bug fix for the :mod:`drgn.helpers.experimental.kmodify` module. See the `release notes `_. .. 
highlight:: pycon Calling Arbitrary Functions in the Running Kernel ------------------------------------------------- This release added :func:`~drgn.helpers.experimental.kmodify.call_function()`, which calls a function in the running kernel. This is the first ever feature in drgn that allows modifying the state of the kernel. Its primary use cases are experimentation in development environments and mitigating kernel bugs in production. For example, `this `_ recent lost wake-up bug could be mitigated with something like: .. code-block:: python3 from drgn.helpers.experimental.kmodify import call_function for task in for_each_task(): for frame in stack_trace(task): if frame.name == "perf_event_free_task": call_function("wake_up_process", task) break Note that this feature is currently experimental, only supported on x86-64, and may have a different API in the future. There is a `blog post `_ about how this feature works. Writing to Kernel Memory ------------------------ In a similar vein, drgn can now write to kernel memory, either via an address (with :func:`~drgn.helpers.experimental.kmodify.write_memory()`):: >>> import os >>> from drgn.helpers.experimental.kmodify import write_memory >>> os.uname().sysname 'Linux' >>> write_memory(prog["init_uts_ns"].name.sysname.address_, b"Lol\\0") >>> os.uname().sysname 'Lol' or an object (with :func:`~drgn.helpers.experimental.kmodify.write_object()`):: >>> from drgn.helpers.experimental.kmodify import write_object >>> os.system("uptime -p") up 12 minutes >>> write_object(prog["init_time_ns"].offsets.boottime.tv_sec, 1000000000) >>> os.system("uptime -p") up 3 decades, 1 year, 37 weeks, 1 hour, 59 minutes This feature is also experimental. It uses the same underlying mechanism as :func:`~drgn.helpers.experimental.kmodify.call_function()`. More C Operators ---------------- This release added a couple of new functions corresponding to operators in C. 
The :func:`~drgn.alignof()` function is analogous to the |alignof()|_ operator in C:: >>> alignof(prog.type("long long")) 8 .. |alignof()| replace:: ``_Alignof()`` .. _alignof(): https://en.cppreference.com/w/c/language/_Alignof The :func:`~drgn.implicit_convert()` function implements implicit conversions in C, like when assigning a variable, passing an argument to a function call, or returning a value:: >>> implicit_convert("unsigned int", Object(prog, "float", 2.0)) (unsigned int)2 >>> implicit_convert("void *", Object(prog, "int", 0)) Traceback (most recent call last): ... TypeError: cannot convert 'int' to incompatible type 'void *' Kernel Module Helpers --------------------- Stephen Brennan contributed several helpers for working with Linux kernel modules. :func:`~drgn.helpers.linux.module.for_each_module()` iterates over loaded modules:: >>> for module in for_each_module(): ... print(module.name.string_().decode()) ... overlay vhost_net vhost ... :func:`~drgn.helpers.linux.module.find_module()` finds the module with a given name:: >>> module = find_module("overlay") >>> module *(struct module *)0xffffffffc23dae00 = { ... } :func:`~drgn.helpers.linux.module.module_address_regions()` and :func:`~drgn.helpers.linux.module.module_percpu_region()` return all of the memory regions associated with a module, and :func:`~drgn.helpers.linux.module.address_to_module()` finds the module containing an address:: >>> for start, size in module_address_regions(module): ... print(hex(start), size) ... 0xffffffffc23be000 102400 0xffffffffc23d8000 65536 0xffffffffc23e9000 73728 0xffffffffc2385000 4096 0x0 0 0x0 0 0x0 0 >>> address_to_module(0xffffffffc23bf000) == module True Thread Names ------------ Ryan Wilson added the :attr:`~drgn.Thread.name` attribute to :class:`drgn.Thread`. This provides a consistent interface for getting the name of a thread regardless of whether you're debugging the kernel or a userspace program. 
(Unfortunately, userspace core dumps on Linux don't save the name of any threads other than the main thread.) Full 32-Bit Arm Support ----------------------- This release added support for virtual address translation and stack traces on 32-bit Arm. This is the state of architecture support in this release: .. list-table:: :header-rows: 1 * - Architecture - Linux Kernel Modules - Stack Traces - Virtual Address Translation * - x86-64 - ✓ - ✓ - ✓ * - AArch64 - ✓ - ✓ - ✓ * - s390x - ✓ - ✓ - ✓ * - ppc64 - ✓ - ✓ - ✓ * - i386 - ✓ - - * - Arm - ✓ - ✓ - ✓ * - RISC-V - ✓ - - Note that there are known Linux kernel issues with debugging 32-bit Arm, both `live `_ and in `kdump `_. Please reach out to the linux-debuggers@vger.kernel.org mailing list if these affect you. AArch64 and s390x Virtual Address Translation Fixes --------------------------------------------------- As of Linux 6.9, the default AArch64 kernel configuration `enables `_ 52-bit virtual addresses and `falls back `_ to a smaller virtual address size if the hardware does not support 52 bits. This required updates to drgn that were missed in v0.0.27. As of Linux 6.10, on s390x, virtual addresses in the direct mapping are `no longer `_ equal to physical addresses. This also required updates to drgn that were missed in v0.0.27. Linux 6.11 and 6.12 Support --------------------------- A change in Linux 6.12 broke ``tools/fsrefs.py``. This error from ``visit_uprobes()`` is fixed in this release:: TypeError: cannot convert 'struct list_head' to bool No other changes were required to support Linux 6.11 and 6.12. drgn-0.0.31/docs/release_highlights/0.0.30.rst000066400000000000000000000033041477777462700206130ustar00rootroot000000000000000.0.30 (Released December 18th, 2024) ===================================== These are some of the highlights of drgn 0.0.30. See the `GitHub release `_ for the full release notes, including more improvements and bug fixes. 
This release is relatively small, as most development effort has been focused on the upcoming `module API `_, which is expected to land in the next release. Symbol Index and Kallsyms ------------------------- Stephen Brennan continued his efforts towards `making it possible to debug the Linux kernel without full DWARF debugging information `_. The next step in this release was adding new helpers for looking up symbols from kallsyms: :func:`~drgn.helpers.linux.kallsyms.load_vmlinux_kallsyms()` and :func:`~drgn.helpers.linux.kallsyms.load_module_kallsyms()`. These are built on top of a new, generic API for fast symbol lookups: :class:`drgn.SymbolIndex`. New Python 3.13 REPL -------------------- Python 3.13 `added `_ a vastly improved REPL with multiline editing, colorized output, interactive help, and more. drgn now makes use of this REPL when it is available. Stack Tracing Through Interrupt Handlers ---------------------------------------- drgn had a longstanding `bug `_ where stack traces would stop at an interrupt handler frame. This release fixes that (as long as the kernel is configured to use the ORC unwinder). Linux 6.13 Support ------------------ No drgn changes were required to support Linux 6.13 as of rc3. drgn-0.0.31/docs/release_highlights/0.0.31.rst000066400000000000000000000200071477777462700206130ustar00rootroot000000000000000.0.31 (Released April 16th, 2025) ================================== These are some of the highlights of drgn 0.0.31. See the `GitHub release `_ for the full release notes, including more improvements and bug fixes. Fun fact: this is the largest release of drgn since the first ever release, both in terms of number of commits and changed lines of code. .. highlight:: pycon .. program:: drgn Module API ---------- One of the first things drgn does when it starts up is figure out what binaries are loaded in your program: executables, shared libraries, Linux kernel image, Linux kernel modules, etc. 
Until this release, this all happened internally to drgn with no way to inspect or override it. This release adds new APIs to address this. First, the :class:`drgn.Module` class and its subclasses were added to represent binaries used by a program. :class:`drgn.Program` gained a couple of methods for querying what modules were created for a program, :meth:`drgn.Program.modules()` and :meth:`drgn.Program.module()`:: >>> for module in prog.modules(): ... print(module) ... prog.main_module(name='kernel') prog.relocatable_module(name='scsi_dh_rdac', address=0xffffffffc02fb000) prog.relocatable_module(name='nvme', address=0xffffffffc051f000) prog.relocatable_module(name='spi_intel', address=0xffffffffc0fa3000) ... >>> prog.module("kernel") prog.main_module(name='kernel') >>> prog.module(0xffffffff92811100) prog.main_module(name='kernel') Modules are normally created automatically for all loaded binaries when debugging symbols are loaded. This can also be done manually with :meth:`drgn.Program.loaded_modules()` or :meth:`drgn.Program.create_loaded_modules()`. Arbitrary modules can also be :ref:`created manually `. This enables more :ref:`advanced use cases `. Options for Finding Debugging Symbols ------------------------------------- drgn now provides much more control over how debugging symbols are found. The :option:`--try-symbols-by` and :option:`--no-symbols-by` command line options allow enabling or disabling methods of searching for debugging symbols. The :option:`--debug-directory` and :option:`--no-default-debug-directories` options allow controlling the directories that are searched for debugging symbols. The :option:`--kernel-directory` and :option:`--no-default-kernel-directories` options allow controlling the directories that are searched for Linux kernel files. For example, if you have a kernel core dump and a directory containing kernel debugging symbols: .. 
code-block:: console $ ls kernel-6.15.0-rc1-debuginfo vmcore $ drgn -c vmcore --kernel-directory kernel-6.15.0-rc1-debuginfo These options are also available programmatically as :attr:`drgn.Program.debug_info_options`. Stricter Debugging Symbol File Matching --------------------------------------- A common pitfall for users is passing the wrong debugging symbol file to :option:`-s` (for example, the vmlinux from a different kernel build, or a kernel module or library that wasn't loaded at the time). Before this release, drgn was quite permissive and would use the file anyways, usually with confusing results. Starting in this release, drgn now always checks that files passed to :option:`-s` or :meth:`drgn.Program.load_debug_info()` correspond to a loaded module (based on build IDs). If not, it logs a warning and ignores them. However, there are valid use cases for adding unloaded files, like corrupted core dumps or reading debugging symbols from arbitrary files. If you really want to use a file for a specific module, then you can find the module with :meth:`drgn.Program.modules()` or :meth:`drgn.Program.module()` and add the file with :meth:`drgn.Module.try_file(path, force=True) `. If you really want to load debugging symbols from a file without associating it with a loaded module, you can use :option:`--extra-symbols` or :meth:`drgn.Program.extra_module(...).try_file(path) `. Debuginfod Integration ---------------------- `debuginfod `_ is a service for automatically downloading debugging symbols. drgn has had partial debuginfod support for a long time (via the libdwfl library), with a few important limitations: 1. It couldn't use debuginfod for the Linux kernel. 2. Downloads couldn't be interrupted with Ctrl-C. 3. The download progress bar wasn't very pretty. This release improves drgn's integration with debuginfod and fixes these issues. 
There's still one caveat for the Linux kernel: drgn only enables debuginfod for the Linux kernel on Fedora, because other distributions haven't yet deployed the `fix for extremely slow downloads of kernel debugging symbols `_ on their debuginfod servers. Contact your distribution to request that they update their debuginfod server to at least elfutils 0.192 and compress their kernel debug info packages with parallel xz. Custom Debugging Information Finders ------------------------------------ If the above options for finding debugging symbols don't provide enough flexibility, you can define totally custom ways of finding debugging symbols by registering a debugging information finder. See :ref:`here ` for an example. Plugins ------- drgn now has a basic plugin system. Currently, the main use case is automatically setting system- or user-specific configuration when drgn starts up. For example, system administrators may install a plugin that registers a debugging information finder for their specific system. See :ref:`here ` for an overview and :ref:`here ` for an example. Running Code Snippets on the Command Line ----------------------------------------- Sometimes, you don't want an interactive drgn session or a full drgn script; you just want to run a short snippet of code. In this release, Stephen Brennan added the :option:`-e` option, which takes a string of code to evaluate: .. code-block:: console $ python3 -m drgn -e 'print(kaslr_offset())' 251658240 (We would have used ``-c`` like the Python CLI, but that is already used to specify a core dump.) Kernel Stack Unwinding Without Debugging Symbols ------------------------------------------------ drgn has had support for the Linux kernel's `ORC unwinder `_ for a long time. However, although ORC data is typically saved in kernel core dumps, drgn previously only supported reading ORC data from the kernel debugging symbol files. 
In this release, Stephen Brennan expanded drgn's ORC support to be able to read ORC data directly from the core dump. This enables reliable stack unwinding even through unknown or out-of-tree kernel modules. This is the latest step towards support for `debugging the Linux kernel without full DWARF debugging information `_. Linux 6.14 and 6.15 Support --------------------------- A change in Linux 6.14 broke how drgn determines module section addresses. This error on startup is fixed in this release:: /lib/modules/6.14.2/kernel/fs/binfmt_misc.ko (could not get section addresses: 'struct module_sect_attrs' has no member 'nsections') A change in Linux 6.15 broke the :mod:`~drgn.helpers.linux.kernfs` helpers. This error is fixed in this release:: AttributeError: 'struct kernfs_node' has no member 'parent' Another change in Linux 6.15 broke the :func:`~drgn.helpers.linux.fs.path_lookup()` helper's handling of mount points. This is fixed in this release. Last Release With Python 3.6 & 3.7 Support ------------------------------------------ This will be the last release of drgn with support for Python 3.6 and 3.7. Both versions have been EOL for awhile, and the maintenance burden has become unsustainable. See `here `_ for the announcement. Python 3.8 support will probably follow suit soon. drgn-0.0.31/docs/requirements.txt000066400000000000000000000000161477777462700167720ustar00rootroot00000000000000sphinx==7.3.7 drgn-0.0.31/docs/support_matrix.rst000066400000000000000000000044461477777462700173530ustar00rootroot00000000000000Support Matrix ============== Architectures ------------- Some features in drgn require architecture-specific support. The current status of this support is: .. _architecture support matrix: .. 
list-table:: :header-rows: 1 * - Architecture - Linux Kernel Modules [1]_ - Stack Traces [2]_ - Virtual Address Translation [3]_ * - x86-64 - ✓ - ✓ - ✓ * - AArch64 - ✓ - ✓ - ✓ * - s390x - ✓ - ✓ - ✓ * - ppc64 - ✓ - ✓ - ✓ * - i386 - ✓ - - * - Arm - ✓ - ✓ - ✓ * - RISC-V - ✓ - - .. rubric:: Key .. [1] Support for loading debugging symbols for Linux kernel modules. .. [2] Support for capturing stack traces (:meth:`drgn.Program.stack_trace()`, :meth:`drgn.Thread.stack_trace()`). .. [3] Support for translating virtual addresses, which is required for reading from vmalloc/vmap and module memory in Linux kernel vmcores and for various helpers in :mod:`drgn.helpers.linux.mm`. The listed architectures are recognized in :class:`drgn.Architecture`. Other architectures are represented by :attr:`drgn.Architecture.UNKNOWN`. Features not mentioned above should work on any architecture, listed or not. Cross-Debugging ^^^^^^^^^^^^^^^ drgn can debug architectures different from the host. For example, you can debug an AArch64 (kernel or userspace) core dump from an x86-64 machine. Linux Kernel Versions --------------------- drgn officially supports the current mainline, stable, and longterm kernel releases from `kernel.org `_. (There may be some delay before a new mainline version is fully supported.) End-of-life versions are supported until it becomes too difficult to do so. The kernel versions currently fully supported are: .. Keep this in sync with vmtest/config.py. - 6.0-6.15 - 5.10-5.19 - 5.4 - 4.19 - 4.14 - 4.9 Other versions are not tested. They'll probably mostly work, but support is best-effort. Kernel Configuration ^^^^^^^^^^^^^^^^^^^^ drgn supports debugging kernels with various configurations: - SMP and !SMP. - Preemptible and non-preemptible. - SLUB, SLAB, and SLOB allocators. drgn requires a kernel configured with ``CONFIG_PROC_KCORE=y`` for live kernel debugging. 
drgn-0.0.31/docs/tutorials.rst000066400000000000000000000002071477777462700162700ustar00rootroot00000000000000Tutorials ========= Hands-on tutorials for learning how to use drgn. .. toctree:: :maxdepth: 1 tutorials/blk_rq_qos_crash.rst drgn-0.0.31/docs/tutorials/000077500000000000000000000000001477777462700155375ustar00rootroot00000000000000drgn-0.0.31/docs/tutorials/blk_rq_qos_crash.rst000066400000000000000000000774111477777462700216170ustar00rootroot00000000000000Stack Traces and Mystery Addresses (blk-rq-qos Crash) ===================================================== | Author: Omar Sandoval | Date: February 12, 2025 .. linuxversion:: v6.11 This is a hands-on tutorial walking through a real Linux kernel bug that caused kernel crashes in production. We'll read kernel code and use a few important drgn techniques for reading stack traces and interpreting memory in order to identify the root cause of the bug. We saw this crash on storage workloads on multiple kernel versions, up to and including the latest at the time, Linux 6.11. The kernel logs all implicated something in the block layer. A core dump and debugging symbols are provided for you to follow along with. This tutorial is also available as a video: .. raw:: html Setup ----- .. highlight:: console Follow the :doc:`../installation` instructions to get drgn. Download and extract the tutorial files: .. code-block:: :class: tutorial $ curl -L https://github.com/osandov/drgn/releases/download/tutorial-assets/blk_rq_qos_crash_tutorial.tar.zst \ | zstd -d | tar -x This will create a directory named ``blk_rq_qos_crash_tutorial``. Enter it: .. code-block:: :class: tutorial $ cd blk_rq_qos_crash_tutorial Then, run drgn as follows. It will print a version banner and automatically import the relevant :doc:`../helpers`: .. code-block:: :class: tutorial $ drgn -c vmcore -s vmlinux --main-symbols drgn 0.0.30 (using Python 3.13.1, elfutils 0.192, with libkdumpfile) For help, type help(drgn). 
>>> import drgn >>> from drgn import FaultError, NULL, Object, alignof, cast, container_of, execscript, implicit_convert, offsetof, reinterpret, sizeof, stack_trace >>> from drgn.helpers.common import * >>> from drgn.helpers.linux import * In another window, check out the source code for Linux 6.11. For example, run ``git checkout v6.11`` in an existing Linux repo, or run: .. code-block:: :class: tutorial $ git clone -b v6.11 --depth 1 https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git ... $ cd linux Now we can dive into the core dump. Starting With Dmesg ------------------- .. highlight:: pycon The kernel log buffer is usually the first place to look when debugging a crash. In drgn, call :func:`~drgn.helpers.linux.printk.print_dmesg()` and scroll up until you find the line starting with ``BUG:``. You should see the following trace: .. code-block:: :class: scroll-y tutorial :emphasize-lines: 3,11 >>> print_dmesg() ... [ 18.051123] BUG: kernel NULL pointer dereference, address: 00000000000006fc [ 18.051597] #PF: supervisor write access in kernel mode [ 18.051936] #PF: error_code(0x0002) - not-present page [ 18.052241] PGD 0 P4D 0 [ 18.052336] Oops: Oops: 0002 [#1] PREEMPT SMP NOPTI [ 18.052629] CPU: 0 UID: 0 PID: 906 Comm: fio Kdump: loaded Not tainted 6.11.0 #1 [ 18.053123] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-3.fc41 04/01/2014 [ 18.053739] RIP: 0010:_raw_spin_lock_irqsave+0x36/0x70 [ 18.054059] Code: 04 25 28 00 00 00 48 89 44 24 08 48 c7 04 24 00 00 00 00 9c 8f 04 24 48 8b 1c 24 fa 65 ff 05 89 2a b7 7e b9 01 00 00 00 31 c0 0f b1 0f 75 1e 65 48 8b 04 25 28 00 00 00 48 3b 44 24 08 75 17 [ 18.055467] RSP: 0000:ffffc900011abcd0 EFLAGS: 00010046 [ 18.055788] RAX: 0000000000000000 RBX: 0000000000000082 RCX: 0000000000000001 [ 18.056260] RDX: 0000000000000000 RSI: 0000000000000003 RDI: 00000000000006fc [ 18.056725] RBP: 0000000000000000 R08: 0000000000000000 R09: 000000000015000e [ 18.057202] R10: ffff888002fa5900 R11: 
ffffffff81312090 R12: 0000000000000003 [ 18.057669] R13: ffff888002d4b678 R14: 00000000000006fc R15: 0000000000000003 [ 18.058138] FS: 00007f1ee66c06c0(0000) GS:ffff888005a00000(0000) knlGS:0000000000000000 [ 18.058677] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 18.059039] CR2: 00000000000006fc CR3: 0000000002f4a005 CR4: 0000000000770ef0 [ 18.059508] PKRU: 55555554 [ 18.059614] Call Trace: [ 18.059700] [ 18.059782] ? __die_body+0x16/0x60 [ 18.059982] ? page_fault_oops+0x31e/0x3a0 [ 18.060205] ? exc_page_fault+0x55/0xa0 [ 18.060409] ? asm_exc_page_fault+0x26/0x30 [ 18.060640] ? __pfx_wbt_inflight_cb+0x10/0x10 [ 18.060892] ? _raw_spin_lock_irqsave+0x36/0x70 [ 18.061150] try_to_wake_up+0x3e/0x400 [ 18.061342] rq_qos_wake_function+0x4d/0x60 [ 18.061572] __wake_up_common+0x42/0x80 [ 18.061770] __wake_up_common_lock+0x33/0x60 [ 18.062007] wbt_done+0x60/0x80 [ 18.062152] __rq_qos_done+0x22/0x40 [ 18.062330] blk_mq_free_request+0x62/0xb0 [ 18.062551] virtblk_done+0x99/0x120 [ 18.062731] vring_interrupt+0x71/0x80 [ 18.062928] vp_interrupt+0xa8/0xe0 [ 18.063100] __handle_irq_event_percpu+0x89/0x1b0 [ 18.063373] handle_irq_event_percpu+0xf/0x40 [ 18.063614] handle_irq_event+0x30/0x50 [ 18.063831] handle_fasteoi_irq+0xaa/0x1b0 [ 18.064051] __common_interrupt+0x3a/0xb0 [ 18.064266] common_interrupt+0x3d/0x90 [ 18.064462] asm_common_interrupt+0x26/0x40 [ 18.064691] RIP: 0033:0x7f1ef33679b9 [ 18.064886] Code: ff 48 85 c0 0f 84 32 35 00 00 48 8b bd b8 f9 ff ff 4c 89 b5 80 f9 ff ff 48 89 07 4c 01 f8 48 89 85 78 f9 ff ff e9 8d ca ff ff <48> 8b 85 60 fa ff ff 48 8d 50 08 48 89 95 60 fa ff ff e9 c7 d5 ff [ 18.066333] RSP: 002b:00007f1ee66baad0 EFLAGS: 00000212 [ 18.066624] RAX: 00007f1ee66bad56 RBX: 00007f1ee66bb1d0 RCX: 00007f1ee66bad56 [ 18.066999] RDX: 0000000000000030 RSI: 00000000000f12b3 RDI: 000000000000000a [ 18.067476] RBP: 00007f1ee66bb1a0 R08: 000000000000002c R09: 0000000000000000 [ 18.068003] R10: 00007f1ef348dfe0 R11: 0000000000000020 R12: 0000000000000020 [ 
18.068482] R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000001 [ 18.069005] [ 18.069097] CR2: 00000000000006fc ``BUG: kernel NULL pointer dereference, address: 00000000000006fc`` tells us that the kernel crashed because it dereferenced a null pointer and tried to access the address 0x6fc. ``RIP: 0010:_raw_spin_lock_irqsave+0x36/0x70`` tells us that the bad access happened in the function :linux:`_raw_spin_lock_irqsave() `. Below that, the stack trace tells us how we got there. .. tip:: Ignore call trace lines starting with ``?``. These are stale function addresses on the stack that are not part of the actual call trace. They are printed as a :linuxt:`hint/fail-safe `, but they are misleading as often as not. We'll look at the trace in more detail with drgn soon, but we can see that we got an interrupt for a disk I/O completion, which then tried to wake up a task and acquire a spinlock. Stack Trace in drgn ------------------- Now let's look at drgn's view of the stack trace. Save the stack trace of the crashed thread: .. code-block:: :class: tutorial >>> trace = prog.crashed_thread().stack_trace() And print it: .. 
code-block:: :class: scroll-y tutorial >>> trace #0 arch_atomic_try_cmpxchg (./arch/x86/include/asm/atomic.h:107:9) #1 raw_atomic_try_cmpxchg_acquire (./include/linux/atomic/atomic-arch-fallback.h:2170:9) #2 atomic_try_cmpxchg_acquire (./include/linux/atomic/atomic-instrumented.h:1302:9) #3 queued_spin_lock (./include/asm-generic/qspinlock.h:111:6) #4 do_raw_spin_lock (./include/linux/spinlock.h:187:2) #5 __raw_spin_lock_irqsave (./include/linux/spinlock_api_smp.h:111:2) #6 _raw_spin_lock_irqsave (kernel/locking/spinlock.c:162:9) #7 class_raw_spinlock_irqsave_constructor (./include/linux/spinlock.h:551:1) #8 try_to_wake_up (kernel/sched/core.c:4051:2) #9 rq_qos_wake_function (block/blk-rq-qos.c:223:2) #10 __wake_up_common (kernel/sched/wait.c:89:9) #11 __wake_up_common_lock (kernel/sched/wait.c:106:14) #12 wbt_done (block/blk-wbt.c:259:3) #13 __rq_qos_done (block/blk-rq-qos.c:39:4) #14 rq_qos_done (block/blk-rq-qos.h:122:3) #15 blk_mq_free_request (block/blk-mq.c:737:2) #16 virtblk_done (drivers/block/virtio_blk.c:367:5) #17 vring_interrupt (drivers/virtio/virtio_ring.c:2595:3) #18 vp_vring_interrupt (drivers/virtio/virtio_pci_common.c:82:7) #19 vp_interrupt (drivers/virtio/virtio_pci_common.c:113:9) #20 __handle_irq_event_percpu (kernel/irq/handle.c:158:9) #21 handle_irq_event_percpu (kernel/irq/handle.c:193:11) #22 handle_irq_event (kernel/irq/handle.c:210:8) #23 handle_fasteoi_irq (kernel/irq/chip.c:720:2) #24 generic_handle_irq_desc (./include/linux/irqdesc.h:173:2) #25 handle_irq (arch/x86/kernel/irq.c:247:3) #26 call_irq_handler (arch/x86/kernel/irq.c:259:3) #27 __common_interrupt (arch/x86/kernel/irq.c:285:6) #28 common_interrupt (arch/x86/kernel/irq.c:278:1) #29 asm_common_interrupt+0x26/0x2b (./arch/x86/include/asm/idtentry.h:693) #30 0x7f1ef33679b9 Notice that drgn's stack trace includes information not in the kernel trace, namely: 1. File names and line and column numbers. These are very useful for navigating the code that you're debugging. 2. 
Inlined function calls. For example, frames 0-5 are all inlined calls, and frame 6 was the last actual call. You can verify this by printing each frame individually: .. code-block:: :class: tutorial >>> trace[0] #0 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in arch_atomic_try_cmpxchg at ./arch/x86/include/asm/atomic.h:107:9 (inlined) >>> trace[1] #1 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in raw_atomic_try_cmpxchg_acquire at ./include/linux/atomic/atomic-arch-fallback.h:2170:9 (inlined) >>> trace[2] #2 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in atomic_try_cmpxchg_acquire at ./include/linux/atomic/atomic-instrumented.h:1302:9 (inlined) >>> trace[3] #3 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in queued_spin_lock at ./include/asm-generic/qspinlock.h:111:6 (inlined) >>> trace[4] #4 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in do_raw_spin_lock at ./include/linux/spinlock.h:187:2 (inlined) >>> trace[5] #5 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in __raw_spin_lock_irqsave at ./include/linux/spinlock_api_smp.h:111:2 (inlined) >>> trace[6] #6 at 0xffffffff814b6446 (_raw_spin_lock_irqsave+0x36/0x68) in _raw_spin_lock_irqsave at kernel/locking/spinlock.c:162:9 Notice that frames 0-5 end with ``(inlined)``, and all of the frames have the same instruction pointer, ``0xffffffff814b6446``. Tracing Local Variables ----------------------- Next, let's walk through the stack trace to figure out where the null pointer came from. Frames 0-2 are low-level atomic operations:: #0 arch_atomic_try_cmpxchg (./arch/x86/include/asm/atomic.h:107:9) #1 raw_atomic_try_cmpxchg_acquire (./include/linux/atomic/atomic-arch-fallback.h:2170:9) #2 atomic_try_cmpxchg_acquire (./include/linux/atomic/atomic-instrumented.h:1302:9) That's essentially a fancy memory access, so let's skip those frames. 
Frame 3 is in :linux:`queued_spin_lock() `, the kernel's spinlock implementation:: #3 queued_spin_lock (./include/asm-generic/qspinlock.h:111:6) In your window with the Linux source code, open :file:`include/asm-generic/qspinlock.h` and jump to line 111: .. code-block:: c :caption: include/asm-generic/qspinlock.h :lineno-start: 107 :emphasize-lines: 5 static __always_inline void queued_spin_lock(struct qspinlock *lock) { int val = 0; if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) return; queued_spin_lock_slowpath(lock, val); } Notice that it accesses the ``lock`` parameter. Print it in drgn: .. code-block:: :class: tutorial >>> trace[3]["lock"] (struct qspinlock *)0x6fc This matches the address from the ``BUG`` message in dmesg! Now let's find out where ``lock`` came from. Frames 4-7 wrap the low-level spinlock implementation:: #4 do_raw_spin_lock (./include/linux/spinlock.h:187:2) #5 __raw_spin_lock_irqsave (./include/linux/spinlock_api_smp.h:111:2) #6 _raw_spin_lock_irqsave (kernel/locking/spinlock.c:162:9) #7 class_raw_spinlock_irqsave_constructor (./include/linux/spinlock.h:551:1) Feel free to open the source code for these, but we can quickly check that the lock simply gets passed through: .. code-block:: :class: tutorial >>> trace[4]["lock"] (raw_spinlock_t *)0x6fc >>> trace[5]["lock"] (raw_spinlock_t *)0x6fc >>> trace[6]["lock"] (raw_spinlock_t *)0x6fc :linux:`class_raw_spinlock_irqsave_constructor() ` is slightly different. It is generated by a macro and doesn't use the name ``lock``: .. code-block:: :class: tutorial >>> trace[7]["lock"] Traceback (most recent call last): ... KeyError: 'lock' Let's list all of its local variables and make a guess: .. code-block:: :class: tutorial >>> trace[7].locals() ['l', '_t'] >>> trace[7]["l"] (raw_spinlock_t *)0x6fc .. tip:: Use :meth:`drgn.StackFrame.locals()` to get the list of parameters and local variables in a stack frame when finding the implementation of the function is inconvenient. 
The caller must have passed 0x6fc. Let's look at it. The next frame is in :linux:`try_to_wake_up() `:: #8 try_to_wake_up (kernel/sched/core.c:4051:2) Open :file:`kernel/sched/core.c` at line 4051: .. code-block:: c :caption: kernel/sched/core.c :emphasize-lines: 4 int try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) { ... scoped_guard (raw_spinlock_irqsave, &p->pi_lock) { It is acquiring :linux:`pi_lock ` in a :linux:`task_struct ` (using a `scoped guard `_). Print the ``task_struct``: .. code-block:: :class: tutorial >>> trace[8]["p"] (struct task_struct *)0x0 There's our null pointer! But where did 0x6fc come from? Look at the offset of ``pi_lock`` in ``struct task_struct``: .. code-block:: :class: tutorial >>> hex(offsetof(prog.type("struct task_struct"), "pi_lock")) '0x6fc' Or do the inverse and see what's at offset 0x6fc in ``struct task_struct``: .. code-block:: :class: tutorial >>> member_at_offset(prog.type("struct task_struct"), 0x6fc) 'pi_lock.raw_lock.val.counter or pi_lock.raw_lock.locked or pi_lock.raw_lock.locked_pending' .. tip:: Use :func:`~drgn.offsetof()` and :func:`~drgn.helpers.common.type.member_at_offset()` to decipher pointers to struct members. So where did ``p`` come from? Let's look at the caller, :linux:`rq_qos_wake_function() `, in frame 9:: #9 rq_qos_wake_function (block/blk-rq-qos.c:223:2) Open :file:`block/blk-rq-qos.c` at line 223: .. code-block:: c :caption: block/blk-rq-qos.c :lineno-start: 206 :emphasize-lines: 18 static int rq_qos_wake_function(struct wait_queue_entry *curr, unsigned int mode, int wake_flags, void *key) { struct rq_qos_wait_data *data = container_of(curr, struct rq_qos_wait_data, wq); /* * If we fail to get a budget, return -1 to interrupt the wake up loop * in __wake_up_common. 
*/ if (!data->cb(data->rqw, data->private_data)) return -1; data->got_token = true; smp_wmb(); list_del_init(&curr->entry); wake_up_process(data->task); return 1; } (Note: :linux:`wake_up_process() ` doesn't show up in the stack trace because of `tail call elimination `_. This `may be fixed `_ in a future release of drgn.) ``p`` came from ``data->task``. Print ``data``: .. code-block:: :class: tutorial >>> trace[9]["data"] *(struct rq_qos_wait_data *)0xffffc900011b3558 = { .wq = (struct wait_queue_entry){ .flags = (unsigned int)2168637095, .private = (void *)0xffff888002d6c000, .func = (wait_queue_func_t)0x0, .entry = (struct list_head){ .next = (struct list_head *)0xffff888002d6c000, .prev = (struct list_head *)0xffff888002da2100, }, }, .task = (struct task_struct *)0xffff888000fd6001, .rqw = (struct rq_wait *)0xffffc900011b3a30, .cb = (acquire_inflight_cb_t *)0xffff888002763030, .private_data = (void *)0x1, .got_token = (bool)201, } Notice that ``data->task`` is NOT null. Print the ``comm`` member, which should be the thread name: .. code-block:: :class: tutorial >>> trace[9]["data"].task.comm (char [16])"" Instead, it's empty. This doesn't appear to be a valid ``task_struct``. Identifying Mystery Addresses ----------------------------- If ``data->task`` isn't a valid ``task_struct``, then what is it? Pass it to :func:`~drgn.helpers.common.memory.identify_address()` to answer that: .. code-block:: :class: tutorial >>> identify_address(trace[9]["data"].task) 'slab object: buffer_head+0x1' It's a pointer to a completely unrelated type. Since our problem seems to stem from ``data``, pass it to ``identify_address()`` to see where it comes from: .. code-block:: :class: tutorial >>> identify_address(trace[9]["data"]) 'vmap stack: 909 (fio) +0x3558' This means that ``data`` is on the stack of the task with PID 909. .. tip:: Use :func:`~drgn.helpers.common.memory.identify_address()` to figure out what an unknown address refers to. 
Other Stacks ------------ Notice that we've seen three possibilities for ``data->task``: 1. When it was passed to ``wake_up_process()``, it was ``NULL``. 2. By the time of the crash, it was an unrelated pointer. 3. It's supposed to point to a ``task_struct``. This suggests that there's a data race on ``data->task``. We know that ``data`` is on the stack of another task. Let's find where it's created. In :file:`block/blk-rq-qos.c`, search for ``struct rq_qos_wait_data``. You should find it being used in :linux:`rq_qos_wait() `: .. code-block:: c :caption: block/blk-rq-qos.c :lineno-start: 243 :emphasize-lines: 5 void rq_qos_wait(struct rq_wait *rqw, void *private_data, acquire_inflight_cb_t *acquire_inflight_cb, cleanup_cb_t *cleanup_cb) { struct rq_qos_wait_data data = { .wq = { .func = rq_qos_wake_function, .entry = LIST_HEAD_INIT(data.wq.entry), }, .task = current, .rqw = rqw, .cb = acquire_inflight_cb, .private_data = private_data, }; bool has_sleeper; has_sleeper = wq_has_sleeper(&rqw->wait); if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) return; has_sleeper = !prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE); do { /* The memory barrier in set_task_state saves us here. */ if (data.got_token) break; if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) { finish_wait(&rqw->wait, &data.wq); /* * We raced with rq_qos_wake_function() getting a token, * which means we now have two. Put our local token * and wake anyone else potentially waiting for one. */ smp_rmb(); if (data.got_token) cleanup_cb(rqw, private_data); break; } io_schedule(); has_sleeper = true; set_current_state(TASK_UNINTERRUPTIBLE); } while (1); finish_wait(&rqw->wait, &data.wq); } This function creates ``data`` on the stack, with ``data->task`` set to the current task, and then tries to acquire an "inflight counter". If one is not available, it puts itself on a wait queue and blocks until it can get one. 
So, ``rq_qos_wait()`` waits for an inflight counter, and ``rq_qos_wake_function()`` wakes it up when one becomes available. We would expect that the PID we found earlier, 909, is currently blocked in ``rq_qos_wait()``. Pass the PID to :func:`~drgn.stack_trace()` to check: .. code-block:: :class: scroll-y tutorial >>> stack_trace(909) #0 rep_nop (./arch/x86/include/asm/vdso/processor.h:0:2) #1 cpu_relax (./arch/x86/include/asm/vdso/processor.h:18:2) #2 queued_spin_lock_slowpath (kernel/locking/qspinlock.c:380:3) #3 queued_spin_lock (./include/asm-generic/qspinlock.h:114:2) #4 do_raw_spin_lock (./include/linux/spinlock.h:187:2) #5 __raw_spin_lock_irqsave (./include/linux/spinlock_api_smp.h:111:2) #6 _raw_spin_lock_irqsave (kernel/locking/spinlock.c:162:9) #7 virtblk_add_req_batch (drivers/block/virtio_blk.c:481:2) #8 virtio_queue_rqs (drivers/block/virtio_blk.c:519:11) #9 __blk_mq_flush_plug_list (block/blk-mq.c:2704:2) #10 blk_mq_flush_plug_list (block/blk-mq.c:2781:4) #11 blk_add_rq_to_plug (block/blk-mq.c:1292:3) #12 blk_mq_submit_bio (block/blk-mq.c:3028:3) #13 __submit_bio (block/blk-core.c:615:3) #14 __submit_bio_noacct_mq (block/blk-core.c:696:3) #15 submit_bio_noacct_nocheck (block/blk-core.c:725:3) #16 ext4_io_submit (fs/ext4/page-io.c:377:3) #17 io_submit_add_bh (fs/ext4/page-io.c:418:3) #18 ext4_bio_write_folio (fs/ext4/page-io.c:560:3) #19 mpage_submit_folio (fs/ext4/inode.c:1943:8) #20 mpage_process_page_bufs (fs/ext4/inode.c:2056:9) #21 mpage_prepare_extent_to_map (fs/ext4/inode.c:2564:11) #22 ext4_do_writepages (fs/ext4/inode.c:2706:8) #23 ext4_writepages (fs/ext4/inode.c:2842:8) #24 do_writepages (mm/page-writeback.c:2683:10) #25 __filemap_fdatawrite_range (mm/filemap.c:430:9) #26 generic_fadvise (mm/fadvise.c:114:3) #27 vfs_fadvise (mm/fadvise.c:185:9) #28 ksys_fadvise64_64 (mm/fadvise.c:199:8) #29 __do_sys_fadvise64 (mm/fadvise.c:214:9) #30 __se_sys_fadvise64 (mm/fadvise.c:212:1) #31 __x64_sys_fadvise64 (mm/fadvise.c:212:1) #32 do_syscall_x64 
(arch/x86/entry/common.c:52:14) #33 do_syscall_64 (arch/x86/entry/common.c:83:7) #34 entry_SYSCALL_64+0xaf/0x14c (arch/x86/entry/entry_64.S:121) #35 0x7f1ef340203a It's not in ``rq_qos_wait()``! It seems to have moved on to something else. Analysis -------- At this point, we've gotten everything that we need from drgn. Now we need to interpret what we've gathered and analyze the kernel code. Based on the stack trace for PID 909, we can conclude that the *waiter* got a counter, returned, and moved on to something else. It reused the stack for unrelated data, which explains the mystery pointer that we saw in ``data->task``. The series of events is something like this: 1. ``acquire_inflight_cb()`` on line 260 fails. 2. ``prepare_to_wait_exclusive()`` puts ``data`` on the waitqueue. 3. ``acquire_inflight_cb()`` on line 269 succeeds. 4. ``finish_wait()`` removes ``data`` from the waitqueue. 5. ``rq_qos_wait()`` returns and the task moves on to something else, reusing the stack memory. This means that the *waker* found the waiter's ``data`` in between steps 2 and 4, but by the time the waker called ``wake_up_process(data->task)``, the waiter was past step 5. Wakers and waiters are supposed to be synchronized. Going back to the crashing stack trace, we see that ``rq_qos_wake_function()`` is called via :linux:`__wake_up_common_lock() `:: #10 __wake_up_common (kernel/sched/wait.c:89:9) #11 __wake_up_common_lock (kernel/sched/wait.c:106:14) Open :file:`kernel/sched/wait.c` at line 106 and see that it's holding ``wq_head->lock``: .. 
code-block:: c :caption: kernel/sched/wait.c :lineno-start: 99 :emphasize-lines: 8 static int __wake_up_common_lock(struct wait_queue_head *wq_head, unsigned int mode, int nr_exclusive, int wake_flags, void *key) { unsigned long flags; int remaining; spin_lock_irqsave(&wq_head->lock, flags); remaining = __wake_up_common(wq_head, mode, nr_exclusive, wake_flags, key); spin_unlock_irqrestore(&wq_head->lock, flags); return nr_exclusive - remaining; } On the waiter side, :linux:`finish_wait() ` also grabs ``wq_head->lock``: .. code-block:: c :caption: kernel/sched/wait.c :lineno-start: 446 void finish_wait(struct wait_queue_head *wq_head, struct wait_queue_entry *wq_entry) { unsigned long flags; __set_current_state(TASK_RUNNING); /* * We can check for list emptiness outside the lock * IFF: * - we use the "careful" check that verifies both * the next and prev pointers, so that there cannot * be any half-pending updates in progress on other * CPU's that we haven't seen yet (and that might * still change the stack area. * and * - all other users take the lock (ie we can only * have _one_ other CPU that looks at or modifies * the list). */ if (!list_empty_careful(&wq_entry->entry)) { spin_lock_irqsave(&wq_head->lock, flags); list_del_init(&wq_entry->entry); spin_unlock_irqrestore(&wq_head->lock, flags); } } But there's an important detail here: ``finish_wait()`` doesn't take the lock if the wait queue list entry is empty, i.e., if it has already been removed from the wait queue. Go back to ``rq_qos_wake_function()``: .. code-block:: c :caption: block/blk-rq-qos.c :lineno-start: 206 static int rq_qos_wake_function(struct wait_queue_entry *curr, unsigned int mode, int wake_flags, void *key) { struct rq_qos_wait_data *data = container_of(curr, struct rq_qos_wait_data, wq); /* * If we fail to get a budget, return -1 to interrupt the wake up loop * in __wake_up_common. 
*/ if (!data->cb(data->rqw, data->private_data)) return -1; data->got_token = true; smp_wmb(); list_del_init(&curr->entry); wake_up_process(data->task); return 1; } It removes the entry from the wait queue on line 222, then accesses the entry on line 223. That's the race condition: as soon as the entry has been removed from the wait queue, ``finish_wait()`` in the waiter can return instantly, and the waiter is free to move on. Therefore, after the entry has been removed, the waker must not access it. The Fix ------- The fix is trivial: don't delete the wait queue entry until *after* using it. .. code-block:: diff diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c index 2cfb297d9a62..058f92c4f9d5 100644 --- a/block/blk-rq-qos.c +++ b/block/blk-rq-qos.c @@ -219,8 +219,8 @@ static int rq_qos_wake_function(struct wait_queue_entry *curr, data->got_token = true; smp_wmb(); - list_del_init(&curr->entry); wake_up_process(data->task); + list_del_init_careful(&curr->entry); return 1; } The deletion also needs careful memory ordering to pair with the :linux:`list_empty_careful() ` in ``finish_wait()``, hence the replacement of :linux:`list_del_init() ` with :linux:`list_del_init_careful() `. This fix was merged in Linux 6.12 in `commit e972b08b91ef ("blk-rq-qos: fix crash on rq_qos_wait vs. rq_qos_wake_function race") `_. Conclusion ---------- Debugging a core dump involves a lot of cross-referencing code and core dump state. drgn gives you some powerful capabilities for understanding kernel state, which you can use to discern subtle bugs like this one. In particular, :func:`~drgn.helpers.common.memory.identify_address()`, :func:`~drgn.helpers.common.type.member_at_offset()`, and :meth:`drgn.StackFrame.locals()` are often crucial to an investigation. Feel free to reference the :doc:`../helpers` and explore this core dump further. 
Bonus Challenge: Reading File Pages ----------------------------------- As a bonus, try dumping the contents of the file ``/init`` in the core dump (this is the script that I used to reproduce the bug). First, find the inode for ``/init`` and its file size. .. details:: Hint See :func:`~drgn.helpers.linux.fs.path_lookup()`. .. details:: Answer .. code-block:: :class: tutorial >>> inode = path_lookup("/init").dentry.d_inode >>> inode *(struct inode *)0xffff88800289c568 = { ... } >>> inode.i_size (loff_t)578 The page cache for an inode is in an XArray, ``inode->i_mapping->i_pages``. Get the cached page at offset 0. .. details:: Hint See :func:`~drgn.helpers.linux.xarray.xa_load()` and :func:`~drgn.cast()`. .. details:: Answer .. code-block:: :class: tutorial >>> entry = xa_load(inode.i_mapping.i_pages.address_of_(), 0) >>> page = cast("struct page *", entry) >>> page *(struct page *)0xffffea000015f840 = { ... } Get the page's virtual address. .. details:: Hint See :func:`~drgn.helpers.linux.mm.page_to_virt()`. .. details:: Answer .. code-block:: :class: tutorial >>> addr = page_to_virt(page) >>> addr (void *)0xffff8880057e1000 Finally, read from the virtual address. .. details:: Hint See :meth:`drgn.Program.read()`. .. details:: Answer .. 
code-block:: :class: tutorial >>> print(prog.read(addr, inode.i_size).decode()) #!/bin/sh -e mount -t proc -o nosuid,nodev,noexec proc /proc mount -t devtmpfs -o nosuid dev /dev mkdir /dev/shm mount -t tmpfs -o nosuid,nodev tmpfs /dev/shm mount -t sysfs -o nosuid,nodev,noexec sys /sys mount -t tmpfs -o nosuid,nodev tmpfs /tmp kexec --load-panic --kexec-syscall-auto --command-line="root=/dev/vda rw console=ttyS0,115200 init=/kdump-init" vmlinuz echo 1 > /sys/block/vda/queue/wbt_lat_usec while true; do cat /init > /dev/null done & fio --name=writer --rw=randwrite --ioengine=sync --buffered=1 --bs=4K --time_based --runtime=3600 --size=16M poweroff -f drgn-0.0.31/docs/user_guide.rst000066400000000000000000000401511477777462700163770ustar00rootroot00000000000000User Guide ========== Quick Start ----------- .. include:: ../README.rst :start-after: start-quick-start :end-before: end-quick-start Core Concepts ------------- .. highlight:: pycon The most important interfaces in drgn are *programs*, *objects*, and *helpers*. Programs ^^^^^^^^ A program being debugged is represented by an instance of the :class:`drgn.Program` class. The drgn CLI is initialized with a ``Program`` named ``prog``; unless you are using the drgn library directly, this is usually the only ``Program`` you will need. A ``Program`` is used to look up type definitions, access variables, and read arbitrary memory:: >>> prog.type("struct list_head") struct list_head { struct list_head *next; struct list_head *prev; } >>> prog["jiffies"] (volatile unsigned long)4416739513 >>> prog.read(0xffffffffbe411e10, 16) b'swapper/0\x00\x00\x00\x00\x00\x00\x00' The :meth:`drgn.Program.type()`, :meth:`drgn.Program.variable()`, :meth:`drgn.Program.constant()`, and :meth:`drgn.Program.function()` methods look up those various things in a program. :meth:`drgn.Program.read()` reads memory from the program's address space. 
The :meth:`[] ` operator looks up a variable, constant, or function:: >>> prog["jiffies"] == prog.variable("jiffies") True It is usually more convenient to use the ``[]`` operator rather than the ``variable()``, ``constant()``, or ``function()`` methods unless the program has multiple objects with the same name, in which case the methods provide more control. Objects ^^^^^^^ Variables, constants, functions, and computed values are all called *objects* in drgn. Objects are represented by the :class:`drgn.Object` class. An object may exist in the memory of the program (a *reference*):: >>> Object(prog, 'int', address=0xffffffffc09031a0) Or, an object may be a constant or temporary computed value (a *value*):: >>> Object(prog, 'int', value=4) What makes drgn scripts expressive is that objects can be used almost exactly like they would be in the program's own source code. For example, structure members can be accessed with the dot (``.``) operator, arrays can be subscripted with ``[]``, arithmetic can be performed, and objects can be compared:: >>> print(prog["init_task"].comm[0]) (char)115 >>> print(repr(prog["init_task"].nsproxy.mnt_ns.mounts + 1)) Object(prog, 'unsigned int', value=34) >>> prog["init_task"].nsproxy.mnt_ns.pending_mounts > 0 False Python doesn't have all of the operators that C or C++ do, so some substitutions are necessary: * Instead of ``*ptr``, dereference a pointer with :meth:`ptr[0] `. * Instead of ``ptr->member``, access a member through a pointer with :meth:`ptr.member `. * Instead of ``&var``, get the address of a variable with :meth:`var.address_of_() `. A common use case is converting a ``drgn.Object`` to a Python value so it can be used by a standard Python library. 
There are a few ways to do this: * The :meth:`drgn.Object.value_()` method gets the value of the object with the directly corresponding Python type (i.e., integers and pointers become ``int``, floating-point types become ``float``, booleans become ``bool``, arrays become ``list``, structures and unions become ``dict``). * The :meth:`drgn.Object.string_()` method gets a null-terminated string as ``bytes`` from an array or pointer. * The :class:`int() `, :class:`float() `, and :class:`bool() ` functions do an explicit conversion to that Python type. Objects have several attributes; the most important are :attr:`drgn.Object.prog_` and :attr:`drgn.Object.type_`. The former is the :class:`drgn.Program` that the object is from, and the latter is the :class:`drgn.Type` of the object. Note that all attributes and methods of the ``Object`` class end with an underscore (``_``) in order to avoid conflicting with structure or union members. The ``Object`` attributes and methods always take precedence; use :meth:`drgn.Object.member_()` if there is a conflict. References vs. Values """"""""""""""""""""" The main difference between reference objects and value objects is how they are evaluated. 
References are read from the program's memory every time they are evaluated:: >>> import time >>> jiffies = prog["jiffies"] >>> jiffies.value_() 4391639989 >>> time.sleep(1) >>> jiffies.value_() 4391640290 Values simply return the stored value (:meth:`drgn.Object.read_()` reads a reference object and returns it as a value object):: >>> jiffies2 = jiffies.read_() >>> jiffies2.value_() 4391640291 >>> time.sleep(1) >>> jiffies2.value_() 4391640291 >>> jiffies.value_() 4391640593 References have a :attr:`drgn.Object.address_` attribute, which is the object's address as a Python ``int``:: >>> address = prog["jiffies"].address_ >>> type(address) >>> print(hex(address)) 0xffffffffbe405000 This is slightly different from the :meth:`drgn.Object.address_of_()` method, which returns the address as a ``drgn.Object``:: >>> jiffiesp = prog["jiffies"].address_of_() >>> print(repr(jiffiesp)) Object(prog, 'volatile unsigned long *', value=0xffffffffbe405000) >>> print(hex(jiffiesp.value_())) 0xffffffffbe405000 Of course, both references and values can have a pointer type; ``address_`` refers to the address of the pointer object itself, and :meth:`drgn.Object.value_()` refers to the value of the pointer (i.e., the address it points to). .. _absent-objects: Absent Objects """""""""""""" In addition to reference objects and value objects, objects may also be *absent*. >>> Object(prog, "int").value_() Traceback (most recent call last): File "", line 1, in _drgn.ObjectAbsentError: object absent This represents an object whose value or address is not known. For example, this can happen if the object was optimized out of the program by the compiler. Any attempt to operate on an absent object results in a :exc:`drgn.ObjectAbsentError` exception, although basic information including its type may still be accessed. Helpers ^^^^^^^ Some programs have common data structures that you may want to examine. For example, consider linked lists in the Linux kernel: .. 
code-block:: c struct list_head { struct list_head *next, *prev; }; #define list_for_each(pos, head) \ for (pos = (head)->next; pos != (head); pos = pos->next) When working with these lists, you'd probably want to define a function: .. code-block:: python3 def list_for_each(head): pos = head.next while pos != head: yield pos pos = pos.next Then, you could use it like so for any list you need to look at:: >>> for pos in list_for_each(head): ... do_something_with(pos) Of course, it would be a waste of time and effort for everyone to have to define these helpers for themselves, so drgn includes a collection of helpers for many use cases. See :doc:`helpers`. .. _validators: Validators """""""""" Validators are a special category of helpers that check the consistency of a data structure. In general, helpers assume that the data structures that they examine are valid. Validators do not make this assumption and do additional (potentially expensive) checks to detect broken invariants, corruption, etc. Validators raise :class:`drgn.helpers.ValidationError` if the data structure is not valid or :class:`drgn.FaultError` if the data structure is invalid in a way that causes a bad memory access. They have names prefixed with ``validate_``. For example, :func:`drgn.helpers.linux.list.validate_list()` checks the consistency of a linked list in the Linux kernel (in particular, the consistency of the ``next`` and ``prev`` pointers):: >>> validate_list(prog["my_list"].address_of_()) drgn.helpers.ValidationError: (struct list_head *)0xffffffffc029e460 next 0xffffffffc029e000 has prev 0xffffffffc029e450 :func:`drgn.helpers.linux.list.validate_list_for_each_entry()` does the same checks while also returning the entries in the list for further validation: .. 
code-block:: python3 def validate_my_list(prog): for entry in validate_list_for_each_entry( "struct my_entry", prog["my_list"].address_of_(), "list", ): if entry.value < 0: raise ValidationError("list contains negative entry") Other Concepts -------------- In addition to the core concepts above, drgn provides a few additional abstractions. Threads ^^^^^^^ The :class:`drgn.Thread` class represents a thread. :meth:`drgn.Program.threads()`, :meth:`drgn.Program.thread()`, :meth:`drgn.Program.main_thread()`, and :meth:`drgn.Program.crashed_thread()` can be used to find threads:: >>> for thread in prog.threads(): ... print(thread.tid) ... 39143 39144 >>> print(prog.main_thread().tid) 39143 >>> print(prog.crashed_thread().tid) 39144 Stack Traces ^^^^^^^^^^^^ drgn represents stack traces with the :class:`drgn.StackTrace` and :class:`drgn.StackFrame` classes. :func:`drgn.stack_trace()`, :meth:`drgn.Program.stack_trace()`, and :meth:`drgn.Thread.stack_trace()` return the call stack for a thread:: >>> trace = stack_trace(115) >>> trace #0 context_switch (./kernel/sched/core.c:4683:2) #1 __schedule (./kernel/sched/core.c:5940:8) #2 schedule (./kernel/sched/core.c:6019:3) #3 schedule_hrtimeout_range_clock (./kernel/time/hrtimer.c:2148:3) #4 poll_schedule_timeout (./fs/select.c:243:8) #5 do_poll (./fs/select.c:961:8) #6 do_sys_poll (./fs/select.c:1011:12) #7 __do_sys_poll (./fs/select.c:1076:8) #8 __se_sys_poll (./fs/select.c:1064:1) #9 __x64_sys_poll (./fs/select.c:1064:1) #10 do_syscall_x64 (./arch/x86/entry/common.c:50:14) #11 do_syscall_64 (./arch/x86/entry/common.c:80:7) #12 entry_SYSCALL_64+0x7c/0x15b (./arch/x86/entry/entry_64.S:113) #13 0x7f3344072af7 The :meth:`[] ` operator on a ``StackTrace`` gets the ``StackFrame`` at the given index:: >>> trace[5] #5 at 0xffffffff8a5a32d0 (do_sys_poll+0x400/0x578) in do_poll at ./fs/select.c:961:8 (inlined) The :meth:`[] ` operator on a ``StackFrame`` looks up an object in the scope of that frame. 
:meth:`drgn.StackFrame.locals()` returns a list of the available names:: >>> prog["do_poll"] (int (struct poll_list *list, struct poll_wqueues *wait, struct timespec64 *end_time))0xffffffff905c6e10 >>> trace[5].locals() ['list', 'wait', 'end_time', 'pt', 'expire', 'to', 'timed_out', 'count', 'slack', 'busy_flag', 'busy_start', 'walk', 'can_busy_loop'] >>> trace[5]["list"] *(struct poll_list *)0xffffacca402e3b50 = { .next = (struct poll_list *)0x0, .len = (int)1, .entries = (struct pollfd []){}, } Symbols ^^^^^^^ The symbol table of a program is a list of identifiers along with their address and size. drgn represents symbols with the :class:`drgn.Symbol` class, which is returned by :meth:`drgn.Program.symbol()`. Types ^^^^^ drgn automatically obtains type definitions from the program. Types are represented by the :class:`drgn.Type` class and created by various factory functions like :meth:`drgn.Program.int_type()`:: >>> prog.type("int") prog.int_type(name='int', size=4, is_signed=True) You won't usually need to work with types directly, but see :ref:`api-reference-types` if you do. Modules ^^^^^^^ drgn tracks executables, shared libraries, loadable kernel modules, and other binary files used by a program with the :class:`drgn.Module` class. Modules store their name, identifying information, load address, and debugging symbols. .. code-block:: pycon :caption: Linux kernel example >>> for module in prog.modules(): ... print(module) ... prog.main_module(name='kernel') prog.relocatable_module(name='rng_core', address=0xffffffffc0400000) prog.relocatable_module(name='virtio_rng', address=0xffffffffc0402000) prog.relocatable_module(name='binfmt_misc', address=0xffffffffc0401000) >>> prog.main_module().debug_file_path '/usr/lib/modules/6.13.0-rc1-vmtest34.1default/build/vmlinux' .. code-block:: pycon :caption: Userspace example >>> for module in prog.modules(): ... print(module) ... 
prog.main_module(name='/usr/bin/grep') prog.shared_library_module(name='/lib64/ld-linux-x86-64.so.2', dynamic_address=0x7f51772b6e68) prog.shared_library_module(name='/lib64/libc.so.6', dynamic_address=0x7f51771af960) prog.shared_library_module(name='/lib64/libpcre2-8.so.0', dynamic_address=0x7f5177258c68) prog.vdso_module(name='linux-vdso.so.1', dynamic_address=0x7f51772803e0) >>> prog.main_module().loaded_file_path '/usr/bin/grep' >>> prog.main_module().debug_file_path '/usr/lib/debug/usr/bin/grep-3.11-7.fc40.x86_64.debug' drgn normally initializes the appropriate modules and loads their debugging symbols automatically. Advanced use cases can create or modify modules and load debugging symbols manually; see the :ref:`advanced usage guide `. Platforms ^^^^^^^^^ Certain operations and objects in a program are platform-dependent; drgn allows accessing the platform that a program runs with the :class:`drgn.Platform` class. Command Line Interface ---------------------- The drgn CLI is basically a wrapper around the drgn library which automatically creates a :class:`drgn.Program`. The CLI can be run in interactive mode or script mode. Script Mode ^^^^^^^^^^^ Script mode is useful for reusable scripts. Simply pass the path to the script along with any arguments: .. code-block:: console $ cat script.py import sys from drgn.helpers.linux import find_task pid = int(sys.argv[1]) uid = find_task(pid).cred.uid.val.value_() print(f"PID {pid} is being run by UID {uid}") $ sudo drgn script.py 601 PID 601 is being run by UID 1000 It's even possible to run drgn scripts directly with the proper `shebang `_:: $ cat script2.py #!/usr/bin/env drgn mounts = prog["init_task"].nsproxy.mnt_ns.mounts.value_() print(f"You have {mounts} filesystems mounted") $ sudo ./script2.py You have 36 filesystems mounted .. 
_interactive-mode: Interactive Mode ^^^^^^^^^^^^^^^^ Interactive mode uses the Python interpreter's `interactive mode `_ and adds a few nice features, including: * History * Tab completion * Automatic import of relevant helpers * Pretty printing of objects and types The default behavior of the Python `REPL `_ is to print the output of :func:`repr()`. For :class:`drgn.Object` and :class:`drgn.Type`, this is a raw representation:: >>> print(repr(prog["jiffies"])) Object(prog, 'volatile unsigned long', address=0xffffffffbe405000) >>> print(repr(prog.type("atomic_t"))) prog.typedef_type(name='atomic_t', type=prog.struct_type(tag=None, size=4, members=(TypeMember(prog.type('int'), name='counter', bit_offset=0),))) The standard :func:`print()` function uses the output of :func:`str()`. For drgn objects and types, this is a representation in programming language syntax:: >>> print(prog["jiffies"]) (volatile unsigned long)4395387628 >>> print(prog.type("atomic_t")) typedef struct { int counter; } atomic_t In interactive mode, the drgn CLI automatically uses ``str()`` instead of ``repr()`` for objects and types, so you don't need to call ``print()`` explicitly:: $ drgn >>> prog["jiffies"] (volatile unsigned long)4395387628 >>> prog.type("atomic_t") typedef struct { int counter; } atomic_t Next Steps ---------- Follow along with a :doc:`tutorial ` or :doc:`case study `. Refer to the :doc:`api_reference` and look through the :doc:`helpers`. Browse through the `tools `_. Check out the `community contributions `_. drgn-0.0.31/drgn/000077500000000000000000000000001477777462700135135ustar00rootroot00000000000000drgn-0.0.31/drgn/__init__.py000066400000000000000000000170711477777462700156320ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Programmable debugger drgn is a programmable debugger. It is built on top of Python, so if you don't know at least a little bit of Python, go learn it first. 
drgn supports an interactive mode and a script mode. Both are simply a Python interpreter initialized with a special drgn.Program object named "prog" that represents the program which is being debugged. In interactive mode, try >>> help(prog) or >>> help(drgn.Program) to learn more about how to use it. Objects in the program (e.g., variables and values) are represented by drgn.Object. Try >>> help(drgn.Object) Types are represented by drgn.Type objects. Try >>> help(drgn.Type) Various helpers are provided for particular types of programs. Try >>> import drgn.helpers >>> help(drgn.helpers) The drgn.internal package contains the drgn internals. Everything in that package should be considered implementation details and should not be used. """ import io import pkgutil import sys import types from typing import Union from _drgn import ( NULL, AbsenceReason, Architecture, DebugInfoOptions, ExtraModule, FaultError, FindObjectFlags, IntegerLike, KmodSearchMethod, Language, MainModule, MissingDebugInfoError, Module, ModuleFileStatus, NoDefaultProgramError, Object, ObjectAbsentError, OutOfBoundsError, Path, Platform, PlatformFlags, PrimitiveType, Program, ProgramFlags, Qualifiers, Register, RelocatableModule, SharedLibraryModule, StackFrame, StackTrace, SupplementaryFileKind, Symbol, SymbolBinding, SymbolIndex, SymbolKind, Thread, Type, TypeEnumerator, TypeKind, TypeKindSet, TypeMember, TypeParameter, TypeTemplateParameter, VdsoModule, WantedSupplementaryFile, alignof, cast, container_of, filename_matches, get_default_prog, host_platform, implicit_convert, offsetof, program_from_core_dump, program_from_kernel, program_from_pid, reinterpret, set_default_prog, sizeof, ) # flake8 doesn't honor import X as X. See PyCQA/pyflakes#474. 
# isort: split from _drgn import ( # noqa: F401 _elfutils_version as _elfutils_version, _enable_dlopen_debuginfod as _enable_dlopen_debuginfod, _have_debuginfod as _have_debuginfod, _with_libkdumpfile as _with_libkdumpfile, ) from drgn.internal.version import __version__ as __version__ # noqa: F401 __all__ = ( "AbsenceReason", "Architecture", "DebugInfoOptions", "ExtraModule", "FaultError", "FindObjectFlags", "IntegerLike", "KmodSearchMethod", "Language", "MainModule", "MissingDebugInfoError", "Module", "ModuleFileStatus", "NULL", "NoDefaultProgramError", "Object", "ObjectAbsentError", "OutOfBoundsError", "Path", "Platform", "PlatformFlags", "PrimitiveType", "Program", "ProgramFlags", "Qualifiers", "Register", "RelocatableModule", "SharedLibraryModule", "StackFrame", "StackTrace", "SupplementaryFileKind", "Symbol", "SymbolBinding", "SymbolIndex", "SymbolKind", "Thread", "Type", "TypeEnumerator", "TypeKind", "TypeKindSet", "TypeMember", "TypeParameter", "TypeTemplateParameter", "VdsoModule", "WantedSupplementaryFile", "alignof", "cast", "container_of", "execscript", "filename_matches", "get_default_prog", "host_platform", "implicit_convert", "offsetof", "program_from_core_dump", "program_from_kernel", "program_from_pid", "reinterpret", "set_default_prog", "sizeof", "stack_trace", ) if sys.version_info >= (3, 8): _open_code = io.open_code # novermin else: from typing import BinaryIO def _open_code(path: str) -> BinaryIO: return open(path, "rb") # From https://docs.python.org/3/reference/import.html#import-related-module-attributes. _special_globals = frozenset( [ "__name__", "__loader__", "__package__", "__spec__", "__path__", "__file__", "__cached__", ] ) def execscript(path: str, *args: str) -> None: """ Execute a script. The script is executed in the same context as the caller: currently defined globals are available to the script, and globals defined by the script are added back to the calling context. 
This is most useful for executing scripts from interactive mode. For example, you could have a script named ``exe.py``: .. code-block:: python3 \"\"\"Get all tasks executing a given file.\"\"\" import sys from drgn.helpers.linux.fs import d_path from drgn.helpers.linux.pid import find_task def task_exe_path(task): if task.mm: return d_path(task.mm.exe_file.f_path).decode() else: return None tasks = [ task for task in for_each_task() if task_exe_path(task) == sys.argv[1] ] Then, you could execute it and use the defined variables and functions: >>> execscript('exe.py', '/usr/bin/bash') >>> tasks[0].pid (pid_t)358442 >>> task_exe_path(find_task(357954)) '/usr/bin/vim' :param path: File path of the script. :param args: Zero or more additional arguments to pass to the script. This is a :ref:`variable argument list `. """ # This is based on runpy.run_path(), which we can't use because we want to # update globals even if the script throws an exception. saved_module = [] try: saved_module.append(sys.modules["__main__"]) except KeyError: pass saved_argv = sys.argv try: module = types.ModuleType("__main__") sys.modules["__main__"] = module sys.argv = [path] sys.argv.extend(args) with _open_code(path) as f: code = pkgutil.read_code(f) if code is None: with _open_code(path) as f: code = compile(f.read(), path, "exec") module.__spec__ = None module.__file__ = path module.__cached__ = None # type: ignore[attr-defined] caller_globals = sys._getframe(1).f_globals caller_special_globals = { name: caller_globals[name] for name in _special_globals if name in caller_globals } for name, value in caller_globals.items(): if name not in _special_globals: setattr(module, name, value) try: exec(code, vars(module)) finally: caller_globals.clear() caller_globals.update(caller_special_globals) for name, value in vars(module).items(): if name not in _special_globals: caller_globals[name] = value finally: sys.argv = saved_argv if saved_module: sys.modules["__main__"] = saved_module[0] else: del 
sys.modules["__main__"] def stack_trace(thread: Union[Object, IntegerLike]) -> StackTrace: """ Get the stack trace for the given thread using the :ref:`default program argument `. See :meth:`Program.stack_trace()` for more details. :param thread: Thread ID, ``struct pt_regs`` object, or ``struct task_struct *`` object. """ if isinstance(thread, Object): return thread.prog_.stack_trace(thread) else: return get_default_prog().stack_trace(thread) drgn-0.0.31/drgn/__main__.py000066400000000000000000000004421477777462700156050ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ drgn entry point This module runs the drgn CLI. There is nothing interesting here. $ python3 -m drgn --help """ if __name__ == "__main__": from drgn.cli import _main _main() drgn-0.0.31/drgn/cli.py000066400000000000000000000614251477777462700146440ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # Copyright (c) 2023, Oracle and/or its affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """Functions for embedding the drgn CLI.""" import argparse import builtins import importlib import logging import os import os.path import pkgutil import runpy import shutil import sys from typing import IO, Any, Callable, Dict, Optional, Tuple import drgn from drgn.internal.repl import interact, readline from drgn.internal.rlcompleter import Completer from drgn.internal.sudohelper import open_via_sudo __all__ = ("default_globals", "run_interactive", "version_header") logger = logging.getLogger("drgn") # The list of attributes from the drgn module which are imported and inserted # into the global namespace for interactive debugging. 
_DRGN_GLOBALS = [ "FaultError", "NULL", "Object", "alignof", "cast", "container_of", "execscript", "implicit_convert", "offsetof", "reinterpret", "sizeof", "stack_trace", ] def _is_tty(file: IO[Any]) -> bool: try: return os.isatty(file.fileno()) except (AttributeError, OSError): return False class _LogFormatter(logging.Formatter): _LEVELS = ( (logging.DEBUG, "debug", "\033[36m", "\033[m", ""), (logging.INFO, "info", "\033[32m", "\033[m", ""), (logging.WARNING, "warning", "\033[33m", "\033[m", ""), (logging.ERROR, "error", "\033[31m", "\033[m", ""), (logging.CRITICAL, "critical", "\033[31;1m", "\033[0;1m", "\033[m"), ) def __init__(self, color: bool) -> None: if color: levels = { level: (f"{level_prefix}{level_name}:{message_prefix}", message_suffix) for level, level_name, level_prefix, message_prefix, message_suffix in self._LEVELS } else: levels = { level: (f"{level_name}:", "") for level, level_name, _, _, _ in self._LEVELS } default_prefix = "%(levelname)s:" self._drgn_formatters = { level: logging.Formatter(f"{prefix} %(message)s{suffix}") for level, (prefix, suffix) in levels.items() } self._default_drgn_formatter = logging.Formatter( f"{default_prefix} %(message)s" ) self._other_formatters = { level: logging.Formatter(f"{prefix}%(name)s: %(message)s{suffix}") for level, (prefix, suffix) in levels.items() } self._default_other_formatter = logging.Formatter( f"{default_prefix}%(name)s: %(message)s" ) def format(self, record: logging.LogRecord) -> str: if record.name == "drgn": formatter = self._drgn_formatters.get( record.levelno, self._default_drgn_formatter ) else: formatter = self._other_formatters.get( record.levelno, self._default_other_formatter ) return formatter.format(record) def version_header() -> str: """ Return the version header printed at the beginning of a drgn session. The :func:`run_interactive()` function does not include this banner at the beginning of an interactive session. 
Use this function to retrieve one line of text to add to the beginning of the drgn banner, or print it before calling :func:`run_interactive()`. """ python_version = ".".join(str(v) for v in sys.version_info[:3]) debuginfod = f'with{"" if drgn._have_debuginfod else "out"} debuginfod' if drgn._enable_dlopen_debuginfod: debuginfod += " (dlopen)" libkdumpfile = f'with{"" if drgn._with_libkdumpfile else "out"} libkdumpfile' return f"drgn {drgn.__version__} (using Python {python_version}, elfutils {drgn._elfutils_version}, {debuginfod}, {libkdumpfile})" def default_globals(prog: drgn.Program) -> Dict[str, Any]: """ Return the default globals for an interactive drgn session :param prog: the program which will be debugged :return: a dict of globals """ # Don't forget to update the default banner in run_interactive() # with any new additions. init_globals: Dict[str, Any] = { "prog": prog, "drgn": drgn, "__name__": "__main__", "__doc__": None, } for attr in _DRGN_GLOBALS: init_globals[attr] = getattr(drgn, attr) module = importlib.import_module("drgn.helpers.common") for name in module.__dict__["__all__"]: init_globals[name] = getattr(module, name) if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: module = importlib.import_module("drgn.helpers.linux") for name in module.__dict__["__all__"]: init_globals[name] = getattr(module, name) return init_globals def _identify_script(path: str) -> str: EI_NIDENT = 16 SIZEOF_E_TYPE = 2 with open(path, "rb") as f: header = f.read(EI_NIDENT + SIZEOF_E_TYPE) ELFMAG = b"\177ELF" EI_DATA = 5 ELFDATA2LSB = 1 ELFDATA2MSB = 2 ET_CORE = 4 if len(header) < EI_NIDENT + SIZEOF_E_TYPE or header[:4] != ELFMAG: return "other" if header[EI_DATA] == ELFDATA2LSB: byteorder = "little" elif header[EI_DATA] == ELFDATA2MSB: byteorder = "big" else: return "elf" e_type = int.from_bytes( header[EI_NIDENT : EI_NIDENT + SIZEOF_E_TYPE], byteorder, # type: ignore[arg-type] # python/mypy#9057 ) return "core" if e_type == ET_CORE else "elf" def _displayhook(value: 
Any) -> None: if value is None: return setattr(builtins, "_", None) if isinstance(value, drgn.Object): try: text = value.format_(columns=shutil.get_terminal_size((0, 0)).columns) except drgn.FaultError as e: logger.warning("can't print value: %s", e) text = repr(value) elif isinstance(value, (drgn.StackFrame, drgn.StackTrace, drgn.Type)): text = str(value) else: text = repr(value) try: sys.stdout.write(text) except UnicodeEncodeError: encoded = text.encode(sys.stdout.encoding, "backslashreplace") if hasattr(sys.stdout, "buffer"): sys.stdout.buffer.write(encoded) else: text = encoded.decode(sys.stdout.encoding, "strict") sys.stdout.write(text) sys.stdout.write("\n") setattr(builtins, "_", value) def _bool_options(value: bool) -> Dict[str, Tuple[str, bool]]: return { option: ("try_" + option.replace("-", "_"), value) for option in ( "module-name", "build-id", "debug-link", "procfs", "embedded-vdso", "reuse", "supplementary", ) } class _TrySymbolsByBaseAction(argparse.Action): _enable: bool _finder = ("disable_debug_info_finders", "enable_debug_info_finders") _options = ( { **_bool_options(False), "kmod": ("try_kmod", drgn.KmodSearchMethod.NONE), }, { **_bool_options(True), "kmod=depmod": ("try_kmod", drgn.KmodSearchMethod.DEPMOD), "kmod=walk": ("try_kmod", drgn.KmodSearchMethod.WALK), "kmod=depmod-or-walk": ("try_kmod", drgn.KmodSearchMethod.DEPMOD_OR_WALK), "kmod=depmod-and-walk": ("try_kmod", drgn.KmodSearchMethod.DEPMOD_AND_WALK), }, ) def __init__(self, *args: Any, **kwargs: Any) -> None: kwargs["dest"] = argparse.SUPPRESS super().__init__(*args, **kwargs) def __call__( self, parser: argparse.ArgumentParser, namespace: argparse.Namespace, values: Any, option_string: Optional[str] = None, ) -> None: for value in values.split(","): try: option_name, option_value = self._options[self._enable][value] except KeyError: # Raise an error if passed an option meant for the opposite # argument. 
if value in self._options[not self._enable]: raise argparse.ArgumentError(self, f"invalid option: {value!r}") if not hasattr(namespace, self._finder[self._enable]): setattr(namespace, self._finder[self._enable], {}) getattr(namespace, self._finder[self._enable])[value] = None if hasattr(namespace, self._finder[not self._enable]): getattr(namespace, self._finder[not self._enable]).pop(value, None) else: if not hasattr(namespace, "debug_info_options"): namespace.debug_info_options = {} namespace.debug_info_options[option_name] = option_value class _TrySymbolsByAction(_TrySymbolsByBaseAction): _enable = True class _NoSymbolsByAction(_TrySymbolsByBaseAction): _enable = False def _load_debugging_symbols(prog: drgn.Program, args: argparse.Namespace) -> None: enable_debug_info_finders = getattr(args, "enable_debug_info_finders", ()) disable_debug_info_finders = getattr(args, "disable_debug_info_finders", ()) if enable_debug_info_finders or disable_debug_info_finders: debug_info_finders = prog.enabled_debug_info_finders() registered_debug_info_finders = prog.registered_debug_info_finders() unknown_finders = [] for finder in enable_debug_info_finders: if finder not in debug_info_finders: if finder in registered_debug_info_finders: debug_info_finders.append(finder) else: unknown_finders.append(finder) for finder in disable_debug_info_finders: try: debug_info_finders.remove(finder) except ValueError: if finder not in registered_debug_info_finders: unknown_finders.append(finder) if unknown_finders: if len(unknown_finders) == 1: unknown_finders_repr = repr(unknown_finders[0]) elif len(unknown_finders) == 2: unknown_finders_repr = ( f"{unknown_finders[0]!r} or {unknown_finders[1]!r}" ) elif len(unknown_finders) > 2: unknown_finders = [repr(finder) for finder in unknown_finders] unknown_finders[-1] = "or " + unknown_finders[-1] unknown_finders_repr = ", ".join(unknown_finders) logger.warning( "no matching debugging information finders or options for %s", unknown_finders_repr, ) 
prog.set_enabled_debug_info_finders(debug_info_finders) debug_info_options = getattr(args, "debug_info_options", None) if debug_info_options: for option, value in debug_info_options.items(): setattr(prog.debug_info_options, option, value) if args.debug_directories is not None: if args.no_default_debug_directories: prog.debug_info_options.directories = args.debug_directories else: prog.debug_info_options.directories = ( tuple(args.debug_directories) + prog.debug_info_options.directories ) elif args.no_default_debug_directories: prog.debug_info_options.directories = () if args.kernel_directories is not None: if args.no_default_kernel_directories: prog.debug_info_options.kernel_directories = args.kernel_directories else: prog.debug_info_options.kernel_directories = ( tuple(args.kernel_directories) + prog.debug_info_options.kernel_directories ) elif args.no_default_kernel_directories: prog.debug_info_options.kernel_directories = () if args.default_symbols is None: args.default_symbols = {"default": True, "main": True} try: prog.load_debug_info(args.symbols, **args.default_symbols) except drgn.MissingDebugInfoError as e: if args.default_symbols.get("main"): try: main_module = prog.main_module() critical = ( main_module.wants_debug_file() or main_module.wants_loaded_file() ) except LookupError: critical = True else: critical = False logger.log(logging.CRITICAL if critical else logging.WARNING, "%s", e) if args.extra_symbols: for extra_symbol_path in args.extra_symbols: extra_symbol_path = os.path.abspath(extra_symbol_path) prog.extra_module(extra_symbol_path, create=True).try_file( extra_symbol_path ) def _main() -> None: handler = logging.StreamHandler() color = _is_tty(sys.stderr) handler.setFormatter(_LogFormatter(color)) logging.getLogger().addHandler(handler) version = version_header() parser = argparse.ArgumentParser(prog="drgn", description="Programmable debugger") program_group = parser.add_argument_group( title="program selection", 
).add_mutually_exclusive_group() program_group.add_argument( "-k", "--kernel", action="store_true", help="debug the running kernel (default)" ) program_group.add_argument( "-c", "--core", metavar="PATH", type=str, help="debug the given core dump" ) program_group.add_argument( "-p", "--pid", metavar="PID", type=int, help="debug the running process with the given PID", ) symbol_group = parser.add_argument_group("debugging symbols") symbol_group.add_argument( "-s", "--symbols", metavar="PATH", type=str, action="append", help="load debugging symbols from the given file. " "If the file does not correspond to a loaded executable, library, or module, " "then it is ignored. This option may be given more than once", ) default_symbols_group = symbol_group.add_mutually_exclusive_group() default_symbols_group.add_argument( "--main-symbols", dest="default_symbols", action="store_const", const={"main": True}, help="only load debugging symbols for the main executable " "and those added with -s or --extra-symbols", ) default_symbols_group.add_argument( "--no-default-symbols", dest="default_symbols", action="store_const", const={}, help="don't load any debugging symbols that were not explicitly added " "with -s or --extra-symbols", ) symbol_group.add_argument( "--extra-symbols", metavar="PATH", type=str, action="append", help="load additional debugging symbols from the given file, " "which is assumed not to correspond to a loaded executable, library, or module. " "This option may be given more than once", ) symbol_group.add_argument( "--try-symbols-by", metavar="METHOD[,METHOD...]", action=_TrySymbolsByAction, help="enable loading debugging symbols using the given methods. " "Choices are debugging information finder names " "(standard, debuginfod, or any added by plugins) " "or debugging information options (" + ", ".join(_TrySymbolsByBaseAction._options[True]) + "). 
" "This option may be given more than once", ) symbol_group.add_argument( "--no-symbols-by", metavar="METHOD[,METHOD...]", action=_NoSymbolsByAction, help="disable loading debugging symbols using the given methods. " "Choices are debugging information finder names " "(standard, debuginfod, or any added by plugins) " "or debugging information options (" + ", ".join(_TrySymbolsByBaseAction._options[False]) + "). " "This option may be given more than once", ) symbol_group.add_argument( "--debug-directory", dest="debug_directories", metavar="PATH", type=str, action="append", help="search for debugging symbols by build ID and debug link in the given directory. " "This option may be given more than once", ) symbol_group.add_argument( "--no-default-debug-directories", action="store_true", help="don't search for debugging symbols by build ID and debug link " "in the standard directories or those added by plugins", ) symbol_group.add_argument( "--kernel-directory", dest="kernel_directories", metavar="PATH", type=str, action="append", help="search for the kernel image and loadable kernel modules in the given directory. 
" "This option may be given more than once", ) symbol_group.add_argument( "--no-default-kernel-directories", action="store_true", help="don't search for the kernel image and loadable kernel modules " "in the standard directories or those added by plugins", ) advanced_group = parser.add_argument_group("advanced") advanced_group.add_argument( "--architecture", metavar="ARCH", choices=[a.name for a in drgn.Architecture] + [a.name.lower() for a in drgn.Architecture], help="set the program architecture, in case it can't be auto-detected", ) advanced_group.add_argument( "--vmcoreinfo", type=str, metavar="PATH", help="path to vmcoreinfo file (overrides any already present in the file)", ) parser.add_argument( "--log-level", choices=["debug", "info", "warning", "error", "critical", "none"], default="warning", help="log messages of at least the given level to standard error (default: warning)", ) parser.add_argument( "-q", "--quiet", dest="log_level", action="store_const", const="none", help="don't print any logs or download progress", ) parser.add_argument( "-e", dest="exec", metavar="CODE", help="an expression or statement to evaluate, instead of running in interactive mode", ) parser.add_argument( "args", metavar="ARG", type=str, nargs=argparse.REMAINDER, help="script to execute instead of running in interactive mode " "(unless -e is given) and arguments to pass", ) parser.add_argument("--version", action="version", version=version) args = parser.parse_args() script = bool(not args.exec and args.args) interactive = bool(not args.exec and not args.args and _is_tty(sys.stdin)) if script: # A common mistake users make is running drgn $core_dump, which tries # to run $core_dump as a Python script. Rather than failing later with # some inscrutable syntax or encoding error, try to catch this early # and provide a helpful message. 
try: script_type = _identify_script(args.args[0]) except OSError as e: sys.exit(str(e)) if script_type == "core": sys.exit( f"error: {args.args[0]} is a core dump\n" f'Did you mean "-c {args.args[0]}"?' ) elif script_type == "elf": sys.exit(f"error: {args.args[0]} is a binary, not a drgn script") elif interactive: print(version, file=sys.stderr, flush=True) if args.log_level == "none": logger.setLevel(logging.CRITICAL + 1) else: logger.setLevel(args.log_level.upper()) platform = None if args.architecture: platform = drgn.Platform(drgn.Architecture[args.architecture.upper()]) vmcoreinfo = None if args.vmcoreinfo is not None: with open(args.vmcoreinfo, "rb") as f: vmcoreinfo = f.read() prog = drgn.Program(platform=platform, vmcoreinfo=vmcoreinfo) try: if args.core is not None: prog.set_core_dump(args.core) elif args.pid is not None: try: prog.set_pid(args.pid or os.getpid()) except PermissionError as e: sys.exit( f"{e}\nerror: attaching to live process requires ptrace attach permissions" ) else: try: prog.set_kernel() except PermissionError as e: if shutil.which("sudo") is None: sys.exit( f"{e}\ndrgn debugs the live kernel by default, which requires root" ) else: prog.set_core_dump(open_via_sudo("/proc/kcore", os.O_RDONLY)) except OSError as e: sys.exit(str(e)) except ValueError as e: # E.g., "not an ELF core file" sys.exit(f"error: {e}") _load_debugging_symbols(prog, args) if interactive: run_interactive(prog) else: drgn.set_default_prog(prog) if script: sys.argv = args.args script_path = args.args[0] if pkgutil.get_importer(script_path) is None: sys.path.insert(0, os.path.dirname(os.path.abspath(script_path))) runpy.run_path( script_path, init_globals={"prog": prog}, run_name="__main__" ) else: sys.path.insert(0, "") exec_globals = default_globals(prog) if args.exec: sys.argv = ["-e"] + args.args exec(args.exec, exec_globals) else: sys.argv = [""] exec_globals["__file__"] = "" exec(compile(sys.stdin.read(), "", "exec"), exec_globals) def run_interactive( prog: 
drgn.Program, banner_func: Optional[Callable[[str], str]] = None, globals_func: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None, quiet: bool = False, ) -> None: """ Run drgn's :ref:`interactive-mode` until the user exits. This function allows your application to embed the same REPL that drgn provides when it is run on the command line in interactive mode. :param prog: Pre-configured program to run against. Available as a global named ``prog`` in the CLI. :param banner_func: Optional function to modify the printed banner. Called with the default banner, and must return a string to use as the new banner. The default banner does not include the drgn version, which can be retrieved via :func:`version_header()`. :param globals_func: Optional function to modify globals provided to the session. Called with a dictionary of default globals, and must return a dictionary to use instead. :param quiet: Ignored. Will be removed in the future. .. note:: This function uses :mod:`readline` and modifies some settings. Unfortunately, it is not possible for it to restore all settings. In particular, it clears the ``readline`` history and resets the TAB keybinding to the default. Applications using ``readline`` should save their history and clear any custom settings before calling this function. After calling this function, applications should restore their history and settings before using ``readline``. """ init_globals = default_globals(prog) banner = f"""\ For help, type help(drgn). 
>>> import drgn >>> from drgn import {", ".join(_DRGN_GLOBALS)} >>> from drgn.helpers.common import *""" if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: banner += "\n>>> from drgn.helpers.linux import *" if banner_func: banner = banner_func(banner) if globals_func: init_globals = globals_func(init_globals) old_path = list(sys.path) old_displayhook = sys.displayhook old_history_length = readline.get_history_length() old_completer = readline.get_completer() try: old_default_prog = drgn.get_default_prog() except drgn.NoDefaultProgramError: old_default_prog = None histfile = os.path.expanduser("~/.drgn_history") try: readline.clear_history() try: readline.read_history_file(histfile) except OSError as e: if not isinstance(e, FileNotFoundError): logger.warning("could not read history: %s", e) readline.set_history_length(1000) readline.parse_and_bind("tab: complete") readline.set_completer(Completer(init_globals).complete) sys.path.insert(0, "") sys.displayhook = _displayhook drgn.set_default_prog(prog) try: interact(init_globals, banner) finally: try: readline.write_history_file(histfile) except OSError as e: logger.warning("could not write history: %s", e) finally: drgn.set_default_prog(old_default_prog) sys.displayhook = old_displayhook sys.path[:] = old_path readline.set_history_length(old_history_length) readline.parse_and_bind("tab: self-insert") readline.set_completer(old_completer) readline.clear_history() drgn-0.0.31/drgn/helpers/000077500000000000000000000000001477777462700151555ustar00rootroot00000000000000drgn-0.0.31/drgn/helpers/__init__.py000066400000000000000000000010341477777462700172640ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Helpers ------- The ``drgn.helpers`` package contains subpackages which provide helpers for working with particular types of programs. Currently, there are common helpers and helpers for the Linux kernel. 
In the future, there may be helpers for, e.g., glibc and libstdc++. """ class ValidationError(Exception): """ Error raised by a :ref:`validator ` when an inconsistent or invalid state is detected. """ drgn-0.0.31/drgn/helpers/common/000077500000000000000000000000001477777462700164455ustar00rootroot00000000000000drgn-0.0.31/drgn/helpers/common/__init__.py000066400000000000000000000017041477777462700205600ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Common ------ The ``drgn.helpers.common`` package provides helpers that can be used with any program. The helpers are available from the individual modules in which they are defined and from this top-level package. E.g., the following are both valid: >>> from drgn.helpers.common.memory import identify_address >>> from drgn.helpers.common import identify_address Some of these helpers may have additional program-specific behavior but are otherwise generic. """ import importlib import pkgutil from typing import List __all__: List[str] = [] for _module_info in pkgutil.iter_modules(__path__, prefix=__name__ + "."): _submodule = importlib.import_module(_module_info.name) _submodule_all = getattr(_submodule, "__all__", ()) __all__.extend(_submodule_all) for _name in _submodule_all: globals()[_name] = getattr(_submodule, _name) drgn-0.0.31/drgn/helpers/common/format.py000066400000000000000000000142431477777462700203130ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Formatting ---------- The ``drgn.helpers.common.format`` module provides generic helpers for formatting different things as text. 
""" from typing import Iterable, SupportsFloat, Tuple from drgn import IntegerLike, Type __all__ = ( "decode_enum_type_flags", "decode_flags", "escape_ascii_character", "escape_ascii_string", "number_in_binary_units", ) def escape_ascii_character( c: int, escape_single_quote: bool = False, escape_double_quote: bool = False, escape_backslash: bool = False, ) -> str: """ Format an ASCII byte value as a character, possibly escaping it. Non-printable characters are always escaped. Non-printable characters other than ``\\0``, ``\\a``, ``\\b``, ``\\t``, ``\\n``, ``\\v``, ``\\f``, and ``\\r`` are escaped in hexadecimal format (e.g., ``\\x7f``). By default, printable characters are never escaped. :param c: Character to escape. :param escape_single_quote: Whether to escape single quotes to ``\\'``. :param escape_double_quote: Whether to escape double quotes to ``\\"``. :param escape_backslash: Whether to escape backslashes to ``\\\\``. """ if c == 0: return r"\0" elif c == 7: return r"\a" elif c == 8: return r"\b" elif c == 9: return r"\t" elif c == 10: return r"\n" elif c == 11: return r"\v" elif c == 12: return r"\f" elif c == 13: return r"\r" elif escape_double_quote and c == 34: return r"\"" elif escape_single_quote and c == 39: return r"\'" elif escape_backslash and c == 92: return r"\\" elif 32 <= c <= 126: return chr(c) else: return f"\\x{c:02x}" def escape_ascii_string( buffer: Iterable[int], escape_single_quote: bool = False, escape_double_quote: bool = False, escape_backslash: bool = False, ) -> str: """ Escape an iterable of ASCII byte values (e.g., :class:`bytes` or :class:`bytearray`). See :func:`escape_ascii_character()`. :param buffer: Byte array to escape. 
""" return "".join( escape_ascii_character( c, escape_single_quote=escape_single_quote, escape_double_quote=escape_double_quote, escape_backslash=escape_backslash, ) for c in buffer ) def decode_flags( value: IntegerLike, flags: Iterable[Tuple[str, int]], bit_numbers: bool = True, ) -> str: """ Get a human-readable representation of a bitmask of flags. By default, flags are specified by their bit number: >>> decode_flags(2, [("BOLD", 0), ("ITALIC", 1), ("UNDERLINE", 2)]) 'ITALIC' They can also be specified by their value: >>> decode_flags(2, [("BOLD", 1), ("ITALIC", 2), ("UNDERLINE", 4)], ... bit_numbers=False) 'ITALIC' Multiple flags are combined with "|": >>> decode_flags(5, [("BOLD", 0), ("ITALIC", 1), ("UNDERLINE", 2)]) 'BOLD|UNDERLINE' If there are multiple names for the same bit, they are all included: >>> decode_flags(2, [("SMALL", 0), ("BIG", 1), ("LARGE", 1)]) 'BIG|LARGE' If there are any unknown bits, their raw value is included: >>> decode_flags(27, [("BOLD", 0), ("ITALIC", 1), ("UNDERLINE", 2)]) 'BOLD|ITALIC|0x18' Zero is returned verbatim: >>> decode_flags(0, [("BOLD", 0), ("ITALIC", 1), ("UNDERLINE", 2)]) '0' :param value: Bitmask to decode. :param flags: List of flag names and their bit numbers or values. :param bit_numbers: Whether *flags* specifies the bit numbers (where 0 is the least significant bit) or values of the flags. """ value = value.__index__() if value == 0: return "0" parts = [] mask = 0 for name, flag in flags: if bit_numbers: flag = 1 << flag if value & flag: parts.append(name) mask |= flag if value & ~mask: parts.append(hex(value & ~mask)) return "|".join(parts) def decode_enum_type_flags( value: IntegerLike, type: Type, bit_numbers: bool = True, ) -> str: """ Get a human-readable representation of a bitmask of flags where the flags are specified by an enumerated :class:`drgn.Type`. 
This supports enums where the values are bit numbers: >>> print(bits_enum) enum style_bits { BOLD = 0, ITALIC = 1, UNDERLINE = 2, } >>> decode_enum_type_flags(5, bits_enum) 'BOLD|UNDERLINE' Or the values of the flags: >>> print(flags_enum) enum style_flags { BOLD = 1, ITALIC = 2, UNDERLINE = 4, } >>> decode_enum_type_flags(5, flags_enum, bit_numbers=False) 'BOLD|UNDERLINE' See :func:`decode_flags()`. :param value: Bitmask to decode. :param type: Enumerated type with bit numbers for enumerators. :param bit_numbers: Whether the enumerator values specify the bit numbers or values of the flags. """ enumerators = type.enumerators if enumerators is None: raise TypeError("cannot decode incomplete enumerated type") return decode_flags( value, enumerators, # type: ignore # python/mypy#592 bit_numbers, ) def number_in_binary_units(n: SupportsFloat, precision: int = 1) -> str: """ Format a number in binary units (i.e., "K" is 1024, "M" is 1024\\ :sup:`2`, etc.). >>> number_in_binary_units(1280) '1.2K' A precision can be specified: >>> number_in_binary_units(1280, precision=2) '1.25K' Exact numbers are printed without a fractional part: >>> number_in_binary_units(1024 * 1024) '1M' Numbers less than 1024 are not scaled: >>> number_in_binary_units(10) '10' :param n: Number to format. :param precision: Number of digits to include in fractional part. """ n = float(n) for prefix in ("", "K", "M", "G", "T", "P", "E", "Z"): if abs(n) < 1024: break n /= 1024.0 else: prefix = "Y" if n.is_integer(): precision = 0 return f"{n:.{precision}f}{prefix}" drgn-0.0.31/drgn/helpers/common/memory.py000066400000000000000000000205511477777462700203320ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Memory ------ The ``drgn.helpers.common.memory`` module provides helpers for working with memory and addresses. 
""" import operator import os import typing from typing import Any, Dict, Optional import drgn from drgn import FaultError, IntegerLike, Object, PlatformFlags, Program, SymbolKind from drgn.helpers.common.format import escape_ascii_string from drgn.helpers.common.prog import takes_program_or_default from drgn.helpers.linux.mm import find_vmap_area, in_direct_map from drgn.helpers.linux.pid import for_each_task from drgn.helpers.linux.slab import _find_containing_slab, _get_slab_cache_helper __all__ = ( "identify_address", "print_annotated_memory", ) _SYMBOL_KIND_STR = { SymbolKind.OBJECT: "object symbol", SymbolKind.FUNC: "function symbol", } def _identify_kernel_vmap( prog: Program, addr: int, cache: Optional[Dict[Any, Any]] = None ) -> Optional[str]: va = find_vmap_area(prog, addr) if not va: return None vm = va.vm.read_() if not vm: return None task: Optional[Object] # The cached and uncached cases are separate so that we can avoid creating # a large cache and stop early in the uncached case. if cache is None: for task in for_each_task(prog): try: if task.stack_vm_area == vm: break except AttributeError: # CONFIG_VMAP_STACK must be disabled. task = None break except FaultError: continue else: task = None else: try: stack_vm_area_to_task = cache["stack_vm_area_to_task"] except KeyError: stack_vm_area_to_task = {} for task in for_each_task(prog): try: stack_vm_area_to_task[task.stack_vm_area.value_()] = task except AttributeError: # CONFIG_VMAP_STACK must be disabled. 
break except FaultError: continue cache["stack_vm_area_to_task"] = stack_vm_area_to_task task = stack_vm_area_to_task.get(vm.value_()) if task is not None: return ( f"vmap stack: {task.pid.value_()}" f" ({os.fsdecode(task.comm.string_())})" f" +{hex(addr - task.stack.value_())}" ) caller = "" caller_value = vm.caller.value_() try: caller_sym = prog.symbol(caller_value) except LookupError: pass else: caller = f" caller {caller_sym.name}+{hex(caller_value - caller_sym.address)}" return f"vmap: {hex(va.va_start)}-{hex(va.va_end)}{caller}" def _identify_kernel_address( prog: Program, addr: int, cache: Optional[Dict[Any, Any]] = None ) -> Optional[str]: try: direct_map = in_direct_map(prog, addr) except NotImplementedError: # Virtual address translation isn't implemented for this # architecture. direct_map = False if direct_map: result = _find_containing_slab(prog, addr) if result is not None: slab_cache, page, slab = result slab_info = _get_slab_cache_helper(slab_cache).object_info(page, slab, addr) if slab_info: cache_name = escape_ascii_string( slab_info.slab_cache.name.string_(), escape_backslash=True ) if slab_info.allocated: maybe_free = "" elif slab_info.allocated is None: maybe_free = "corrupted " else: maybe_free = "free " return f"{maybe_free}slab object: {cache_name}+{hex(addr - slab_info.address)}" else: return _identify_kernel_vmap(prog, addr, cache) return None @takes_program_or_default def identify_address( prog: Program, addr: IntegerLike, *, cache: Optional[Dict[Any, Any]] = None ) -> Optional[str]: """ Try to identify what an address refers to. For all programs, this will identify addresses as follows: * Object symbols (e.g., addresses in global variables): ``object symbol: {symbol_name}+{hex_offset}`` (where ``hex_offset`` is the offset from the beginning of the symbol in hexadecimal). * Function symbols (i.e., addresses in functions): ``function symbol: {symbol_name}+{hex_offset}``. * Other symbols: ``symbol: {symbol_name}+{hex_offset}``. 
Additionally, for the Linux kernel, this will identify: * Allocated slab objects: ``slab object: {slab_cache_name}+{hex_offset}`` (where ``hex_offset`` is the offset from the beginning of the object in hexadecimal). * Free slab objects: ``free slab object: {slab_cache_name}+{hex_offset}``. * Vmap addresses (e.g., vmalloc, ioremap): ``vmap: {hex_start_address}-{hex_end_address}``. If the function that allocated the vmap is known, this also includes ``caller {function_name}+{hex_offset}``. * Vmap kernel stacks: ``vmap stack: {pid} ({comm}) +{hex_offset}`` (where ``pid`` and ``comm`` identify the task and ``hex_offset`` is the offset from the beginning of the stack in hexadecimal). This may recognize other types of addresses in the future. :param addr: ``void *`` :param cache: Opaque cache used to amortize expensive lookups. If you're going to call this function many times in a short period, create an empty dictionary and pass the same dictionary as *cache* to each call. Don't reuse it indefinitely or you may get stale results. :return: Identity as string, or ``None`` if the address is unrecognized. """ addr = operator.index(addr) # Check if address is of a symbol: try: symbol = prog.symbol(addr) except LookupError: # not a symbol pass else: symbol_kind = _SYMBOL_KIND_STR.get(symbol.kind, "symbol") return f"{symbol_kind}: {symbol.name}+{hex(addr - symbol.address)}" if prog.flags & drgn.ProgramFlags.IS_LINUX_KERNEL: # Linux kernel-specific identification: return _identify_kernel_address(prog, addr, cache) return None @takes_program_or_default def print_annotated_memory( prog: Program, address: IntegerLike, size: IntegerLike, physical: bool = False ) -> None: """ Print the contents of a range of memory, annotating values that can be identified. Currently, this will identify any addresses in the memory range with :func:`~drgn.helpers.common.memory.identify_address()`. 
See :func:`~drgn.helpers.common.stack.print_annotated_stack()` for a similar function that annotates stack traces. >>> print_annotated_memory(0xffffffff963eb200, 56) ADDRESS VALUE ffffffff963eb200: 00000000000000b8 ffffffff963eb208: 000000000000a828 ffffffff963eb210: 0000000000000000 ffffffff963eb218: ffff8881042948e0 [slab object: mnt_cache+0x20] ffffffff963eb220: ffff88810074a540 [slab object: dentry+0x0] ffffffff963eb228: ffff8881042948e0 [slab object: mnt_cache+0x20] ffffffff963eb230: ffff88810074a540 [slab object: dentry+0x0] :param address: Starting address. :param size: Number of bytes to read. :param physical: Whether *address* is a physical memory address. If ``False``, then it is a virtual memory address. """ address = operator.index(address) mem = prog.read(address, size, physical) # The platform must be known if we were able to read memory. assert prog.platform is not None byteorder: 'typing.Literal["little", "big"]' if prog.platform.flags & PlatformFlags.IS_LITTLE_ENDIAN: byteorder = "little" else: byteorder = "big" if prog.platform.flags & PlatformFlags.IS_64_BIT: word_size = 8 line_format = "{:016x}: {:016x}{}" print("ADDRESS VALUE") else: word_size = 4 line_format = "{:08x}: {:08x}{}" print("ADDRESS VALUE") cache: Dict[Any, Any] = {} for offset in range(0, len(mem), word_size): value = int.from_bytes(mem[offset : offset + word_size], byteorder) identified = identify_address(prog, value, cache=cache) if identified is None: identified = "" else: identified = f" [{identified}]" print(line_format.format(address + offset, value, identified)) drgn-0.0.31/drgn/helpers/common/prog.py000066400000000000000000000206261477777462700177740ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Program Decorators ------------------ The ``drgn.helpers.common.prog`` module provides decorators to transparently use the :ref:`default program argument `. 
""" import functools import inspect import typing from drgn import Object, Program, get_default_prog __all__ = ( "takes_object_or_program_or_default", "takes_program_or_default", ) # We don't need any of this at runtime. if typing.TYPE_CHECKING: import sys from typing import Any, Optional, Protocol, TypeVar, overload # novermin if sys.version_info < (3, 10): from typing_extensions import Callable, Concatenate, ParamSpec else: from typing import Callable, Concatenate, ParamSpec # novermin P = ParamSpec("P") R = TypeVar("R") R_co = TypeVar("R_co", covariant=True) class TakesProgram(Protocol[P, R_co]): def __call__( self, prog: Program, *args: P.args, **kwargs: P.kwargs ) -> R_co: ... class TakesProgramOrDefault(Protocol[P, R_co]): @overload def __call__( self, prog: Program, *args: P.args, **kwargs: P.kwargs ) -> R_co: ... @overload def __call__(self, *args: P.args, **kwargs: P.kwargs) -> R_co: ... class TakesObjectOrProgramOrDefault(Protocol[P, R_co]): @overload def __call__( self, prog: Program, *args: P.args, **kwargs: P.kwargs ) -> R_co: ... @overload def __call__( self, __obj: Object, *args: P.args, **kwargs: P.kwargs ) -> R_co: ... @overload def __call__(self, *args: P.args, **kwargs: P.kwargs) -> R_co: ... def takes_program_or_default(f: "TakesProgram[P, R]") -> "TakesProgramOrDefault[P, R]": """ Wrap a function taking a :class:`~drgn.Program` so that it uses the :ref:`default program argument ` if omitted. .. code-block:: python3 @takes_program_or_default def my_helper(prog: Program, n: IntegerLike) -> Foo: ... my_helper(1) # is equivalent to my_helper(get_default_prog(), 1) obj = Object(...) 
my_helper(obj) # is equivalent to my_helper(obj.prog_, obj) """ parameters_iter = iter(inspect.signature(f).parameters.values()) if next(parameters_iter).name != "prog": raise TypeError("first parameter must be prog: Program") param1 = None for parameter in parameters_iter: if parameter.kind not in ( inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD, ): param1 = parameter.name break if param1 is None: @functools.wraps(f) def wrapper(*args: "Any", **kwds: "Any") -> "R": if (args and isinstance(args[0], Program)) or ("prog" in kwds): return f(*args, **kwds) else: return f(get_default_prog(), *args, **kwds) else: @functools.wraps(f) def wrapper(*args: "Any", **kwds: "Any") -> "R": if args: if isinstance(args[0], Program): return f(*args, **kwds) elif isinstance(args[0], Object): return f(args[0].prog_, *args, **kwds) elif "prog" in kwds: return f(**kwds) elif param1 in kwds: arg1 = kwds[param1] if isinstance(arg1, Object): return f(arg1.prog_, **kwds) return f(get_default_prog(), *args, **kwds) # Update the docstring for pydoc. if wrapper.__doc__ is not None: wrapper.__doc__ += ":param prog: Program, which may be omitted to use the default program argument.\n" return wrapper def takes_object_or_program_or_default( # This should be a Protocol instead of Callable, but there's currently no # way for a Protocol to express that the second parameter can have any name # and then use that name in the return type. See python/typing#1505. f: "Callable[Concatenate[Program, Optional[Object], P], R]", ) -> "TakesObjectOrProgramOrDefault[P, R]": """ Wrap a function taking a :class:`~drgn.Program` and an optional :class:`~drgn.Object` so that it accepts a ``Program`` *or* an ``Object`` *or neither*, in which case the :ref:`default program argument ` is used. .. code-block:: python3 @takes_object_or_program_or_default def my_helper(prog: Program, obj: Optional[Object], n: IntegerLike) -> Foo: ... 
my_helper(prog, 1) # is equivalent to my_helper.__wrapped__(prog, None, 1) obj = Object(...) my_helper(obj, 1) # is equivalent to my_helper.__wrapped__(obj.prog_, obj, 1) my_helper(1) # is equivalent to my_helper.__wrapped__(get_default_prog(), None, 1) one_obj = Object(..., 1) my_helper(one_obj) # is equivalent to my_helper.__wrapped__(one_obj.prog_, None, one_obj) .. warning:: This cannot be used with positional parameters with a default value, as that would create ambiguity. Keyword-only parameters with a default value are OK. .. code-block:: python3 # NOT ALLOWED @takes_object_or_program_or_default def my_helper(prog: Program, obj: Optional[Object], foo: str = ""): ... # OK @takes_object_or_program_or_default def my_helper(prog: Program, obj: Optional[Object], *, foo: str = ""): ... .. note:: The object parameter can be passed as a keyword, but because of `limitations of the Python type system `_, type checkers do not recognize this. """ signature = inspect.signature(f) parameters = list(signature.parameters.values()) if parameters[0].name != "prog": raise TypeError("first parameter must be prog: Program") object_param = parameters[1].name extra_params = [] for parameter in parameters[2:]: if parameter.kind in ( inspect.Parameter.KEYWORD_ONLY, inspect.Parameter.VAR_KEYWORD, ): break if parameter.default != inspect.Parameter.empty: raise ValueError( f"{getattr(f, '__name__', str(f))} using @takes_object_or_program_or_default can't have positional parameters with defaults due to ambiguity" ) extra_params.append(parameter.name) if extra_params: param2 = extra_params[0] @functools.wraps(f) def wrapper(*args: "Any", **kwds: "Any") -> "R": if args: if len(args) > len(extra_params) or extra_params[len(args) - 1] in kwds: if isinstance(args[0], Program): return f(args[0], None, *args[1:], **kwds) else: return f(args[0].prog_, *args, **kwds) elif isinstance(args[0], Object): return f(args[0].prog_, None, *args, **kwds) elif object_param in kwds: return 
f(kwds[object_param].prog_, *args, **kwds) elif "prog" in kwds: kwds[object_param] = None return f(*args, **kwds) elif param2 in kwds: arg2 = kwds[param2] if isinstance(arg2, Object): return f(arg2.prog_, None, *args, **kwds) return f(get_default_prog(), None, *args, **kwds) else: @functools.wraps(f) def wrapper(*args: "Any", **kwds: "Any") -> "R": if args: if isinstance(args[0], Program): return f(args[0], None, *args[1:], **kwds) else: return f(args[0].prog_, *args, **kwds) elif object_param in kwds: return f(kwds[object_param].prog_, *args, **kwds) elif "prog" in kwds: kwds[object_param] = None return f(*args, **kwds) return f(get_default_prog(), None, *args, **kwds) # Update the signature for pydoc. wrapper.__signature__ = signature.replace( # type: ignore[attr-defined] parameters=[ parameters[1].replace(annotation=typing.Union[Object, Program]), *parameters[2:], ] ) return wrapper drgn-0.0.31/drgn/helpers/common/stack.py000066400000000000000000000117201477777462700201250ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Stack ----- The ``drgn.helpers.common.stack`` module provides helpers for working with stack traces. """ from typing import Any, Dict from drgn import FaultError, PlatformFlags, StackTrace from drgn.helpers.common.memory import identify_address __all__ = ("print_annotated_stack",) def print_annotated_stack(trace: StackTrace) -> None: """ Print the contents of stack memory in a stack trace, annotating values that can be identified. Currently, this will identify any addresses on the stack with :func:`~drgn.helpers.common.memory.identify_address()`. See :func:`~drgn.helpers.common.memory.print_annotated_memory()` for a similar function that annotates arbitrary memory ranges. 
>>> print_annotated_stack(stack_trace(1)) STACK POINTER VALUE [stack frame #0 at 0xffffffff8dc93c41 (__schedule+0x429/0x488) in context_switch at ./kernel/sched/core.c:5209:2 (inlined)] [stack frame #1 at 0xffffffff8dc93c41 (__schedule+0x429/0x488) in __schedule at ./kernel/sched/core.c:6521:8] ffffa903c0013d28: ffffffff8d8497bf [function symbol: __flush_tlb_one_user+0x5] ffffa903c0013d30: 000000008d849eb5 ffffa903c0013d38: 0000000000000001 ffffa903c0013d40: 0000000000000004 ffffa903c0013d48: efdea37bb7cb1f00 ffffa903c0013d50: ffff926641178000 [slab object: task_struct+0x0] ffffa903c0013d58: ffff926641178000 [slab object: task_struct+0x0] ffffa903c0013d60: ffffa903c0013e10 ffffa903c0013d68: ffff926641177ff0 [slab object: mm_struct+0x70] ffffa903c0013d70: ffff926641178000 [slab object: task_struct+0x0] ffffa903c0013d78: ffff926641178000 [slab object: task_struct+0x0] ffffa903c0013d80: ffffffff8dc93d29 [function symbol: schedule+0x89] ... :param trace: Stack trace to print. """ prog = trace.prog # platform must be known if there is a stack trace assert prog.platform is not None if prog.platform.flags & PlatformFlags.IS_LITTLE_ENDIAN: byteorder = "little" else: byteorder = "big" if prog.platform.flags & PlatformFlags.IS_64_BIT: word_size = 8 line_format = "{:016x}: {:016x}{}" print("STACK POINTER VALUE") else: word_size = 4 line_format = "{:08x}: {:08x}{}" print("STACK VALUE\nPOINTER") cache: Dict[Any, Any] = {} start = 0 while start < len(trace): # Find the bounds of this stack. Our heuristics for the end of the # stack are: # # 1. An interrupted frame. # 2. A frame with a stack pointer less than the previous frame's stack # pointer (since the stack grows down in all of the architectures we # support). # 3. A frame with a stack pointer much greater than the previous # frame's stack pointer. Our arbitrary threshold is 128 MB. 
(Linux # kernel stacks are at most 16 KB as of Linux 6.8, and userspace # stacks are limited to 8 MB by default, so this threshold could be # adjusted if needed.) end = start + 1 while ( end < len(trace) and not trace[end].interrupted and 0 <= trace[end].sp - trace[end - 1].sp <= 128 * 1024 * 1024 ): end += 1 # Gather the frames for this stack. frames = [trace[i] for i in range(start, end)] frames_addrs = [frame.sp for frame in frames] start_addr = frames_addrs[0] end_addr = frames_addrs[-1] + word_size - 1 stack_size = end_addr - start_addr + 1 try: stack_bytes = prog.read(start_addr, stack_size) except FaultError: # Couldn't read the stack. Just print the frames. for frame in frames: print(f"[stack frame {frame}]") start = end continue frame_ind = 0 for offset in range(0, len(stack_bytes), word_size): addr = start_addr + offset word_bytes = stack_bytes[offset : offset + word_size] word_val = int.from_bytes( word_bytes, # The byteorder parameter is annotated as # Literal['little', 'big'], but mypy infers that our byteorder # variable is str. byteorder=byteorder, # type: ignore[arg-type] ) # There may be multiple frames matching this address (usually # because of inline frames). while frame_ind < len(frames_addrs) and addr == frames_addrs[frame_ind]: frame = frames[frame_ind] print(f"[stack frame {frame}]") frame_ind += 1 identified = identify_address(prog, word_val, cache=cache) if identified is None: identified = "" else: identified = f" [{identified}]" print(line_format.format(addr, word_val, identified)) start = end drgn-0.0.31/drgn/helpers/common/type.py000066400000000000000000000241351477777462700200050ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Types ----- The ``drgn.helpers.common.type`` module provides generic helpers for working with types in ways that aren't provided by the core drgn library. 
""" import enum import operator import typing from typing import Container, List, Tuple from drgn import IntegerLike, Type, TypeKind, TypeMember, sizeof __all__ = ( "enum_type_to_class", "member_at_offset", ) def enum_type_to_class( type: Type, name: str, exclude: Container[str] = (), prefix: str = "" ) -> typing.Type[enum.IntEnum]: """ Get an :class:`enum.IntEnum` class from an enumerated :class:`drgn.Type`. :param type: Enumerated type to convert. :param name: Name of the ``IntEnum`` type to create. :param exclude: Container (e.g., list or set) of enumerator names to exclude from the created ``IntEnum``. :param prefix: Prefix to strip from the beginning of enumerator names. """ if type.enumerators is None: raise TypeError("enum type is incomplete") enumerators = [ (name[len(prefix) :] if name.startswith(prefix) else name, value) for (name, value) in type.enumerators if name not in exclude ] return enum.IntEnum(name, enumerators) # type: ignore # python/mypy#4865 def member_at_offset(type: Type, offset: IntegerLike) -> str: """ Return the name of the member at an offset in a type. This is effectively the opposite of :func:`~drgn.offsetof()`. 
>>> prog.type('struct list_head') struct list_head { struct list_head *next; struct list_head *prev; } >>> member_at_offset(prog.type('struct list_head'), 0) 'next' >>> member_at_offset(prog.type('struct list_head'), 8) 'prev' This includes nested structures and array elements: >>> prog.type('struct sigpending') struct sigpending { struct list_head list; sigset_t signal; } >>> prog.type('sigset_t') typedef struct { unsigned long sig[1]; } sigset_t >>> member_at_offset(prog.type('struct sigpending'), 0) 'list.next' >>> member_at_offset(prog.type('struct sigpending'), 8) 'list.prev' >>> member_at_offset(prog.type('struct sigpending'), 16) 'signal.sig[0]' This also includes all possible matches for a union: >>> prog.type('union mc_target') union mc_target { struct folio *folio; swp_entry_t ent; } >>> prog.type('swp_entry_t') typedef struct { unsigned long val; } swp_entry_t >>> member_at_offset(prog.type('union mc_target'), 0) 'folio or ent.val' Offsets in the middle of a member are represented: >>> member_at_offset(prog.type('struct list_head'), 4) 'next+0x4' Offsets in padding or past the end of the type are also represented: >>> prog.type('struct autogroup') struct autogroup { struct kref kref; struct task_group *tg; struct rw_semaphore lock; unsigned long id; int nice; } >>> member_at_offset(prog.type('struct autogroup'), 4) '' >>> member_at_offset(prog.type('struct autogroup'), 70) '' >>> member_at_offset(prog.type('struct autogroup'), 72) '' >>> member_at_offset(prog.type('struct autogroup'), 80) '' :param type: Type to check. :param offset: Offset in bytes. 
:raises TypeError: if *type* is not a structure, union, class, or array type (or a typedef of one of those) """ bit_offset = operator.index(offset) * 8 while type.kind == TypeKind.TYPEDEF: type = type.type if type.kind not in ( TypeKind.STRUCT, TypeKind.UNION, TypeKind.CLASS, TypeKind.ARRAY, ): raise TypeError("must be compound type or array") # Chain of member accesses and array subscripts that we've followed. chain = [] # We traverse all union members in a depth-first search. This stack stores # the members that still need to be explored, along with the remaining # bit_offset from that member and the length of the chain leading to that # member. stack: List[Tuple[TypeMember, int, int]] = [] results = [] # When we've reached the end of a chain, add it to the results and go to # the next member in the stack (if any). def emit_and_pop_member() -> bool: nonlocal type, bit_offset if bit_offset: if (bit_offset & 7) == 0: chain.append("+") chain.append(hex(bit_offset // 8)) else: chain.append("+") chain.append(str(bit_offset)) chain.append(" bits") results.append("".join(chain)) if not stack: return False member, parent_bit_offset, chain_len = stack.pop() type = member.type bit_offset = parent_bit_offset - member.bit_offset del chain[chain_len:] if member.name is not None: if chain: chain.append(".") chain.append(member.name) return True while True: if type.kind == TypeKind.TYPEDEF: # type: ignore[comparison-overlap] # python/mypy#17096 type = type.type elif type.kind == TypeKind.ARRAY: element_bit_size = sizeof(type.type) * 8 # Treat incomplete arrays as if they have infinite size. 
if type.length is None or bit_offset < type.length * element_bit_size: i = bit_offset // element_bit_size bit_offset -= i * element_bit_size chain.append(f"[{i}]") type = type.type else: if bit_offset == type.length * element_bit_size: chain.append("") else: chain.append("") bit_offset = 0 if not emit_and_pop_member(): break else: members = getattr(type, "members", None) if members is None: if not emit_and_pop_member(): break continue orig_bit_offset = bit_offset orig_chain_len = len(chain) # At first, we go forwards through the members. If this is a union, # then we go backwards through the rest once we've found a match. # This allows us to (1) avoid the stack as an optimization for the # common case of structures and (2) return results sorted by # declaration order in the source code. i = 0 end = len(members) step = 1 while i != end: member = members[i] bit_size = member.bit_field_size if bit_size is None: try: bit_size = sizeof(member.type) * 8 except TypeError: # Ignore incomplete members other than arrays. if member.type.kind != TypeKind.ARRAY: i += step continue if ( member.bit_offset <= bit_offset # Treat incomplete arrays as if they have infinite size. and (bit_size is None or bit_offset < member.bit_offset + bit_size) ): if step == 1: step = -1 if type.kind == TypeKind.UNION: i, end = end, i else: # Set i so that we break on the next iteration. i = end + 1 type = member.type bit_offset -= member.bit_offset if member.name is not None: if chain: chain.append(".") chain.append(member.name) else: stack.append((member, orig_bit_offset, orig_chain_len)) i += step if step == 1: # No matching members. 
bit_size = sizeof(type) * 8 if bit_offset == bit_size: chain.append("") elif bit_offset > bit_size: chain.append("") else: prev_member = None next_member = None for member in members: if member.bit_offset < bit_offset: if ( prev_member is None or member.bit_offset > prev_member.bit_offset ): prev_member = member else: if ( next_member is None or member.bit_offset < next_member.bit_offset ): next_member = member if chain: chain.append(".") if prev_member and next_member: chain.append("" if prev_member.name is None else prev_member.name ) chain.append(" and ") chain.append( "" if next_member.name is None else next_member.name ) chain.append(">") elif next_member: chain.append("") else: chain.append("") bit_offset = 0 if not emit_and_pop_member(): break return " or ".join(results) drgn-0.0.31/drgn/helpers/experimental/000077500000000000000000000000001477777462700176525ustar00rootroot00000000000000drgn-0.0.31/drgn/helpers/experimental/__init__.py000066400000000000000000000005251477777462700217650ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Experimental ------------ The ``drgn.helpers.experimental`` package contains experimental helpers with no stability guarantees. They may change, move to another package, or be removed. They are not automatically imported by the CLI. """ drgn-0.0.31/drgn/helpers/experimental/kmodify.py000066400000000000000000001424451477777462700217000ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Kmodify ------- The ``drgn.helpers.experimental.kmodify`` module provides experimental helpers for modifying the state of the running kernel. This works by loading a temporary kernel module, so the kernel must support loadable kernel modules (``CONFIG_MODULES=y``) and allow loading unsigned modules (``CONFIG_MODULE_SIG_FORCE=n``). It is currently only implemented for x86-64. .. 
warning:: These helpers are powerful but **extremely** dangerous. Use them with care. """ import ctypes import errno import operator import os import random import re import string import struct import sys from typing import ( TYPE_CHECKING, Any, List, Mapping, NamedTuple, Optional, Sequence, Tuple, Union, ) if TYPE_CHECKING: from _typeshed import SupportsWrite if sys.version_info < (3, 11): from typing_extensions import assert_never else: from typing import assert_never # novermin from _drgn_util.elf import ET, SHF, SHN, SHT, STB, STT, STV from _drgn_util.platform import SYS from drgn import ( Architecture, FaultError, IntegerLike, Object, ObjectAbsentError, PlatformFlags, PrimitiveType, Program, ProgramFlags, Type, TypeKind, alignof, cast, implicit_convert, offsetof, sizeof, ) from drgn.helpers.common.prog import takes_program_or_default __all__ = ( "call_function", "pass_pointer", "write_memory", "write_object", ) _c = ctypes.CDLL(None, use_errno=True) _syscall = _c.syscall _syscall.restype = ctypes.c_long # os.memfd_create() was added in Python 3.8, and only with glibc >= 2.27 # (manylinux2014 uses glibc 2.17). The syscall was added in Linux 3.17, so fall # back to using it directly. 
if hasattr(os, "memfd_create"): _memfd_create = os.memfd_create # novermin else: def _memfd_create( name: str, flags: int = 1, # MFD_CLOEXEC ) -> int: fd = _syscall( ctypes.c_long(SYS["memfd_create"]), ctypes.c_char_p(os.fsencode(name)), ctypes.c_uint(flags), ) if fd < 0: errnum = ctypes.get_errno() raise OSError(errnum, os.strerror(errnum)) return fd class _ElfSection: def __init__( self, *, name: str, type: SHT, flags: SHF = SHF(0), data: bytes, addr: int = 0, link: int = 0, info: int = 0, addralign: int = 1, entsize: int = 0, ) -> None: self.name = name self.type = type self.flags = flags self.data = data self.addr = addr self.link = link self.info = info self.addralign = addralign self.entsize = entsize class _ElfSymbol(NamedTuple): name: str value: int size: int section: Union[str, SHN] type: STT binding: STB visibility: STV = STV.DEFAULT class _ElfRelocation(NamedTuple): offset: int type: int symbol_name: str section_symbol: bool addend: int = 0 def _write_elf( file: "SupportsWrite[bytes]", *, machine: int, is_little_endian: bool, is_64_bit: bool, rela: bool, sections: Sequence[_ElfSection], symbols: Sequence[_ElfSymbol], relocations: Mapping[str, Sequence[_ElfRelocation]], ) -> None: endian = "<" if is_little_endian else ">" if is_64_bit: ehdr_struct = struct.Struct(endian + "16BHHIQQQIHHHHHH") shdr_struct = struct.Struct(endian + "IIQQQQIIQQ") rela_struct = struct.Struct(endian + "QQq") def r_info(sym: int, type: int) -> int: return (sym << 32) | type sym_struct = struct.Struct(endian + "IBBHQQ") def sym_fields(sym: _ElfSymbol) -> Tuple[int, int, int, int, int]: return ( (sym.binding << 4) + (sym.type & 0xF), sym.visibility, ( section_name_to_index[sym.section] if isinstance(sym.section, str) else sym.section ), sym.value, sym.size, ) else: ehdr_struct = struct.Struct(endian + "16BHHIIIIIHHHHHH") shdr_struct = struct.Struct(endian + "10I") rela_struct = struct.Struct(endian + "IIi") def r_info(sym: int, type: int) -> int: return (sym << 8) | type sym_struct 
= struct.Struct(endian + "IIIBBH") def sym_fields(sym: _ElfSymbol) -> Tuple[int, int, int, int, int]: return ( sym.value, sym.size, (sym.binding << 4) + (sym.type & 0xF), sym.visibility, ( section_name_to_index[sym.section] if isinstance(sym.section, str) else sym.section ), ) section_symbols = [ _ElfSymbol( name="", value=0, size=0, type=STT.SECTION, binding=STB.LOCAL, section=section.name, ) for section in sections if section.type == SHT.PROGBITS ] section_name_to_symbol_index = { sym.section: i for i, sym in enumerate(section_symbols, 1) } symbol_name_to_index = { sym.name: i for i, sym in enumerate(symbols, 1 + len(section_symbols)) } section_symbols.extend(symbols) symbols = section_symbols del section_symbols def relocation_symbol_index(reloc: _ElfRelocation) -> int: if reloc.section_symbol: return section_name_to_symbol_index[reloc.symbol_name] else: return symbol_name_to_index[reloc.symbol_name] if rela: reloc_prefix = ".rela" reloc_sht = SHT.RELA reloc_size = rela_struct.size def relocation_data(relocations: Sequence[_ElfRelocation]) -> bytes: data = bytearray(len(relocations) * rela_struct.size) for i, relocation in enumerate(relocations): rela_struct.pack_into( data, i * rela_struct.size, relocation.offset, r_info( relocation_symbol_index(relocation), relocation.type, ), relocation.addend, ) return data else: raise NotImplementedError("SHT_REL relocations") symtab_section_index = 1 + len(sections) + len(relocations) sections = list(sections) i = 0 while i < len(sections): section = sections[i] try: section_relocations = relocations[section.name] except KeyError: i += 1 continue sections.insert( i + 1, _ElfSection( name=reloc_prefix + section.name, type=reloc_sht, flags=SHF.INFO_LINK, data=relocation_data(section_relocations), link=symtab_section_index, info=i + 1, addralign=8 if is_64_bit else 4, entsize=reloc_size, ), ) i += 2 section_name_to_index = {section.name: i for i, section in enumerate(sections, 1)} if len(sections) < symtab_section_index - 1: 
raise ValueError( f"relocations for unknown section {', '.join(relocations.keys() - section_name_to_index)}" ) symtab_data = bytearray((len(symbols) + 1) * sym_struct.size) strtab_data = bytearray(1) sym_local_end = 1 for i, sym in enumerate(symbols, 1): if sym.name: st_name = len(strtab_data) strtab_data.extend(sym.name.encode()) strtab_data.append(0) else: st_name = 0 sym_struct.pack_into( symtab_data, i * sym_struct.size, st_name, *sym_fields(sym) ) if sym.binding == STB.LOCAL: if sym_local_end != i: raise ValueError("local symbol after non-local symbol") sym_local_end = i + 1 sections.append( _ElfSection( name=".symtab", type=SHT.SYMTAB, data=symtab_data, link=len(sections) + 2, info=sym_local_end, entsize=sym_struct.size, ) ) sections.append(_ElfSection(name=".strtab", type=SHT.STRTAB, data=strtab_data)) shstrtab_data = bytearray(1) sh_name = [] for section in sections: sh_name.append(len(shstrtab_data)) shstrtab_data.extend(section.name.encode()) shstrtab_data.append(0) sh_name.append(len(shstrtab_data)) shstrtab_data.extend(b".shstrtab\0") sections.append(_ElfSection(name=".shstrtab", type=SHT.STRTAB, data=shstrtab_data)) shnum = len(sections) + 1 # + 1 for the SHT_NULL section headers_size = ehdr_struct.size + shdr_struct.size * shnum file.write( ehdr_struct.pack( 0x7F, # ELFMAG0 ord("E"), # ELFMAG1 ord("L"), # ELFMAG2 ord("F"), # ELFMAG3 2 if is_64_bit else 1, # EI_CLASS = ELFCLASS64 or ELFCLASS32 1 if is_little_endian else 2, # EI_DATA = ELFDATA2LSB or ELFDATA2MSB 1, # EI_VERSION = EV_CURRENT 0, # EI_OSABI = ELFOSABI_NONE 0, # EI_ABIVERSION 0, 0, 0, 0, 0, 0, 0, # EI_PAD ET.REL, # e_type machine, 1, # e_version = EV_CURRENT 0, # e_entry 0, # e_phoff ehdr_struct.size, # e_shoff 0, # e_flags ehdr_struct.size, # e_ehsize 0, # e_phentsize 0, # e_phnum shdr_struct.size, # e_shentsize shnum, # e_shnum shnum - 1, # e_shstrndx ) ) # SHT_NULL section. 
file.write(bytes(shdr_struct.size)) section_data_offset = headers_size for i, section in enumerate(sections): section_data_offset += -section_data_offset % section.addralign file.write( shdr_struct.pack( sh_name[i], # sh_name section.type, # sh_type section.flags, # sh_flags section.addr, # sh_addr section_data_offset, # sh_offset len(section.data), # sh_size section.link, # sh_link section.info, # sh_info section.addralign, # sh_addralign section.entsize, # sh_entsize ) ) section_data_offset += len(section.data) section_data_offset = headers_size for section in sections: padding = -section_data_offset % section.addralign if padding: file.write(bytes(padding)) section_data_offset += padding file.write(section.data) section_data_offset += len(section.data) # Abstract syntax tree-ish representation of code to inject. class _Integer: def __init__(self, size: int, value: IntegerLike) -> None: self.size = size self.value = operator.index(value) class _Symbol(NamedTuple): name: str offset: int = 0 section: bool = False class _Call(NamedTuple): func: _Symbol args: Sequence[Union[_Integer, _Symbol]] class _StoreReturnValue(NamedTuple): size: int dst: _Symbol class _Return(NamedTuple): value: _Integer class _ReturnIfLastReturnValueNonZero(NamedTuple): value: _Integer _FunctionBodyNode = Union[ _Call, _StoreReturnValue, _Return, _ReturnIfLastReturnValueNonZero ] class _Function(NamedTuple): body: Sequence[_FunctionBodyNode] class _CodeGen_x86_64: _R_X86_64_PC32 = 2 _R_X86_64_PLT32 = 4 _R_X86_64_32S = 11 _rax = 0 _r11 = 11 _argument_registers = ( 7, # rdi 6, # rsi, 2, # rdx 1, # rcx 8, # r8 9, # r9 ) def __init__(self) -> None: self.code = bytearray() self.relocations: List[_ElfRelocation] = [] self._epilogue_jumps: List[int] = [] def enter_frame(self, size: int) -> None: if size < 0: raise ValueError("invalid stack frame size") self.code.extend( # endbr64 # This is only needed if CONFIG_X86_KERNEL_IBT=y, but it's much # simpler to do it unconditionally, and it's a no-op if 
not needed. b"\xF3\x0F\x1E\xFA" # Set up the frame pointer. # push %rbp b"\x55" # mov %rsp, %rbp b"\x48\x89\xE5" ) # The System V ABI requires that rsp % 16 == 0 on function entry. We # need to make sure that rsp % 16 == 8 in the function body so that the # return address pushed by the call will make rsp % 16 == 0. push %rbp # makes rsp % 16 == 8. So, we need to align the requested size up to 16 # bytes. size = (size + 15) & ~15 if size > 0: # sub $size, %rsp if size < 128: self.code.extend(b"\x48\x83\xEC") self.code.append(size) else: self.code.extend(b"\x48\x81\xEC") self.code.extend(size.to_bytes(4, "little", signed=True)) def leave_frame(self) -> None: # Fix up all of the jumps to the epilogue. for offset in self._epilogue_jumps: self.code[offset - 4 : offset] = (len(self.code) - offset).to_bytes( 4, "little", signed=True ) self.code.extend( # leave b"\xC9" # ret b"\xC3" ) def _mov_imm(self, i: _Integer, reg: int, sign_extend_bits: int = 0) -> None: value = i.value & ((1 << max(i.size * 8, sign_extend_bits)) - 1) assert value >= 0 and value <= 0xFFFFFFFFFFFFFFFF assert reg < 16 if value <= 0xFFFFFFFF: if reg >= 8: self.code.append(0x41) # REX.B reg -= 8 self.code.append(0xB8 + reg) self.code.extend(value.to_bytes(4, "little")) else: rex = 0x48 # REX.W if reg >= 8: rex |= 1 # REX.B reg -= 8 self.code.append(rex) if value >= 0xFFFFFFFF80000000: self.code.append(0xC7) self.code.append(0xC0 + reg) self.code.extend((value & 0xFFFFFFFF).to_bytes(4, "little")) else: self.code.append(0xB8 + reg) self.code.extend(value.to_bytes(8, "little")) def _mov_symbol(self, sym: _Symbol, reg: int) -> None: rex = 0x48 # REX.W if reg >= 8: rex |= 1 # REX.B reg -= 8 self.code.append(rex) self.code.append(0xC7) self.code.append(0xC0 + reg) self.relocations.append( _ElfRelocation( offset=len(self.code), type=self._R_X86_64_32S, symbol_name=sym.name, section_symbol=sym.section, addend=sym.offset, ) ) self.code.extend(bytes(4)) def _store_rax_on_stack(self, offset: int) -> None: # mov 
%rax, offset(%rsp) if offset == 0: self.code.extend(b"\x48\x89\x04\x24") elif -128 <= offset < 128: self.code.extend(b"\x48\x89\x44\x24") self.code.append(offset & 0xFF) else: self.code.extend(b"\x48\x89\x84\x24") self.code.extend(offset.to_bytes(4, "little", signed=True)) def _store_imm_on_stack(self, i: _Integer, offset: int) -> None: value = i.value & ((1 << max(i.size * 8, 64)) - 1) if (0 <= value <= 0x7FFFFFFF) or ( 0xFFFFFFFF80000000 <= value <= 0xFFFFFFFFFFFFFFFF ): # mov $value, offset(%rsp) if offset == 0: self.code.extend(b"\x48\xC7\x04\x24") elif -128 <= offset < 128: self.code.extend(b"\x48\xC7\x44\x24") self.code.append(offset & 0xFF) else: self.code.extend(b"\x48\xC7\x84\x24") self.code.extend(offset.to_bytes(4, "little", signed=True)) self.code.extend((value & 0xFFFFFFFF).to_bytes(4, "little")) else: self._mov_imm(i, self._rax, 64) self._store_rax_on_stack(offset) def _store_symbol_on_stack(self, sym: _Symbol, offset: int) -> None: self._mov_symbol(sym, self._rax) self._store_rax_on_stack(offset) def call(self, func: _Symbol, args: Sequence[Union[_Integer, _Symbol]]) -> None: for i, arg in enumerate(args): if i < len(self._argument_registers): reg = self._argument_registers[i] if isinstance(arg, _Integer): # Clang/LLVM as of version 19 relies on <32-bit arguments # being sign-extended to 32 bits despite this not being # guaranteed by the psABI. It's unclear whether this will # be resolved by changing LLVM or the psABI, so work around # it for now. 
See: # https://groups.google.com/g/x86-64-abi/c/h7FFh30oS3s/m/Gksanh3WAAAJ # https://github.com/llvm/llvm-project/issues/12579 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46942 self._mov_imm(arg, reg, 32) else: self._mov_symbol(arg, reg) else: stack_offset = 8 * (i - len(self._argument_registers)) if isinstance(arg, _Integer): self._store_imm_on_stack(arg, stack_offset) else: self._store_symbol_on_stack(arg, stack_offset) # call near self.code.append(0xE8) self.relocations.append( _ElfRelocation( offset=len(self.code), type=self._R_X86_64_PLT32, symbol_name=func.name, section_symbol=func.section, addend=-4, ) ) self.code.extend(bytes(4)) def store_return_value(self, size: int, dst: _Symbol) -> None: if size == 1: # movb %al, ... self.code.extend(b"\x88\x05") elif size == 2: # movw %ax, ... self.code.extend(b"\x66\x89\x05") elif size == 4: # movl %eax, ... self.code.extend(b"\x89\x05") elif size == 8: # movq %rax, ... self.code.extend(b"\x48\x89\x05") else: raise NotImplementedError("{size}-byte return values not implemented") self.relocations.append( _ElfRelocation( offset=len(self.code), type=self._R_X86_64_PC32, symbol_name=dst.name, section_symbol=dst.section, addend=dst.offset - 4, ) ) # ... 0x0(%rip) self.code.extend(bytes(4)) def return_(self, value: _Integer, last: bool) -> None: if value.size > 8: raise NotImplementedError( "return values larger than 8 bytes not implemented" ) self._mov_imm(value, self._rax) # Jump to the function epilogue. If this return is the last operation, # we can fall through instead of jumping. if not last: # jmp self.code.extend(b"\xE9\x00\x00\x00\x00") # The destination needs to be fixed up later. 
self._epilogue_jumps.append(len(self.code)) def return_if_last_return_value_nonzero(self, value: _Integer) -> None: if value.size > 8: raise NotImplementedError( "return values larger than 8 bytes not implemented" ) # mov %rax, %rdx self.code.extend(b"\x48\x89\xC2") self._mov_imm(value, self._rax) # Jump to the function epilogue if the last return value was non-zero. self.code.extend( # test %rdx, %rdx b"\x48\x85\xD2" # jnz b"\x0F\x85\x00\x00\x00\x00" ) # The destination needs to be fixed up later. self._epilogue_jumps.append(len(self.code)) class _Arch_X86_64: ELF_MACHINE = 62 # EM_X86_64 RELA = True ABSOLUTE_ADDRESS_RELOCATION_TYPE = 1 # R_X86_64_64 @staticmethod def code_gen(func: _Function) -> Tuple[bytes, Sequence[_ElfRelocation]]: needed_stack_size = 0 for node in func.body: if not isinstance(node, _Call): continue stack_size = len(_CodeGen_x86_64._argument_registers) * -8 for arg in node.args: if isinstance(arg, _Integer): if arg.size > 8: raise NotImplementedError( "passing integers larger than 8 bytes not implemented" ) stack_size += 8 elif isinstance(arg, _Symbol): stack_size += 8 else: assert_never(arg) if stack_size > needed_stack_size: needed_stack_size = stack_size code_gen = _CodeGen_x86_64() code_gen.enter_frame(needed_stack_size) for i, node in enumerate(func.body): if isinstance(node, _Call): code_gen.call(node.func, node.args) elif isinstance(node, _StoreReturnValue): code_gen.store_return_value(node.size, node.dst) elif isinstance(node, _Return): code_gen.return_(node.value, last=i == len(func.body) - 1) elif isinstance(node, _ReturnIfLastReturnValueNonZero): code_gen.return_if_last_return_value_nonzero(node.value) else: assert_never(node) code_gen.leave_frame() return code_gen.code, code_gen.relocations def _find_exported_symbol_in_section( prog: Program, name: bytes, start: int, stop: int ) -> int: kernel_symbol_type = prog.type("struct kernel_symbol") if kernel_symbol_type.has_member("name_offset"): def kernel_symbol_name(sym: Object) -> 
Object: return cast("char *", sym.name_offset.address_of_()) + sym.name_offset else: def kernel_symbol_name(sym: Object) -> Object: return sym.name syms = Object(prog, prog.pointer_type(kernel_symbol_type), start) lo = 0 hi = (stop - start) // sizeof(kernel_symbol_type) while lo < hi: mid = (lo + hi) // 2 sym_name = kernel_symbol_name(syms[mid]).string_() if sym_name < name: lo = mid + 1 elif sym_name > name: hi = mid else: return mid return -1 # If CONFIG_MODVERSIONS=y, then we need a __versions section containing a CRC # of each exported symbol that we use. Since we intentionally don't use any # symbols, we only need it for the special module_layout symbol. def _get_versions_section(struct_module: Type) -> Optional[_ElfSection]: prog = struct_module.prog try: return prog.cache["kmodify___versions_section"] except KeyError: pass # module_layout is defined if and only if CONFIG_MODVERSIONS=y. have_module_layout = False try: have_module_layout = prog["module_layout"].address_ is not None except KeyError: pass if have_module_layout: # We only check the non-GPL-only section because module_layout is # non-GPL-only. i = _find_exported_symbol_in_section( prog, b"module_layout", prog.symbol("__start___ksymtab").address, prog.symbol("__stop___ksymtab").address, ) if i < 0: raise LookupError("module_layout not found") # Since Linux kernel commit 71810db27c1c ("modversions: treat symbol # CRCs as 32 bit quantities") (in v4.10), CRCs are in an array of s32. # Before that, they are in an array of unsigned long. Determine the # correct type from struct module::crcs. 
module_layout_crc = ( Object( prog, struct_module.member("crcs").type, prog.symbol("__start___kcrctab").address, )[i].value_() & 0xFFFFFFFF ) struct_modversion_info = prog.type("struct modversion_info") section = _ElfSection( name="__versions", type=SHT.PROGBITS, flags=SHF.ALLOC, data=Object( prog, struct_modversion_info, { "crc": module_layout_crc, "name": b"module_layout", }, ).to_bytes_(), addralign=alignof(struct_modversion_info), ) else: section = None prog.cache["kmodify___versions_section"] = section return section class _Kmodify: def __init__(self, prog: Program) -> None: if prog.flags & ( ProgramFlags.IS_LINUX_KERNEL | ProgramFlags.IS_LIVE | ProgramFlags.IS_LOCAL ) != ( ProgramFlags.IS_LINUX_KERNEL | ProgramFlags.IS_LIVE | ProgramFlags.IS_LOCAL ): raise ValueError("kmodify is only available for the running kernel") platform = prog.platform if platform is None: raise ValueError("program platform is not known") self.prog = prog self.is_little_endian = bool(platform.flags & PlatformFlags.IS_LITTLE_ENDIAN) self.is_64_bit = bool(platform.flags & PlatformFlags.IS_64_BIT) if platform.arch == Architecture.X86_64: # When we add support for another architecture, we're going to need # an _Arch Protocol. self.arch = _Arch_X86_64 else: raise NotImplementedError( f"kmodify not implemented for {platform.arch.name} architecture" ) _KMOD_NAME_CHARS = string.digits + string.ascii_letters def insert( self, *, name: str, code: bytes, code_relocations: Sequence[_ElfRelocation], data: bytes, data_alignment: int, symbols: Sequence[_ElfSymbol], ) -> int: struct_module = self.prog.type("struct module") module_name = "".join( [ "drgn_kmodify_", # Randomize to avoid name collisions. *random.choices(self._KMOD_NAME_CHARS, k=12), "_", name, ] ).encode("ascii")[: sizeof(struct_module.member("name").type) - 1] sections = [ _ElfSection( name=".init.text", type=SHT.PROGBITS, flags=SHF.ALLOC | SHF.EXECINSTR, data=code, # This should be good enough for any supported architecture. 
addralign=16, ), _ElfSection( name=".data", type=SHT.PROGBITS, flags=SHF.WRITE | SHF.ALLOC, data=data, addralign=data_alignment, ), _ElfSection( name=".gnu.linkonce.this_module", type=SHT.PROGBITS, flags=SHF.WRITE | SHF.ALLOC, data=Object( self.prog, struct_module, {"name": module_name} ).to_bytes_(), addralign=alignof(struct_module), ), _ElfSection( name=".modinfo", type=SHT.PROGBITS, flags=SHF.ALLOC, data=b"".join( [ b"%b=%b\0" % (key, value) for key, value in ( (b"license", b"GPL"), (b"depends", b""), # A retpoline kernel complains when loading a # non-retpoline module. We never make indirect # calls, so we can claim to be a retpoline module. # (Note that it's harmless to set this for # non-retpoline kernels.) (b"retpoline", b"Y"), (b"name", module_name), (b"vermagic", self.prog["vermagic"].string_()), ) ] ), ), ] # Add the __versions section if needed. versions_section = _get_versions_section(struct_module) if versions_section is not None: sections.append(versions_section) symbols = [ *symbols, _ElfSymbol( name="init_module", value=0, size=len(code), type=STT.FUNC, binding=STB.GLOBAL, section=".init.text", ), ] relocations = { ".init.text": code_relocations, ".gnu.linkonce.this_module": [ _ElfRelocation( offset=offsetof(struct_module, "init"), type=self.arch.ABSOLUTE_ADDRESS_RELOCATION_TYPE, symbol_name="init_module", section_symbol=False, ) ], } with open(_memfd_create(module_name.decode() + ".ko"), "wb") as f: _write_elf( f, machine=self.arch.ELF_MACHINE, is_little_endian=self.is_little_endian, is_64_bit=self.is_64_bit, rela=self.arch.RELA, sections=sections, symbols=symbols, relocations=relocations, ) f.flush() if _syscall( ctypes.c_long(SYS["finit_module"]), ctypes.c_int(f.fileno()), ctypes.c_char_p(b""), ctypes.c_int(0), ): return -ctypes.get_errno() else: return 0 @takes_program_or_default def write_memory(prog: Program, address: IntegerLike, value: bytes) -> None: """ Write a byte string to kernel memory. 
>>> os.uname().sysname 'Linux' >>> write_memory(prog["init_uts_ns"].name.sysname.address_, b"Lol\\0") >>> os.uname().sysname 'Lol' .. warning:: This attempts to detect writes to bad addresses and raise a :class:`~drgn.FaultError`, but this is best-effort and may still crash the kernel. Writing bad data can of course also cause a crash when the data is used. Additionally, this is not atomic, so the data may be accessed while it is partially written. :param address: Address to write to. :param value: Byte string to write. :raises FaultError: if the address cannot be written to """ copy_to_kernel_nofault_address = None copy_from_kernel_nofault_address = None for copy_to_kernel_nofault, copy_from_kernel_nofault in ( # Names used since Linux kernel commit fe557319aa06 ("maccess: rename # probe_kernel_{read,write} to copy_{from,to}_kernel_nofault") (in # v5.8-rc2). ("copy_to_kernel_nofault", "copy_from_kernel_nofault"), # Names used before Linux kernel commit 48c49c0e5f31 ("maccess: remove # various unused weak aliases") (in v5.8-rc1). ("__probe_kernel_write", "__probe_kernel_read"), # Names briefly used between those two commits. 
("probe_kernel_write", "probe_kernel_read"), ): try: copy_to_kernel_nofault_address = prog[copy_to_kernel_nofault].address_ copy_from_kernel_nofault_address = prog[copy_from_kernel_nofault].address_ break except KeyError: pass if copy_to_kernel_nofault_address is None: raise LookupError("copy_to_kernel_nofault not found") if copy_from_kernel_nofault_address is None: raise LookupError("copy_from_kernel_nofault not found") kmodify = _Kmodify(prog) address = operator.index(address) sizeof_int = sizeof(prog.type("int")) sizeof_void_p = sizeof(prog.type("void *")) sizeof_size_t = sizeof(prog.type("size_t")) code, code_relocations = kmodify.arch.code_gen( _Function( [ # copy_to_kernel_nofault() can still fault in some cases; see # https://lore.kernel.org/all/f0e171cbae576758d9387cee374dd65088e75b07.1725223574.git.osandov@fb.com/ # copy_from_kernel_nofault() catches some of those cases. _Call( _Symbol(copy_from_kernel_nofault), [ _Symbol(".data", section=True, offset=len(value)), _Integer(sizeof_void_p, address), _Integer(sizeof_size_t, 1), ], ), _ReturnIfLastReturnValueNonZero( _Integer(sizeof_int, -errno.EFAULT), ), _Call( _Symbol(copy_to_kernel_nofault), [ _Integer(sizeof_void_p, address), _Symbol(".data", section=True), _Integer(sizeof_size_t, len(value)), ], ), _ReturnIfLastReturnValueNonZero( _Integer(sizeof_int, -errno.EFAULT), ), _Return(_Integer(sizeof_int, -errno.EINPROGRESS)), ] ) ) ret = kmodify.insert( name=f"write_{len(value)}", code=code, code_relocations=code_relocations, data=value + b"\0", # Align generously so that the copy can use larger units and small # copies can be slightly less racy. 
data_alignment=16, symbols=[ _ElfSymbol( name=copy_to_kernel_nofault, value=copy_to_kernel_nofault_address, size=0, type=STT.FUNC, binding=STB.LOCAL, section=SHN.ABS, ), _ElfSymbol( name=copy_from_kernel_nofault, value=copy_from_kernel_nofault_address, size=0, type=STT.FUNC, binding=STB.LOCAL, section=SHN.ABS, ), ], ) if ret != -errno.EINPROGRESS: if ret == -errno.EFAULT: raise FaultError("could not write to memory", address) elif ret: raise OSError(-ret, os.strerror(-ret)) else: raise ValueError("module init did not run") def _underlying_type(type: Type) -> Type: while type.kind == TypeKind.TYPEDEF: type = type.type return type def write_object( object: Object, value: Any, *, dereference: Optional[bool] = None ) -> None: """ Write to an object in kernel memory. >>> os.system("uptime -p") up 12 minutes >>> write_object(prog["init_time_ns"].offsets.boottime.tv_sec, 1000000000) >>> os.system("uptime -p") up 3 decades, 1 year, 37 weeks, 1 hour, 59 minutes .. warning:: The warnings about :func:`write_memory()` also apply to ``write_object()``. :param object: Object to write to. :param value: Value to write. This may be an :class:`~drgn.Object` or a Python value. Either way, it will be converted to the type of *object*. :param dereference: If *object* is a pointer, whether to dereference it. If ``True``, then write to the object pointed to by *object* (``*ptr = value``). If ``False``, then write to the pointer itself (``ptr = value``). This is a common source of confusion, so it is required if *object* is a pointer. 
def _default_argument_promotions(obj: Object) -> Object:
    # Apply C's default argument promotions for variadic/unprototyped
    # arguments: integer promotion for integer types, float -> double.
    type = _underlying_type(obj.type_)
    if type.kind == TypeKind.INT:
        # Unary plus performs the integer promotions in drgn.
        return +obj
    if type.primitive == PrimitiveType.C_FLOAT:
        return cast("double", obj)
    return obj
>>> p = call_function("__kmalloc_noprof", 13, GFP_KERNEL) >>> p (void *)0xffff991701ef43c0 >>> identify_address(p) 'slab object: kmalloc-16+0x0' >>> call_function("kfree", p) (void) >>> identify_address(p) 'free slab object: kmalloc-16+0x0' Variadic functions are also supported: >>> call_function("_printk", "Hello, world! %d\\n", Object(prog, "int", 1234)) (int)18 >>> os.system("dmesg | tail -1") [ 1138.223004] Hello, world! 1234 Constructed values can be passed by pointer using :class:`pass_pointer()`: >>> sb = prog["init_fs"].root.mnt.mnt_sb >>> sb.s_shrink.scan_objects (unsigned long (*)(struct shrinker *, struct shrink_control *))super_cache_scan+0x0 = 0xffffffffbda4c487 >>> sc = pass_pointer(Object(prog, "struct shrink_control", ... {"gfp_mask": GFP_KERNEL, "nr_to_scan": 100, "nr_scanned": 100})) >>> call_function(sb.s_shrink.scan_objects, sb.s_shrink, sc) (unsigned long)31 If the function modifies the passed value, the :class:`pass_pointer` object is updated: >>> sc.object (struct shrink_control){ .gfp_mask = (gfp_t)3264, .nid = (int)0, .nr_to_scan = (unsigned long)1, .nr_scanned = (unsigned long)100, .memcg = (struct mem_cgroup *)0x0, } .. note:: It is not possible to call some functions, including inlined functions and function-like macros. If the unavailable function is a wrapper around another function, sometimes the wrapped function can be called instead. .. warning:: Calling a function incorrectly may cause the kernel to crash or misbehave in various ways. The function is called from process context. Note that the function may have context, locking, or reference counting requirements. :param func: Function to call. May be a function name, function object, or function pointer object. :param args: Function arguments. :class:`int`, :class:`float`, and :class:`bool` arguments are converted as "literals" with ``Object(prog, value=...)``. :class:`str` and :class:`bytes` arguments are converted to ``char`` array objects. 
:class:`pass_pointer` arguments are copied to the kernel, passed by pointer, and copied back. :return: Function return value. :raises TypeError: if the passed arguments have incorrect types for the function :raises ObjectAbsentError: if the function cannot be called because it is inlined :raises LookupError: if a function with the given name is not found (possibly because it is actually a function-like macro) """ if not isinstance(func, Object): func = prog.function(func) kmodify = _Kmodify(prog) func_type = _underlying_type(func.type_) try: if func_type.kind == TypeKind.FUNCTION: func_pointer = func.address_of_() elif func_type.kind == TypeKind.POINTER: func_type = _underlying_type(func_type.type) if func_type.kind != TypeKind.FUNCTION: raise TypeError("func must be function or function pointer") func_pointer = func.read_() else: raise TypeError("func must be function or function pointer") except ObjectAbsentError: raise ObjectAbsentError("function is absent, likely inlined") from None return_type = _underlying_type(func_type.type) if return_type.kind not in { TypeKind.VOID, TypeKind.INT, TypeKind.BOOL, TypeKind.ENUM, TypeKind.POINTER, }: raise NotImplementedError(f"{return_type} return values not implemented") if len(args) < len(func_type.parameters): raise TypeError(f"not enough arguments for {func_pointer}; got {len(args)}") if not func_type.is_variadic and len(args) > len(func_type.parameters): raise TypeError(f"too many arguments for {func_pointer}; got {len(args)}") call_args: List[Union[_Integer, _Symbol]] = [] out_pointers = [] data = bytearray() data_alignment = 1 def align_data(alignment: int) -> None: nonlocal data_alignment if alignment > data_alignment: data_alignment = alignment data.extend(bytes(-len(data) % alignment)) for i, arg in enumerate(args): if i < len(func_type.parameters): parameter_type = _underlying_type(func_type.parameters[i].type) if ( ( isinstance(arg, (str, bytes, bytearray)) or ( isinstance(arg, pass_pointer) and 
isinstance(arg.object, (str, bytes, bytearray)) ) ) and i < len(func_type.parameters) and parameter_type.kind == TypeKind.POINTER and _underlying_type(parameter_type.type).primitive in ( PrimitiveType.C_CHAR, PrimitiveType.C_SIGNED_CHAR, PrimitiveType.C_UNSIGNED_CHAR, ) ): # Convert strings to null-terminated character arrays. if not isinstance(arg, pass_pointer): arg = pass_pointer(arg) if isinstance(arg.object, str): arg.object = arg.object.encode() arg.object = Object( prog, prog.array_type( parameter_type.type, len(arg.object) + 1, language=func_type.language, ), arg.object, ) elif ( isinstance(arg, Object) and _underlying_type(arg.type_).kind == TypeKind.ARRAY ): # Convert arrays to pointers. if arg.address_ is None: arg = pass_pointer(arg) else: arg = arg + 0 if isinstance(arg, pass_pointer): if not isinstance(arg.object, Object): arg.object = Object(prog, value=arg.object) type = arg.object.type_ underlying_type = _underlying_type(type) if underlying_type.kind == TypeKind.ARRAY: type = underlying_type.type if i < len(func_type.parameters): # We don't need the result, just type checking. 
implicit_convert( func_type.parameters[i].type, Object(prog, prog.pointer_type(type), 0), ) value = arg.object.to_bytes_() align_data(alignof(arg.object.type_)) out_pointers.append((arg, len(data))) call_args.append(_Symbol(".data", section=True, offset=len(data))) data.extend(value) else: if not isinstance(arg, Object): arg = Object(prog, value=arg) if i < len(func_type.parameters): arg = implicit_convert(func_type.parameters[i].type, arg) else: arg = _default_argument_promotions(arg) type = _underlying_type(arg.type_) if type.kind not in { TypeKind.INT, TypeKind.BOOL, TypeKind.ENUM, TypeKind.POINTER, }: if type.kind in { TypeKind.FLOAT, TypeKind.STRUCT, TypeKind.UNION, TypeKind.CLASS, }: raise NotImplementedError( f"passing {type} by value not implemented" ) else: raise ValueError(f"cannot pass {type} by value") call_args.append(_Integer(sizeof(type), arg.value_())) function_body: List[_FunctionBodyNode] = [_Call(_Symbol("func"), call_args)] symbols = [ _ElfSymbol( name="func", value=func_pointer.value_(), size=0, type=STT.FUNC, binding=STB.LOCAL, section=SHN.ABS, ) ] if return_type.kind != TypeKind.VOID: align_data(alignof(return_type)) return_offset = len(data) return_size = sizeof(return_type) data.extend(bytes(return_size)) function_body.append( _StoreReturnValue( return_size, _Symbol(".data", section=True, offset=return_offset), ) ) # copy_to_user() is the more obvious choice, but it's an inline function. copy_to_user_nofault_address = None for copy_to_user_nofault in ( # Name used since Linux kernel commit c0ee37e85e0e ("maccess: rename # probe_user_{read,write} to copy_{from,to}_user_nofault") (in # v5.8-rc2). "copy_to_user_nofault", # Name used before Linux kernel commit 48c49c0e5f31 ("maccess: remove # various unused weak aliases") (in v5.8-rc1). "__probe_user_write", # Name briefly used between those two commits. 
"probe_user_write", ): try: copy_to_user_nofault_address = prog[copy_to_user_nofault].address_ break except KeyError: continue if copy_to_user_nofault_address is None: raise LookupError("copy_to_user_nofault not found") sizeof_int = sizeof(prog.type("int")) if data: out_buf = ctypes.create_string_buffer(len(data)) function_body.append( _Call( _Symbol(copy_to_user_nofault), [ _Integer(sizeof(prog.type("void *")), ctypes.addressof(out_buf)), _Symbol(".data", section=True), _Integer(sizeof(prog.type("size_t")), len(data)), ], ) ) function_body.append( _ReturnIfLastReturnValueNonZero(_Integer(sizeof_int, -errno.EFAULT)) ) symbols.append( _ElfSymbol( name=copy_to_user_nofault, value=copy_to_user_nofault_address, size=0, type=STT.FUNC, binding=STB.LOCAL, section=SHN.ABS, ) ) function_body.append(_Return(_Integer(sizeof_int, -errno.EINPROGRESS))) code, code_relocations = kmodify.arch.code_gen(_Function(function_body)) kmod_name = "call" try: symbol_name_match = re.match(r"[0-9a-zA-Z_]+", prog.symbol(func_pointer).name) if symbol_name_match: kmod_name = "call_" + symbol_name_match.group() except LookupError: pass ret = kmodify.insert( name=kmod_name, code=code, code_relocations=code_relocations, data=data, data_alignment=data_alignment, symbols=symbols, ) if ret != -errno.EINPROGRESS: if ret: raise OSError(-ret, os.strerror(-ret)) else: raise ValueError("module init did not run") for out_pointer, offset in out_pointers: out_pointer.object = Object.from_bytes_( prog, out_pointer.object.type_, out_buf, bit_offset=offset * 8 ) if return_type.kind == TypeKind.VOID: return Object(prog, func_type.type) else: return Object.from_bytes_( prog, func_type.type, out_buf, bit_offset=return_offset * 8 ) class pass_pointer: object: Any """ Wrapped object. Updated to an :class:`~drgn.Object` containing the final value after the function call. """ def __init__(self, object: Any) -> None: """ Wrapper used to pass values to :func:`call_function()` by pointer. 
:param object: :class:`~drgn.Object` or Python value to wrap. """ self.object = object def __repr__(self) -> str: return f"pass_pointer({self.object!r})" drgn-0.0.31/drgn/helpers/linux/000077500000000000000000000000001477777462700163145ustar00rootroot00000000000000drgn-0.0.31/drgn/helpers/linux/__init__.py000066400000000000000000000025151477777462700204300ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Linux Kernel ------------ The ``drgn.helpers.linux`` package contains several modules for working with data structures and subsystems in the Linux kernel. The helpers are available from the individual modules in which they are defined and from this top-level package. E.g., the following are both valid: >>> from drgn.helpers.linux.list import list_for_each_entry >>> from drgn.helpers.linux import list_for_each_entry Iterator macros (``for_each_foo``) are a common idiom in the Linux kernel. The equivalent drgn helpers are implemented as Python :ref:`generators `. For example, the following code in C: .. code-block:: c list_for_each(pos, head) do_something_with(pos); Translates to the following code in Python: .. code-block:: python3 for pos in list_for_each(head): do_something_with(pos) """ import importlib import pkgutil from typing import List __all__: List[str] = [] for _module_info in pkgutil.iter_modules(__path__, prefix=__name__ + "."): _submodule = importlib.import_module(_module_info.name) _submodule_all = getattr(_submodule, "__all__", ()) __all__.extend(_submodule_all) for _name in _submodule_all: globals()[_name] = getattr(_submodule, _name) drgn-0.0.31/drgn/helpers/linux/bitops.py000066400000000000000000000033611477777462700201710ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. 
def for_each_set_bit(bitmap: Object, size: IntegerLike) -> Iterator[int]:
    """
    Iterate over all set (one) bits in a bitmap.

    :param bitmap: ``unsigned long *``
    :param size: Size of *bitmap* in bits.
    """
    nbits = int(size)
    # Width of one bitmap word, in bits.
    bits_per_word = 8 * sizeof(bitmap.type_.type)
    num_words = (nbits + bits_per_word - 1) // bits_per_word
    for word_index in range(num_words):
        word = bitmap[word_index].value_()
        base = word_index * bits_per_word
        # The final word may be partial; don't report bits past the end.
        for bit in range(min(bits_per_word, nbits - base)):
            if (word >> bit) & 1:
                yield base + bit


def for_each_clear_bit(bitmap: Object, size: IntegerLike) -> Iterator[int]:
    """
    Iterate over all clear (zero) bits in a bitmap.

    :param bitmap: ``unsigned long *``
    :param size: Size of *bitmap* in bits.
    """
    nbits = int(size)
    bits_per_word = 8 * sizeof(bitmap.type_.type)
    num_words = (nbits + bits_per_word - 1) // bits_per_word
    for word_index in range(num_words):
        word = bitmap[word_index].value_()
        base = word_index * bits_per_word
        for bit in range(min(bits_per_word, nbits - base)):
            if not ((word >> bit) & 1):
                yield base + bit


def test_bit(nr: IntegerLike, bitmap: Object) -> bool:
    """
    Return whether a bit in a bitmap is set.

    :param nr: Bit number.
    :param bitmap: ``unsigned long *``
    """
    bit = int(nr)
    bits_per_word = 8 * sizeof(bitmap.type_.type)
    word = bitmap[bit // bits_per_word].value_()
    # bits_per_word is 8 * sizeof(unsigned long), a power of two, so masking
    # with (bits_per_word - 1) selects the offset within the word.
    return bool((word >> (bit & (bits_per_word - 1))) & 1)
def disk_devt(disk: Object) -> Object:
    """
    Get a disk's device number.

    :param disk: ``struct gendisk *``
    :return: ``dev_t``
    """
    devt = MKDEV(disk.major, disk.first_minor)
    return Object(disk.prog_, "dev_t", devt)


def disk_name(disk: Object) -> bytes:
    """
    Get the name of a disk (e.g., ``sda``).

    :param disk: ``struct gendisk *``
    """
    return disk.disk_name.string_()


def _bdev_partno_flags(bdev: Object) -> Object:
    # partno is packed into the atomic_t __bd_flags member; the low byte
    # holds the partition number.
    return cast("u8", bdev.__bd_flags.counter)


def _bdev_partno_old(bdev: Object) -> Object:
    # Older kernels keep the partition number in a dedicated member.
    return bdev.bd_partno.read_()


def bdev_partno(bdev: Object) -> Object:
    """
    Get the partition number of a block device.

    :param bdev: ``struct block_device *``
    :return: ``u8``
    """
    prog = bdev.prog_
    impl = prog.cache.get("bdev_partno")
    if impl is None:
        # Since Linux kernel commit 1116b9fa15c0 ("bdev: infrastructure for
        # flags") (in v6.10), partno is part of the atomic_t __bd_flags
        # member. Before that, it's its own member. Detect once per program
        # and cache the accessor.
        if prog.type("struct block_device").has_member("__bd_flags"):
            impl = _bdev_partno_flags
        else:
            impl = _bdev_partno_old
        prog.cache["bdev_partno"] = impl
    return impl(bdev)
def _for_each_block_device(prog: Program) -> Iterator[Object]:
    # Yield the struct device * of every device registered on block_class.
    # The devices list head and the list-node layout are looked up once per
    # program and cached.
    cache_key = "_for_each_block_device"
    if cache_key in prog.cache:
        devices, class_in_device_private = prog.cache[cache_key]
    else:
        block_class = prog["block_class"].address_of_()
        devices = _class_to_subsys(block_class).klist_devices.k_list.address_of_()
        # Linux kernel commit 570d0200123f ("driver core: move
        # device->knode_class to device_private") (in v5.1) moved the list
        # node.
        class_in_device_private = prog.type("struct device_private").has_member(
            "knode_class"
        )
        prog.cache[cache_key] = devices, class_in_device_private

    if class_in_device_private:
        for device_private in list_for_each_entry(
            "struct device_private", devices, "knode_class.n_node"
        ):
            yield device_private.device
    else:
        yield from list_for_each_entry("struct device", devices, "knode_class.n_node")
have_bd_device = True for device in _for_each_block_device(prog): if have_bd_device: try: bdev = container_of(device, "struct block_device", "bd_device") except LookupError: have_bd_device = False else: if not bdev_partno(bdev): yield bdev.bd_disk continue part = container_of(device, "struct hd_struct", "__dev") if part.partno == 0: yield container_of(part, "struct gendisk", "part0") @takes_program_or_default def print_disks(prog: Program) -> None: """Print all of the disks in the system.""" for disk in for_each_disk(prog): major = disk.major.value_() minor = disk.first_minor.value_() name = escape_ascii_string(disk_name(disk), escape_backslash=True) print(f"{major}:{minor} {name} ({disk.type_.type_name()})0x{disk.value_():x}") def part_devt(part: Object) -> Object: """ Get a partition's device number. :param part: ``struct block_device *`` or ``struct hd_struct *`` depending on the kernel version. :return: ``dev_t`` """ try: return part.bd_dev except AttributeError: return part.__dev.devt def part_name(part: Object) -> bytes: """ Get the name of a partition (e.g., ``sda1``). :param part: ``struct block_device *`` or ``struct hd_struct *`` depending on the kernel version. """ try: bd_device = part.bd_device except AttributeError: return part.__dev.kobj.name.string_() return bd_device.kobj.name.string_() @takes_program_or_default def for_each_partition(prog: Program) -> Iterator[Object]: """ Iterate over all partitions in the system. :return: Iterator of ``struct block_device *`` or ``struct hd_struct *`` objects depending on the kernel version. """ # See the comment in for_each_disk(). 
@takes_program_or_default
def kaslr_offset(prog: Program) -> int:
    """
    Get the kernel address space layout randomization offset (zero if it is
    disabled).
    """
    # Delegates to the implementation in the _drgn C extension.
    return _linux_helper_kaslr_offset(prog)


@takes_program_or_default
def pgtable_l5_enabled(prog: Program) -> bool:
    """Return whether 5-level paging is enabled."""
    # Delegates to the implementation in the _drgn C extension.
    return _linux_helper_pgtable_l5_enabled(prog)
""" import itertools from typing import Iterator from drgn import IntegerLike, Object, Program, cast from drgn.helpers.common.prog import takes_program_or_default from drgn.helpers.linux.idr import idr_for_each from drgn.helpers.linux.list import hlist_for_each_entry, list_for_each_entry __all__ = ( "bpf_btf_for_each", "bpf_link_for_each", "bpf_map_for_each", "bpf_prog_for_each", "cgroup_bpf_prog_for_each", "cgroup_bpf_prog_for_each_effective", ) @takes_program_or_default def bpf_btf_for_each(prog: Program) -> Iterator[Object]: """ Iterate over all BTF objects. This is only supported since Linux v4.18. :return: Iterator of ``struct btf *`` objects. """ type = prog.type("struct btf *") # BTF was introduced in Linux kernel commit 69b693f0aefa ("bpf: btf: # Introduce BPF Type Format (BTF)") (in v4.18). btf_idr was added in a # later commit in v4.18, 78958fca7ead ("bpf: btf: Introduce BTF ID"). for nr, entry in idr_for_each(prog["btf_idr"]): yield cast(type, entry) @takes_program_or_default def bpf_link_for_each(prog: Program) -> Iterator[Object]: """ Iterate over all BPF links. This is only supported since Linux v5.8. :return: Iterator of ``struct bpf_link *`` objects. """ type = prog.type("struct bpf_link *") # link_idr didn't exist before Linux kernel commit a3b80e107894 ("bpf: # Allocate ID for bpf_link") (in v5.8). struct bpf_link didn't exist at all # before Linux kernel commit 70ed506c3bbc ("bpf: Introduce pinnable # bpf_link abstraction") (in v5.7), and we don't support Linux v5.7 # anyways. for nr, entry in idr_for_each(prog["link_idr"]): yield cast(type, entry) @takes_program_or_default def bpf_map_for_each(prog: Program) -> Iterator[Object]: """ Iterate over all BPF maps. This is only supported since Linux v4.13. :return: Iterator of ``struct bpf_map *`` objects. """ type = prog.type("struct bpf_map *") # map_idr didn't exist before Linux kernel commit f3f1c054c288 ("bpf: # Introduce bpf_map ID") (in v4.13). 
@takes_program_or_default
def bpf_prog_for_each(prog: Program) -> Iterator[Object]:
    """
    Iterate over all BPF programs.

    This is only supported since Linux v4.13.

    :return: Iterator of ``struct bpf_prog *`` objects.
    """
    type = prog.type("struct bpf_prog *")
    # prog_idr didn't exist before Linux kernel commit dc4bb0e23561 ("bpf:
    # Introduce bpf_prog ID") (in v4.13).
    # Only the IDR entry pointer is needed; discard the ID.
    for _, entry in idr_for_each(prog["prog_idr"]):
        yield cast(type, entry)
list = progs[bpf_attach_type].address_of_() if hasattr(list, "first"): iterator = hlist_for_each_entry else: iterator = list_for_each_entry for pl in iterator("struct bpf_prog_list", list, "node"): yield pl.prog def cgroup_bpf_prog_for_each_effective( cgrp: Object, bpf_attach_type: IntegerLike ) -> Iterator[Object]: """ Iterate over all effective cgroup BPF programs of the given attach type for the given cgroup. :param cgrp: ``struct cgroup *`` :param bpf_attach_type: ``enum bpf_attach_type`` :return: Iterator of ``struct bpf_prog *`` objects. """ # Before Linux kernel commit 3007098494be ("cgroup: add support for eBPF # programs") (in v4.10), struct cgroup::bpf didn't exist because cgroup BPF # programs didn't exist. Since then, if the kernel was not configured with # CONFIG_CGROUP_BPF, then struct cgroup_bpf is an empty structure. try: effective = cgrp.bpf.effective[bpf_attach_type] except AttributeError: return # Since Linux kernel commit 324bda9e6c5a ("bpf: multi program support for # cgroup+bpf") (in v4.15), struct cgroup_bpf::effective is an array of # struct bpf_prog_array. Before that, only one program of each attach type # could be effective for a cgroup, so struct cgroup_bpf::effective is an # array of struct bpf_prog *. try: effective_items = effective.items except AttributeError: if effective: yield effective else: for i in itertools.count(): prog = effective_items[i].prog.read_() if not prog: break yield prog drgn-0.0.31/drgn/helpers/linux/cgroup.py000066400000000000000000000123651477777462700201740ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Cgroup ------ The ``drgn.helpers.linux.cgroup`` module provides helpers for working with the cgroup interface in :linux:`include/linux/cgroup.h`. Only cgroup v2 is supported. 
""" from typing import Callable, Iterator from drgn import NULL, Object, Path, Program, cast, container_of from drgn.helpers.common.prog import takes_program_or_default from drgn.helpers.linux.kernfs import kernfs_name, kernfs_path, kernfs_walk from drgn.helpers.linux.list import list_for_each_entry __all__ = ( "cgroup_get_from_path", "cgroup_name", "cgroup_parent", "cgroup_path", "css_for_each_child", "css_for_each_descendant_pre", "css_next_child", "css_next_descendant_pre", "sock_cgroup_ptr", ) def sock_cgroup_ptr(skcd: Object) -> Object: """ Get the cgroup for a socket from the given ``struct sock_cgroup_data *`` (usually from ``struct sock::sk_cgrp_data``). :param skcd: ``struct sock_cgroup_data *`` :return: ``struct cgroup *`` """ # Since Linux kernel commit 8520e224f547 ("bpf, cgroups: Fix # cgroup v2 fallback on v1/v2 mixed mode") (in v5.15), the sock_cgroup_data # contains directly cgroup member (of struct cgroup * type). try: return skcd.cgroup except AttributeError: return cast("struct cgroup *", skcd.val) def cgroup_parent(cgrp: Object) -> Object: """ Return the parent cgroup of the given cgroup if it exists, ``NULL`` otherwise. :param cgrp: ``struct cgroup *`` :return: ``struct cgroup *`` """ parent_css = cgrp.self.parent if parent_css: return container_of(parent_css, "struct cgroup", "self") return NULL(cgrp.prog_, "struct cgroup *") def cgroup_name(cgrp: Object) -> bytes: """ Get the name of the given cgroup. :param cgrp: ``struct cgroup *`` """ return kernfs_name(cgrp.kn) def cgroup_path(cgrp: Object) -> bytes: """ Get the full path of the given cgroup. :param cgrp: ``struct cgroup *`` """ return kernfs_path(cgrp.kn) @takes_program_or_default def cgroup_get_from_path(prog: Program, path: Path) -> Object: """ Look up a cgroup from its default hierarchy path . :param path: Path name. 
""" obj = kernfs_walk(prog["cgrp_dfl_root"].cgrp.kn, path) if obj: obj = obj.priv return cast("struct cgroup *", obj) def css_next_child(pos: Object, parent: Object) -> Object: """ Get the next child (or ``NULL`` if there is none) of the given parent starting from the given position (``NULL`` to initiate traversal). :param pos: ``struct cgroup_subsys_state *`` :param parent: ``struct cgroup_subsys_state *`` :return: ``struct cgroup_subsys_state *`` """ if not pos: next_ = container_of( parent.children.next, "struct cgroup_subsys_state", "sibling" ) elif not (pos.flags & pos.prog_["CSS_RELEASED"]): next_ = container_of(pos.sibling.next, "struct cgroup_subsys_state", "sibling") else: serial_nr = pos.serial_nr.value_() # Read once and cache. for next_ in list_for_each_entry( "struct cgroup_subsys_state", parent.children.address_of_(), "sibling" ): if next_.serial_nr > serial_nr: break if next_.sibling.address_of_() != parent.children.address_of_(): return next_ return NULL(next_.prog_, "struct cgroup_subsys_state *") def css_next_descendant_pre(pos: Object, root: Object) -> Object: """ Get the next pre-order descendant (or ``NULL`` if there is none) of the given css root starting from the given position (``NULL`` to initiate traversal). :param pos: ``struct cgroup_subsys_state *`` :param root: ``struct cgroup_subsys_state *`` :return: ``struct cgroup_subsys_state *`` """ # If first iteration, visit root. if not pos: return root # Visit the first child if exists. null = NULL(pos.prog_, "struct cgroup_subsys_state *") next_ = css_next_child(null, pos) if next_: return next_ # No child, visit my or the closest ancestor's next sibling. 
def _css_for_each_impl(
    next_fn: Callable[[Object, Object], Object], css: Object
) -> Iterator[Object]:
    # Generic traversal driver: repeatedly apply next_fn, starting from NULL,
    # until it reports the end of the walk by returning NULL.
    pos = next_fn(NULL(css.prog_, "struct cgroup_subsys_state *"), css)
    while pos:
        yield pos
        pos = next_fn(pos, css)


def css_for_each_child(css: Object) -> Iterator[Object]:
    """
    Iterate through children (offline included) of the given css.

    :param css: ``struct cgroup_subsys_state *``
    :return: Iterator of ``struct cgroup_subsys_state *`` objects.
    """
    return _css_for_each_impl(css_next_child, css)


def css_for_each_descendant_pre(css: Object) -> Iterator[Object]:
    """
    Iterate through the given css's descendants (offline included) in
    pre-order.

    :param css: ``struct cgroup_subsys_state *``
    :return: Iterator of ``struct cgroup_subsys_state *`` objects.
    """
    return _css_for_each_impl(css_next_descendant_pre, css)
@takes_program_or_default def cpu_online_mask(prog: Program) -> Object: """ Return the mask of online CPUs. :return: ``struct cpumask *`` """ try: return prog["__cpu_online_mask"].address_of_() except KeyError: return prog["cpu_online_mask"] @takes_program_or_default def cpu_possible_mask(prog: Program) -> Object: """ Return the mask of possible CPUs. :return: ``struct cpumask *`` """ try: return prog["__cpu_possible_mask"].address_of_() except KeyError: return prog["cpu_possible_mask"] @takes_program_or_default def cpu_present_mask(prog: Program) -> Object: """ Return the mask of present CPUs. :return: ``struct cpumask *`` """ try: return prog["__cpu_present_mask"].address_of_() except KeyError: return prog["cpu_present_mask"] def for_each_cpu(mask: Object) -> Iterator[int]: """ Iterate over all of the CPUs in the given mask. :param mask: ``struct cpumask *`` """ try: nr_cpu_ids = mask.prog_["nr_cpu_ids"].value_() except KeyError: nr_cpu_ids = 1 return for_each_set_bit(mask.bits, nr_cpu_ids) @takes_program_or_default def for_each_online_cpu(prog: Program) -> Iterator[int]: """Iterate over all online CPUs.""" return for_each_cpu(cpu_online_mask(prog)) @takes_program_or_default def for_each_possible_cpu(prog: Program) -> Iterator[int]: """Iterate over all possible CPUs.""" return for_each_cpu(cpu_possible_mask(prog)) @takes_program_or_default def for_each_present_cpu(prog: Program) -> Iterator[int]: """Iterate over all present CPUs.""" return for_each_cpu(cpu_present_mask(prog)) def cpumask_to_cpulist(mask: Object) -> str: """ Return a CPU mask as a CPU list string. >>> cpumask_to_cpulist(mask) 0-3,8-11 :param mask: ``struct cpumask *`` :return: String in the `CPU list format `_. 
""" start = end = -2 parts = [] for cpu in for_each_cpu(mask): if cpu == end + 1: end = cpu else: if start >= 0: parts.append(str(start) if start == end else f"{start}-{end}") start = end = cpu if start >= 0: parts.append(str(start) if start == end else f"{start}-{end}") return ",".join(parts) drgn-0.0.31/drgn/helpers/linux/device.py000066400000000000000000000021501477777462700201230ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Devices ------- The ``drgn.helpers.linux.device`` module provides helpers for working with Linux devices, including the kernel encoding of ``dev_t``. """ import operator from drgn import IntegerLike __all__ = ( "MAJOR", "MINOR", "MKDEV", ) # This hasn't changed since at least v2.6. _MINORBITS = 20 _MINORMASK = (1 << _MINORBITS) - 1 def MAJOR(dev: IntegerLike) -> int: """ Return the major ID of a kernel ``dev_t``. :param dev: ``dev_t`` object or :class:`int`. """ return operator.index(dev) >> _MINORBITS def MINOR(dev: IntegerLike) -> int: """ Return the minor ID of a kernel ``dev_t``. :param dev: ``dev_t`` object or :class:`int`. """ return operator.index(dev) & _MINORMASK def MKDEV(major: IntegerLike, minor: IntegerLike) -> int: """ Return a kernel ``dev_t`` from the major and minor IDs. :param major: Device major ID. :param minor: Device minor ID. """ return (operator.index(major) << _MINORBITS) | operator.index(minor) drgn-0.0.31/drgn/helpers/linux/fs.py000066400000000000000000000344541477777462700173100ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Virtual Filesystem Layer ------------------------ The ``drgn.helpers.linux.fs`` module provides helpers for working with the Linux virtual filesystem (VFS) layer, including mounts, dentries, and inodes. 
""" import os from typing import Iterator, Optional, Tuple, Union, overload from drgn import IntegerLike, Object, Path, Program, container_of, sizeof from drgn.helpers.common.format import escape_ascii_string from drgn.helpers.common.prog import takes_object_or_program_or_default from drgn.helpers.linux.list import ( hlist_empty, hlist_for_each_entry, list_for_each_entry, ) from drgn.helpers.linux.rbtree import rbtree_inorder_for_each_entry __all__ = ( "path_lookup", "d_path", "dentry_path", "inode_path", "inode_paths", "mount_src", "mount_dst", "mount_fstype", "for_each_mount", "print_mounts", "fget", "for_each_file", "print_files", ) def _follow_mount(mnt: Object, dentry: Object) -> Tuple[Object, Object]: prog = dentry.prog_ try: DCACHE_MOUNTED = prog.cache["DCACHE_MOUNTED"] except KeyError: tokens = prog["UTS_RELEASE"].string_().split(b".", 2) major, minor = int(tokens[0]), int(tokens[1]) # Linux kernel commit 9748cb2dc393 ("VFS: repack DENTRY_ flags.") (in # v6.15) changed the value of DCACHE_MOUNTED. Unfortunately, it's a # macro, so we have to hardcode it based on a version check until it's # converted to an enum. 
if (major, minor) >= (6, 15): DCACHE_MOUNTED = 1 << 15 else: DCACHE_MOUNTED = 1 << 16 prog.cache["DCACHE_MOUNTED"] = DCACHE_MOUNTED while dentry.d_flags & DCACHE_MOUNTED: for mounted in list_for_each_entry( "struct mount", mnt.mnt_mounts.address_of_(), "mnt_child" ): if mounted.mnt_mountpoint == dentry: mnt = mounted dentry = mounted.mnt.mnt_root.read_() break else: break return mnt, dentry def _follow_dotdot( mnt: Object, dentry: Object, root_mnt: Object, root_dentry: Object ) -> Tuple[Object, Object]: while dentry != root_dentry or mnt != root_mnt: d_parent = dentry.d_parent.read_() if dentry != d_parent: dentry = d_parent break mnt_parent = mnt.mnt_parent.read_() if mnt == mnt_parent: break dentry = mnt.mnt_mountpoint mnt = mnt_parent return _follow_mount(mnt, dentry) @takes_object_or_program_or_default def path_lookup( prog: Program, root: Optional[Object], path: Path, *, allow_negative: bool = False, ) -> Object: """ Look up the given path name. :param root: ``struct path *`` to use as the root directory. Defaults to the initial root filesystem if given a :class:`~drgn.Program` or :ref:`omitted `. :param path: Path to lookup. :param allow_negative: Whether to allow returning a negative dentry (i.e., a dentry for a non-existent path). :return: ``struct path`` :raises Exception: if the dentry is negative and ``allow_negative`` is ``False``, or if the path is not present in the dcache. The latter does not necessarily mean that the path does not exist; it may be uncached. On a live system, you can make the kernel cache the path by accessing it (e.g., with :func:`open()` or :func:`os.stat()`): >>> path_lookup('/usr/include/stdlib.h') ... 
Exception: could not find '/usr/include/stdlib.h' in dcache >>> open('/usr/include/stdlib.h').close() >>> path_lookup('/usr/include/stdlib.h') (struct path){ .mnt = (struct vfsmount *)0xffff8b70413cdca0, .dentry = (struct dentry *)0xffff8b702ac2c480, } """ if root is None: root = prog["init_task"].fs.root mnt = root_mnt = container_of(root.mnt.read_(), "struct mount", "mnt") dentry = root_dentry = root.dentry.read_() components = os.fsencode(path).split(b"/") for i, component in enumerate(components): if component == b"" or component == b".": continue elif component == b"..": mnt, dentry = _follow_dotdot(mnt, dentry, root_mnt, root_dentry) else: # Since Linux kernel commit da549bdd15c2 ("dentry: switch the lists # of children to hlist") (in v6.8), the children are in an hlist. # Before that, they're in a list with different field names. try: children = hlist_for_each_entry( "struct dentry", dentry.d_children.address_of_(), "d_sib" ) except AttributeError: children = list_for_each_entry( "struct dentry", dentry.d_subdirs.address_of_(), "d_child" ) for child in children: if child.d_name.name.string_() == component: dentry = child break else: failed_path = os.fsdecode(b"/".join(components[: i + 1])) raise Exception(f"could not find {failed_path!r} in dcache") mnt, dentry = _follow_mount(mnt, dentry) if not allow_negative and not dentry.d_inode: failed_path = os.fsdecode(b"/".join(components)) raise Exception(f"{failed_path!r} dentry is negative") return Object( prog, "struct path", value={"mnt": mnt.mnt.address_of_(), "dentry": dentry}, ) @overload def d_path(path: Object) -> bytes: """ Return the full path of a dentry given a ``struct path``. :param path: ``struct path`` or ``struct path *`` """ ... @overload def d_path(vfsmnt: Object, dentry: Object) -> bytes: """ Return the full path of a dentry given a mount and dentry. :param vfsmnt: ``struct vfsmount *`` :param dentry: ``struct dentry *`` """ ... 
@overload def d_path(dentry: Object) -> bytes: """ Return the full path of a dentry. Since a mount is not provided, this arbitrarily selects a mount to determine the path. :param dentry: ``struct dentry *`` """ ... def d_path( # type: ignore # Need positional-only arguments. arg1: Object, arg2: Optional[Object] = None ) -> bytes: if arg2 is None: try: mnt = container_of(arg1.mnt, "struct mount", "mnt") dentry = arg1.dentry.read_() except AttributeError: # Select an arbitrary mount from this dentry's super block. We # choose the first non-internal mount. Internal mounts exist for # kernel filesystems (e.g. debugfs) and they are mounted at "/". # Paths from these mounts aren't usable in userspace and they're # confusing. If there's no other option, we will use the first # internal mount we encountered. # # The MNT_INTERNAL flag is defined as a macro in the kernel source. # Introduced in 2.6.34 and has not been modified since. MNT_INTERNAL = 0x4000 internal_mnt = None dentry = arg1 for mnt in list_for_each_entry( "struct mount", dentry.d_sb.s_mounts.address_of_(), "mnt_instance" ): if mnt.mnt.mnt_flags & MNT_INTERNAL: internal_mnt = internal_mnt or mnt continue break else: if internal_mnt is not None: mnt = internal_mnt else: raise ValueError("Could not find a mount for this dentry") else: mnt = container_of(arg1, "struct mount", "mnt") dentry = arg2.read_() d_op = dentry.d_op.read_() if d_op and d_op.d_dname: return b"[" + dentry.d_inode.i_sb.s_type.name.string_() + b"]" components = [] while True: if dentry == mnt.mnt.mnt_root: mnt_parent = mnt.mnt_parent.read_() if mnt == mnt_parent: break dentry = mnt.mnt_mountpoint.read_() mnt = mnt_parent continue d_parent = dentry.d_parent.read_() if dentry == d_parent: break components.append(dentry.d_name.name.string_()) components.append(b"/") dentry = d_parent if components: return b"".join(reversed(components)) else: return b"/" def dentry_path(dentry: Object) -> bytes: """ Return the path of a dentry from the root of its 
filesystem. :param dentry: ``struct dentry *`` """ components = [] while True: d_parent = dentry.d_parent.read_() if dentry == d_parent: break components.append(dentry.d_name.name.string_()) dentry = d_parent return b"/".join(reversed(components)) def inode_path(inode: Object) -> Optional[bytes]: """ Return any path of an inode from the root of its filesystem. :param inode: ``struct inode *`` :return: Path, or ``None`` if the inode has no aliases. """ if hlist_empty(inode.i_dentry): return None return dentry_path( container_of(inode.i_dentry.first, "struct dentry", "d_u.d_alias") ) def inode_paths(inode: Object) -> Iterator[bytes]: """ Return an iterator over all of the paths of an inode from the root of its filesystem. :param inode: ``struct inode *`` """ return ( dentry_path(dentry) for dentry in hlist_for_each_entry( "struct dentry", inode.i_dentry.address_of_(), "d_u.d_alias" ) ) def mount_src(mnt: Object) -> bytes: """ Get the source device name for a mount. :param mnt: ``struct mount *`` """ return mnt.mnt_devname.string_() def mount_dst(mnt: Object) -> bytes: """ Get the path of a mount point. :param mnt: ``struct mount *`` """ return d_path(mnt.mnt.address_of_(), mnt.mnt.mnt_root) def mount_fstype(mnt: Object) -> bytes: """ Get the filesystem type of a mount. :param mnt: ``struct mount *`` """ sb = mnt.mnt.mnt_sb.read_() fstype = sb.s_type.name.string_() subtype_obj = sb.s_subtype.read_() if subtype_obj: subtype = subtype_obj.string_() if subtype: fstype += b"." + subtype return fstype @takes_object_or_program_or_default def for_each_mount( prog: Program, ns: Optional[Object], *, src: Optional[Path] = None, dst: Optional[Path] = None, fstype: Optional[Union[str, bytes]] = None, ) -> Iterator[Object]: """ Iterate over all of the mounts in a given namespace. :param ns: ``struct mnt_namespace *``. Defaults to the initial mount namespace if given a :class:`~drgn.Program` or :ref:`omitted `. :param src: Only include mounts with this source device name. 
:param dst: Only include mounts with this destination path. :param fstype: Only include mounts with this filesystem type. :return: Iterator of ``struct mount *`` objects. """ if ns is None: ns = prog["init_task"].nsproxy.mnt_ns if src is not None: src = os.fsencode(src) if dst is not None: dst = os.fsencode(dst) if fstype: fstype = os.fsencode(fstype) # Since Linux kernel commit 2eea9ce4310d ("mounts: keep list of mounts in # an rbtree") (in v6.8), the mounts in a namespace are in a red-black tree. # Before that, they're in a list. # The old case is first here because before that commit, struct mount also # had a different member named "mounts". try: mounts = list_for_each_entry("struct mount", ns.list.address_of_(), "mnt_list") except AttributeError: mounts = rbtree_inorder_for_each_entry( "struct mount", ns.mounts.address_of_(), "mnt_node" ) for mnt in mounts: if ( (src is None or mount_src(mnt) == src) and (dst is None or mount_dst(mnt) == dst) and (fstype is None or mount_fstype(mnt) == fstype) ): yield mnt @takes_object_or_program_or_default def print_mounts( prog: Program, ns: Optional[Object], *, src: Optional[Path] = None, dst: Optional[Path] = None, fstype: Optional[Union[str, bytes]] = None, ) -> None: """ Print the mount table of a given namespace. The arguments are the same as :func:`for_each_mount()`. The output format is similar to ``/proc/mounts`` but prints the value of each ``struct mount *``. """ for mnt in for_each_mount( prog if ns is None else ns, # type: ignore # python/mypy#12056 src=src, dst=dst, fstype=fstype, ): mnt_src = escape_ascii_string(mount_src(mnt), escape_backslash=True) mnt_dst = escape_ascii_string(mount_dst(mnt), escape_backslash=True) mnt_fstype = escape_ascii_string(mount_fstype(mnt), escape_backslash=True) print( f"{mnt_src} {mnt_dst} {mnt_fstype} ({mnt.type_.type_name()})0x{mnt.value_():x}" ) def fget(task: Object, fd: IntegerLike) -> Object: """ Return the kernel file descriptor of the fd of a given task. 
:param task: ``struct task_struct *`` :param fd: File descriptor. :return: ``struct file *`` """ return task.files.fdt.fd[fd] def for_each_file(task: Object) -> Iterator[Tuple[int, Object]]: """ Iterate over all of the files open in a given task. :param task: ``struct task_struct *`` :return: Iterator of (fd, ``struct file *``) tuples. """ files = task.files.read_() if not files: return fdt = files.fdt.read_() bits_per_long = 8 * sizeof(fdt.open_fds.type_.type) for i in range((fdt.max_fds.value_() + bits_per_long - 1) // bits_per_long): word = fdt.open_fds[i].value_() for j in range(bits_per_long): if word & (1 << j): fd = i * bits_per_long + j file = fdt.fd[fd].read_() yield fd, file def print_files(task: Object) -> None: """ Print the open files of a given task. :param task: ``struct task_struct *`` """ for fd, file in for_each_file(task): path = d_path(file.f_path) escaped_path = escape_ascii_string(path, escape_backslash=True) print(f"{fd} {escaped_path} ({file.type_.type_name()})0x{file.value_():x}") drgn-0.0.31/drgn/helpers/linux/idr.py000066400000000000000000000065651477777462700174600ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ IDR --- The ``drgn.helpers.linux.idr`` module provides helpers for working with the IDR data structure in :linux:`include/linux/idr.h`. An IDR provides a mapping from an ID to a pointer. """ import operator from typing import Iterator, Tuple, Union from _drgn import _linux_helper_idr_find from drgn import NULL, IntegerLike, Object, Type, cast, sizeof from drgn.helpers.linux.radixtree import radix_tree_for_each __all__ = ( "idr_find", "idr_for_each", "idr_for_each_entry", ) _IDR_BITS = 8 _IDR_MASK = (1 << _IDR_BITS) - 1 def idr_find(idr: Object, id: IntegerLike) -> Object: """ Look up the entry with the given ID in an IDR. :param idr: ``struct idr *`` :param id: Entry ID. :return: ``void *`` found entry, or ``NULL`` if not found. 
""" # Since Linux kernel commit 0a835c4f090a ("Reimplement IDR and IDA using # the radix tree") (in v4.11), IDRs are backed by radix trees. Before that, # they are a separate data structure. The helper in libdrgn only handles # the radix tree version. if hasattr(idr, "idr_rt"): return _linux_helper_idr_find(idr, id) else: prog = idr.prog_ id = operator.index(id) if id < 0: return NULL(prog, "void *") p = idr.top.read_() if not p: return NULL(prog, "void *") n = (p.layer.value_() + 1) * _IDR_BITS MAX_IDR_SHIFT = sizeof(prog.type("int")) * 8 - 1 # Equivalent to id > idr_max(p->layer + 1) in the kernel. if id >= (1 << min(n, MAX_IDR_SHIFT)): return NULL(prog, "void *") while n > 0 and p: n -= _IDR_BITS p = p.ary[(id >> n) & _IDR_MASK].read_() return cast("void *", p) def idr_for_each(idr: Object) -> Iterator[Tuple[int, Object]]: """ Iterate over all of the pointers in an IDR. :param idr: ``struct idr *`` :return: Iterator of (index, ``void *``) tuples. """ # Since Linux kernel commit 0a835c4f090a ("Reimplement IDR and IDA using # the radix tree") (in v4.11), IDRs are backed by radix trees. try: idr_rt = idr.idr_rt except AttributeError: voidp_type = idr.prog_.type("void *") def aux(p: Object, id: int, n: int) -> Iterator[Tuple[int, Object]]: p = p.read_() if p: if n == 0: yield id, cast(voidp_type, p) else: n -= _IDR_BITS for child in p.ary: yield from aux(child, id, n) id += 1 << n yield from aux(idr.top, 0, idr.layers.value_() * _IDR_BITS) else: try: base = idr.idr_base.value_() except AttributeError: base = 0 for index, entry in radix_tree_for_each(idr_rt.address_of_()): yield index + base, entry def idr_for_each_entry( idr: Object, type: Union[str, Type] ) -> Iterator[Tuple[int, Object]]: """ Iterate over all of the entries with the given type in an IDR. :param idr: ``struct idr *`` :param type: Entry type. :return: Iterator of (index, ``type *``) tuples. 
""" prog = idr.prog_ type = prog.pointer_type(prog.type(type)) for index, entry in idr_for_each(idr): yield index, cast(type, entry) drgn-0.0.31/drgn/helpers/linux/kallsyms.py000066400000000000000000000206531477777462700205330ustar00rootroot00000000000000# Copyright (c) 2024 Oracle and/or its affiliates # SPDX-License-Identifier: LGPL-2.1-or-later """ Kallsyms -------- The ``drgn.helpers.linux.kallsyms`` module contains helpers which allow you to use the built-in kallsyms symbol table for drgn symbol lookup. Combined with an alternative type information source, this can enable debugging Linux kernel core dumps without the corresponding DWARF debuginfo files. Even without type information, kallsyms can be used to help locate objects, and drgn's low-level memory reading functions can be used to do basic debugging tasks. """ import os import re from typing import Dict, List, Tuple from _drgn import ( _linux_helper_load_builtin_kallsyms, _linux_helper_load_proc_kallsyms as _load_proc_kallsyms, ) from drgn import ( Object, Program, ProgramFlags, Symbol, SymbolBinding, SymbolIndex, SymbolKind, ) from drgn.helpers.linux.module import for_each_module __all__ = ( "load_vmlinux_kallsyms", "load_module_kallsyms", ) def _vmcoreinfo_symbols(prog: Program) -> Dict[str, int]: vmcoreinfo_data = prog["VMCOREINFO"].string_().decode("ascii") vmcoreinfo_symbols = {} sym_re = re.compile(r"SYMBOL\(([^)]+)\)=([A-Fa-f0-9]+)") for line in vmcoreinfo_data.strip().split("\n"): match = sym_re.fullmatch(line) if match: vmcoreinfo_symbols[match.group(1)] = int(match.group(2), 16) return vmcoreinfo_symbols def _load_builtin_kallsyms(prog: Program) -> SymbolIndex: symbol_reqd = [ "kallsyms_names", "kallsyms_token_table", "kallsyms_token_index", "kallsyms_num_syms", "kallsyms_offsets", "kallsyms_relative_base", "kallsyms_addresses", "_stext", ] symbols = _vmcoreinfo_symbols(prog) args = [] for sym in symbol_reqd: args.append(symbols.get(sym, 0)) return _linux_helper_load_builtin_kallsyms(prog, 
*args) def load_vmlinux_kallsyms(prog: Program) -> SymbolIndex: """ Create a kallsyms index for vmlinux This function loads the kallsyms for the core kernel and returns a symbol index. This function does not require that any debuginfo is loaded for the kernel: it either relies on ``/proc/kallsyms`` (which requires running drgn as root) or it parses internal data structures using information found from the VMCOREINFO note (which requires Linux 6.0 or later, or a backport of commit ``f09bddbd86619 ("vmcoreinfo: add kallsyms_num_syms symbol")`` and its dependencies). :returns: a symbol index containing kallsyms for the core kernel (vmlinux) """ if prog.flags & ProgramFlags.IS_LIVE and os.geteuid() == 0: return _load_proc_kallsyms() else: return _load_builtin_kallsyms(prog) def _nm_type_to_binding_kind(code: str) -> Tuple[SymbolBinding, SymbolKind]: binding = SymbolBinding.UNKNOWN kind = SymbolKind.UNKNOWN if code == "v": binding = SymbolBinding.WEAK kind = SymbolKind.OBJECT elif code == "w": binding = SymbolBinding.WEAK elif code in "tT": kind = SymbolKind.FUNC elif code.lower() in "srbgncd": kind = SymbolKind.OBJECT if binding == SymbolBinding.UNKNOWN and code.isupper(): binding = SymbolBinding.GLOBAL return binding, kind def _st_info_to_binding_kind(info: int) -> Tuple[SymbolBinding, SymbolKind]: binding_int = info >> 4 STB_WEAK = 2 STB_GNU_UNIQUE = 10 if binding_int <= STB_WEAK or binding_int == STB_GNU_UNIQUE: binding = SymbolBinding(binding_int + 1) else: binding = SymbolBinding.UNKNOWN type_ = info & 0xF STT_TLS = 6 STT_GNU_IFUNC = 10 if type_ <= STT_TLS or type_ == STT_GNU_IFUNC: kind = SymbolKind(type_) else: kind = SymbolKind.UNKNOWN return binding, kind def _elf_sym_to_symbol(name: str, obj: Object, has_typetab: bool) -> Symbol: # Linux likes to have the nm(1) character code for its symbols, which it # refers to as the symbol's "type" (this is of course distinct from the ELF # notion of a symbol type, let alone what drgn considers a "type"...). 
# # Prior to 5439c985c5a8 ("module: Overwrite st_size instead of st_info"), # merged in v5.0, the kernel simply overwrote the "st_info" field with a # single-character code that represents the nm(1) character code for that # symbol. However, starting with that commit, it was switched to overwrite # the "st_size" field instead! This was thankfully fixed in v5.2 with # 1c7651f43777 ("kallsyms: store type information in its own array"). # # Unfortunately, this leaves us with three possibilities: # 1. Pre-v5.0: interpret the "st_info" as a character from nm(1) and try to # infer the kind and bindings. # 2. 5.0-5.2: interpret the "st_info" as normal, but ignore the "st_size" # field since it is bogus. # 3. 5.2+: both fields are valid, and the nm(1) code is stored in "typetab". # # Case 3 can be determined easily by the presence of "typetab" in "struct # mod_kallsyms". However, cases 1 & 2 are indistinguishable. For our # purposes, it makes more sense to fall back to case 1. After all, neither # 5.0 or 5.1 were LTS kernels, nor are they actively used by any major # distro. We have no way to deal with 5.0 or 5.1, whereas we can make some # informed guesses for pre-5.0 based on the nm(1) code. if has_typetab: binding, kind = _st_info_to_binding_kind(obj.st_info.value_()) else: binding, kind = _nm_type_to_binding_kind(chr(obj.st_info.value_())) return Symbol( # type: ignore name, obj.st_value.value_(), obj.st_size.value_(), binding, kind, ) def _module_kallsyms(module: Object) -> List[Symbol]: try: ks = module.kallsyms except AttributeError: # Prior to 8244062ef1e54 ("modules: fix longstanding /proc/kallsyms vs # module insertion race."), the kallsyms variables were stored directly # on the module object. This commit was introduced in 4.5, but was # backported to some stable kernels too. Fall back to the module object # in cases where kallsyms field isn't available. 
ks = module prog = module.prog_ num_symtab = ks.num_symtab.value_() try: ks.member_("typetab") has_typetab = True except LookupError: has_typetab = False # The symtab field is a pointer, but it points at an array of Elf_Sym # objects. Indexing it requires drgn to do pointer arithmetic and issue a # lot of very small /proc/kcore reads, which can be a real performance # issue. So convert it into an object representing a correctly-sized array, # and then read that object all at once. This does one /proc/kcore read, # which is a major improvement! symtab = Object( prog, type=prog.array_type(ks.symtab.type_.type, num_symtab), address=ks.symtab.value_(), ).read_() # The strtab is similarly a pointer into a contigous array of strings packed # next to each other. Reading individual strings from /proc/kcore can be # quite slow. So read the entire array of bytes into a Python bytes value, # and we'll extract the individual symbol strings from there. last_string_start = symtab[num_symtab - 1].st_name.value_() last_string_len = len(ks.strtab[last_string_start].address_of_().string_()) + 1 strtab = prog.read(ks.strtab.value_(), last_string_start + last_string_len) syms = [] for i in range(ks.num_symtab.value_()): elfsym = symtab[i] if not elfsym.st_name: continue str_index = elfsym.st_name.value_() nul_byte = strtab.find(b"\x00", str_index) name = strtab[str_index:nul_byte].decode("ascii") syms.append(_elf_sym_to_symbol(name, elfsym, has_typetab)) return syms def load_module_kallsyms(prog: Program) -> SymbolIndex: """ Return a symbol index containing all module symbols from kallsyms For kernels built with ``CONFIG_KALLSYMS``, loaded kernel modules contain an ELF symbol table in kernel memory. This function can parse those data structures and create a symbol index usable by drgn. However, it requires that you already have debuginfo for the vmlinux image. 
:returns: a symbol index containing all symbols from module kallsyms """ all_symbols = [] for module in for_each_module(prog): all_symbols.extend(_module_kallsyms(module)) return SymbolIndex(all_symbols) drgn-0.0.31/drgn/helpers/linux/kconfig.py000066400000000000000000000042331477777462700203100ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Kconfig ------- The ``drgn.helpers.linux.kconfig`` module provides helpers for reading the Linux kernel build configuration. """ import gzip import types from typing import Mapping from drgn import Program from drgn.helpers.common.prog import takes_program_or_default __all__ = ("get_kconfig",) @takes_program_or_default def get_kconfig(prog: Program) -> Mapping[str, str]: """ Get the kernel build configuration as a mapping from the option name to the value. >>> get_kconfig()['CONFIG_SMP'] 'y' >>> get_kconfig()['CONFIG_HZ'] '300' This is only supported if the kernel was compiled with ``CONFIG_IKCONFIG``. Note that most Linux distributions do not enable this option. """ try: return prog.cache["kconfig_map"] except KeyError: pass try: start = prog.symbol("kernel_config_data").address size = prog.symbol("kernel_config_data_end").address - start except LookupError: # Before Linux kernel commit 13610aa908dc ("kernel/configs: use .incbin # directive to embed config_data.gz") (in v5.1), the data is a variable # rather than two symbols. try: kernel_config_data = prog["kernel_config_data"] except KeyError: raise LookupError( "kernel configuration data not found; kernel must be compiled with CONFIG_IKCONFIG" ) from None # The data is delimited by the magic strings "IKCFG_ST" and "IKCFG_ED" # plus a NUL byte. 
start = kernel_config_data.address_ + 8 # type: ignore[operator] size = len(kernel_config_data) - 17 data = prog.read(start, size) kconfig = {} for line in gzip.decompress(data).decode().splitlines(): if not line or line.startswith("#"): continue name, _, value = line.partition("=") if value: kconfig[name] = value # Make result mapping 'immutable', so changes cannot propagate to the cache result = types.MappingProxyType(kconfig) prog.cache["kconfig_map"] = result return result drgn-0.0.31/drgn/helpers/linux/kernfs.py000066400000000000000000000053371477777462700201660ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Kernfs ------ The ``drgn.helpers.linux.kernfs`` module provides helpers for working with the kernfs pseudo filesystem interface in :linux:`include/linux/kernfs.h`. """ import os from drgn import NULL, Object, Path from drgn.helpers.linux.rbtree import rbtree_inorder_for_each_entry __all__ = ( "kernfs_name", "kernfs_parent", "kernfs_path", "kernfs_root", "kernfs_walk", ) def kernfs_root(kn: Object) -> Object: """ Get the kernfs root that the given kernfs node belongs to. :param kn: ``struct kernfs_node *`` :return: ``struct kernfs_root *`` """ knp = kernfs_parent(kn) if knp: kn = knp return kn.dir.root.read_() def kernfs_parent(kn: Object) -> Object: """ Get the parent of the given kernfs node. :param kn: ``struct kernfs_node *`` :return: ``struct kernfs_node *`` """ # Linux kernel commit 633488947ef6 ("kernfs: Use RCU to access # kernfs_node::parent.") (in v6.15) renamed the parent member. try: return kn.__parent.read_() except AttributeError: return kn.parent.read_() def kernfs_name(kn: Object) -> bytes: """ Get the name of the given kernfs node. :param kn: ``struct kernfs_node *`` """ if not kn: return b"(null)" return kn.name.string_() if kernfs_parent(kn) else b"/" def kernfs_path(kn: Object) -> bytes: """ Get full path of the given kernfs node. 
:param kn: ``struct kernfs_node *`` """ if not kn: return b"(null)" root_kn = kernfs_root(kn).kn if kn == root_kn: return b"/" names = [] while kn != root_kn: names.append(kn.name.string_()) kn = kernfs_parent(kn) names.append(root_kn.name.string_()) names.reverse() return b"/".join(names) def kernfs_walk(parent: Object, path: Path) -> Object: """ Find the kernfs node with the given path from the given parent kernfs node. :param parent: ``struct kernfs_node *`` :param path: Path name. :return: ``struct kernfs_node *`` (``NULL`` if not found) """ kernfs_nodep_type = parent.type_ kernfs_node_type = kernfs_nodep_type.type for name in os.fsencode(path).split(b"/"): if not name: continue for parent in rbtree_inorder_for_each_entry( kernfs_node_type, parent.dir.children.address_of_(), "rb" ): if ( parent.name.string_() == name and not parent.ns # For now, we don't bother with namespaced kernfs nodes. ): break else: return NULL(parent.prog_, kernfs_nodep_type) return parent drgn-0.0.31/drgn/helpers/linux/kthread.py000066400000000000000000000036271477777462700203200ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Kernel Threads -------------- The ``drgn.helpers.linux.kthread`` module provides helpers for working with Linux kernel threads, a.k.a. kthreads. """ from drgn import Object, cast, container_of __all__ = ( "kthread_data", "to_kthread", ) def to_kthread(task: Object) -> Object: """ Get the kthread information for a task. >>> to_kthread(find_task(3)) *(struct kthread *)0xffff8ef600191580 = { ... .threadfn = (int (*)(void *))kthread_worker_fn+0x0 = 0xffffffffba1e61b0, .full_name = (char *)0xffff8ef6003d4ac0 = "pool_workqueue_release", } :param task: ``struct task *`` :return: ``struct kthread *`` """ try: # Since Linux kernel commit e32cf5dfbe22 ("kthread: Generalize # pf_io_worker so it can point to struct kthread") (in v5.17), the # struct kthread * is in task->worker_private. 
return cast("struct kthread *", task.worker_private) except AttributeError: if "free_kthread_struct" in task.prog_: # Between that and Linux kernel commit 1da5c46fa965 ("kthread: Make # struct kthread kmalloc'ed") (in v4.10), it is in # task->set_child_tid. Unfortunately we can only distinguish this # by looking for another function added in that commit. return cast("struct kthread *", task.set_child_tid) else: # Before that, task->vfork_done points to kthread->exited. return container_of(task.vfork_done, "struct kthread", "exited") def kthread_data(task: Object) -> Object: """ Get the data that was specified when a kthread was created. >>> kthread_data(find_task(3)) (void *)0xffff8ef6001812c0 :param task: ``struct task *`` :return: ``void *`` """ return to_kthread(task).data.read_() drgn-0.0.31/drgn/helpers/linux/list.py000066400000000000000000000171451477777462700176510ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Linked Lists ------------ The ``drgn.helpers.linux.list`` module provides helpers for working with the doubly-linked list implementations (``struct list_head`` and ``struct hlist_head``) in :linux:`include/linux/list.h`. """ from typing import Iterator, Union from drgn import NULL, Object, Type, container_of from drgn.helpers import ValidationError __all__ = ( "hlist_empty", "hlist_for_each", "hlist_for_each_entry", "list_count_nodes", "list_empty", "list_first_entry", "list_first_entry_or_null", "list_for_each", "list_for_each_entry", "list_for_each_entry_reverse", "list_for_each_reverse", "list_is_singular", "list_last_entry", "list_next_entry", "list_prev_entry", "validate_list", "validate_list_for_each", "validate_list_for_each_entry", ) def list_empty(head: Object) -> bool: """ Return whether a list is empty. 
:param head: ``struct list_head *`` """ head = head.read_() return head.next == head def list_is_singular(head: Object) -> bool: """ Return whether a list has only one element. :param head: ``struct list_head *`` """ head = head.read_() next = head.next return next != head and next == head.prev def list_count_nodes(head: Object) -> int: """ Return the number of nodes in a list. :param head: ``struct list_head *`` """ return sum(1 for _ in list_for_each(head)) def list_first_entry(head: Object, type: Union[str, Type], member: str) -> Object: """ Return the first entry in a list. The list is assumed to be non-empty. See also :func:`list_first_entry_or_null()`. :param head: ``struct list_head *`` :param type: Entry type. :param member: Name of list node member in entry type. :return: ``type *`` """ return container_of(head.next, type, member) def list_first_entry_or_null( head: Object, type: Union[str, Type], member: str ) -> Object: """ Return the first entry in a list or ``NULL`` if the list is empty. See also :func:`list_first_entry()`. :param head: ``struct list_head *`` :param type: Entry type. :param member: Name of list node member in entry type. :return: ``type *`` """ head = head.read_() pos = head.next.read_() if pos == head: return NULL(head.prog_, head.prog_.pointer_type(head.prog_.type(type))) else: return container_of(pos, type, member) def list_last_entry(head: Object, type: Union[str, Type], member: str) -> Object: """ Return the last entry in a list. The list is assumed to be non-empty. :param head: ``struct list_head *`` :param type: Entry type. :param member: Name of list node member in entry type. :return: ``type *`` """ return container_of(head.prev, type, member) def list_next_entry(pos: Object, member: str) -> Object: """ Return the next entry in a list. :param pos: ``type*`` :param member: Name of list node member in entry type. 
:return: ``type *`` """ return container_of(getattr(pos, member).next, pos.type_.type, member) def list_prev_entry(pos: Object, member: str) -> Object: """ Return the previous entry in a list. :param pos: ``type*`` :param member: Name of list node member in entry type. :return: ``type *`` """ return container_of(getattr(pos, member).prev, pos.type_.type, member) def list_for_each(head: Object) -> Iterator[Object]: """ Iterate over all of the nodes in a list. :param head: ``struct list_head *`` :return: Iterator of ``struct list_head *`` objects. """ head = head.read_() pos = head.next.read_() while pos != head: yield pos pos = pos.next.read_() def list_for_each_reverse(head: Object) -> Iterator[Object]: """ Iterate over all of the nodes in a list in reverse order. :param head: ``struct list_head *`` :return: Iterator of ``struct list_head *`` objects. """ head = head.read_() pos = head.prev.read_() while pos != head: yield pos pos = pos.prev.read_() def list_for_each_entry( type: Union[str, Type], head: Object, member: str ) -> Iterator[Object]: """ Iterate over all of the entries in a list. :param type: Entry type. :param head: ``struct list_head *`` :param member: Name of list node member in entry type. :return: Iterator of ``type *`` objects. """ type = head.prog_.type(type) for pos in list_for_each(head): yield container_of(pos, type, member) def list_for_each_entry_reverse( type: Union[str, Type], head: Object, member: str ) -> Iterator[Object]: """ Iterate over all of the entries in a list in reverse order. :param type: Entry type. :param head: ``struct list_head *`` :param member: Name of list node member in entry type. :return: Iterator of ``type *`` objects. """ type = head.prog_.type(type) for pos in list_for_each_reverse(head): yield container_of(pos, type, member) def validate_list(head: Object) -> None: """ Validate that the ``next`` and ``prev`` pointers in a list are consistent. 
:param head: ``struct list_head *`` :raises ValidationError: if the list is invalid """ for _ in validate_list_for_each(head): pass def validate_list_for_each(head: Object) -> Iterator[Object]: """ Like :func:`list_for_each()`, but validates the list like :func:`validate_list()` while iterating. :param head: ``struct list_head *`` :raises ValidationError: if the list is invalid """ head = head.read_() pos = head.next.read_() while pos != head: yield pos next = pos.next.read_() next_prev = next.prev.read_() if next_prev != pos: raise ValidationError( f"{pos.format_(dereference=False, symbolize=False)}" f" next {next.format_(dereference=False, symbolize=False, type_name=False)}" f" has prev {next_prev.format_(dereference=False, symbolize=False, type_name=False)}" ) pos = next def validate_list_for_each_entry( type: Union[str, Type], head: Object, member: str ) -> Iterator[Object]: """ Like :func:`list_for_each_entry()`, but validates the list like :func:`validate_list()` while iterating. :param type: Entry type. :param head: ``struct list_head *`` :param member: Name of list node member in entry type. :raises ValidationError: if the list is invalid """ type = head.prog_.type(type) for pos in validate_list_for_each(head): yield container_of(pos, type, member) def hlist_empty(head: Object) -> bool: """ Return whether a hash list is empty. :param head: ``struct hlist_head *`` """ return not head.first def hlist_for_each(head: Object) -> Iterator[Object]: """ Iterate over all of the nodes in a hash list. :param head: ``struct hlist_head *`` :return: Iterator of ``struct hlist_node *`` objects. """ pos = head.first.read_() while pos: yield pos pos = pos.next.read_() def hlist_for_each_entry( type: Union[str, Type], head: Object, member: str ) -> Iterator[Object]: """ Iterate over all of the entries in a hash list. :param type: Entry type. :param head: ``struct hlist_head *`` :param member: Name of list node member in entry type. :return: Iterator of ``type *`` objects. 
""" type = head.prog_.type(type) for pos in hlist_for_each(head): yield container_of(pos, type, member) drgn-0.0.31/drgn/helpers/linux/list_nulls.py000066400000000000000000000027101477777462700210560ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Nulls Lists ----------- The ``drgn.helpers.linux.list_nulls`` module provides helpers for working with the special version of lists (``struct hlist_nulls_head`` and ``struct hlist_nulls_node``) in :linux:`include/linux/list_nulls.h` where the end of list is not a ``NULL`` pointer, but a "nulls" marker. """ from typing import Iterator, Union from drgn import Object, Type, container_of __all__ = ( "hlist_nulls_empty", "hlist_nulls_for_each_entry", "is_a_nulls", ) def is_a_nulls(pos: Object) -> bool: """ Return whether a a pointer is a nulls marker. :param pos: ``struct hlist_nulls_node *`` """ return bool(pos.value_() & 1) def hlist_nulls_empty(head: Object) -> bool: """ Return whether a nulls hash list is empty. :param head: ``struct hlist_nulls_head *`` """ return is_a_nulls(head.first) def hlist_nulls_for_each_entry( type: Union[str, Type], head: Object, member: str ) -> Iterator[Object]: """ Iterate over all the entries in a nulls hash list. :param type: Entry type. :param head: ``struct hlist_nulls_head *`` :param member: Name of list node member in entry type. :return: Iterator of ``type *`` objects. """ type = head.prog_.type(type) pos = head.first while not is_a_nulls(pos): yield container_of(pos, type, member) pos = pos.next drgn-0.0.31/drgn/helpers/linux/llist.py000066400000000000000000000062151477777462700200210ustar00rootroot00000000000000# Copyright (c) 2022, Oracle and/or its affiliates. 
# SPDX-License-Identifier: LGPL-2.1-or-later """ Lockless Lists -------------- The ``drgn.helpers.linux.llist`` module provides helpers for working with the lockless, ``NULL``-terminated, singly-linked list implementation in :linux:`include/linux/llist.h` (``struct llist_head`` and ``struct llist_node``). """ from typing import Iterator, Union from drgn import NULL, Object, Type, container_of __all__ = ( "llist_empty", "llist_first_entry", "llist_first_entry_or_null", "llist_for_each", "llist_for_each_entry", "llist_is_singular", "llist_next_entry", ) def llist_empty(head: Object) -> bool: """ Return whether an llist is empty. :param head: ``struct llist_head *`` """ return not head.first def llist_is_singular(head: Object) -> bool: """ Return whether an llist has only one element. :param head: ``struct llist_head *`` """ first = head.first.read_() return bool(first) and not first.next def llist_first_entry(head: Object, type: Union[str, Type], member: str) -> Object: """ Return the first entry in an llist. The list is assumed to be non-empty. See also :func:`llist_first_entry_or_null()`. :param head: ``struct llist_head *`` :param type: Entry type. :param member: Name of ``struct llist_node`` member in entry type. :return: ``type *`` """ return container_of(head.first, type, member) def llist_first_entry_or_null( head: Object, type: Union[str, Type], member: str ) -> Object: """ Return the first entry in an llist or ``NULL`` if the llist is empty. See also :func:`llist_first_entry()`. :param head: ``struct llist_head *`` :param type: Entry type. :param member: Name of ``struct llist_node`` member in entry type. :return: ``type *`` """ first = head.first.read_() if first: return container_of(first, type, member) else: return NULL(head.prog_, head.prog_.pointer_type(head.prog_.type(type))) def llist_next_entry(pos: Object, member: str) -> Object: """ Return the next entry in an llist. 
:param pos: ``type*`` :param member: Name of ``struct llist_node`` member in entry type. :return: ``type *`` """ return container_of(getattr(pos, member).next, pos.type_.type, member) def llist_for_each(node: Object) -> Iterator[Object]: """ Iterate over all of the nodes in an llist starting from a given node. :param node: ``struct llist_node *`` :return: Iterator of ``struct llist_node *`` objects. """ pos = node.read_() while pos: yield pos pos = pos.next.read_() def llist_for_each_entry( type: Union[str, Type], node: Object, member: str ) -> Iterator[Object]: """ Iterate over all of the entries in an llist starting from a given node. :param type: Entry type. :param node: ``struct llist_node *`` :param member: Name of ``struct llist_node`` member in entry type. :return: Iterator of ``type *`` objects. """ type = node.prog_.type(type) for pos in llist_for_each(node): yield container_of(pos, type, member) drgn-0.0.31/drgn/helpers/linux/mapletree.py000066400000000000000000000126131477777462700206470ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Maple Trees ----------- The ``drgn.helpers.linux.mapletree`` module provides helpers for working with maple trees from :linux:`include/linux/maple_tree.h`. Maple trees were introduced in Linux 6.1. """ import collections import operator from typing import Iterator, Tuple from drgn import NULL, IntegerLike, Object, Program, sizeof from drgn.helpers.linux.xarray import _XA_ZERO_ENTRY, _xa_is_node, xa_is_zero __all__ = ( "mt_for_each", "mtree_load", ) def _ulong_max(prog: Program) -> int: return (1 << (8 * sizeof(prog.type("unsigned long")))) - 1 # Combination of mte_to_node(), mte_node_type(), ma_data_end(), and # ma_is_leaf(). 
def _mte_to_node( prog: Program, entry_value: int, max: int ) -> Tuple[Object, Object, Object, int, bool]: MAPLE_NODE_MASK = 255 MAPLE_NODE_TYPE_MASK = 0xF MAPLE_NODE_TYPE_SHIFT = 0x3 maple_leaf_64 = 1 maple_range_64 = 2 maple_arange_64 = 3 node = Object(prog, "struct maple_node *", entry_value & ~MAPLE_NODE_MASK) type = (entry_value >> MAPLE_NODE_TYPE_SHIFT) & MAPLE_NODE_TYPE_MASK if type == maple_arange_64: m = node.ma64 pivots = m.pivot slots = m.slot end = m.meta.end.value_() elif type == maple_range_64 or type == maple_leaf_64: m = node.mr64 pivots = m.pivot slots = m.slot pivot = pivots[len(pivots) - 1].value_() if not pivot: end = m.meta.end.value_() elif pivot == max: end = len(pivots) - 1 else: end = len(pivots) else: raise NotImplementedError(f"unknown maple_type {type}") return node, pivots, slots, end, type < maple_range_64 def mtree_load(mt: Object, index: IntegerLike, *, advanced: bool = False) -> Object: """ Look up the entry at a given index in a maple tree. >>> entry = mtree_load(task.mm.mm_mt.address_of_(), 0x55d65cfaa000) >>> cast("struct vm_area_struct *", entry) *(struct vm_area_struct *)0xffff97ad82bfc930 = { ... } :param mt: ``struct maple_tree *`` :param index: Entry index. :param advanced: Whether to return nodes only visible to the maple tree advanced API. If ``False``, zero entries (see :func:`~drgn.helpers.linux.xarray.xa_is_zero()`) will be returned as ``NULL``. :return: ``void *`` found entry, or ``NULL`` if not found. 
""" prog = mt.prog_ index = operator.index(index) entry = mt.ma_root.read_() entry_value = entry.value_() if _xa_is_node(entry_value): max = _ulong_max(prog) while True: node, pivots, slots, end, leaf = _mte_to_node(prog, entry_value, max) for offset in range(end): pivot = pivots[offset].value_() if pivot >= index: max = pivot break else: offset = end entry_value = slots[offset].value_() if leaf: if not advanced and entry_value == _XA_ZERO_ENTRY: return NULL(prog, "void *") return Object(prog, "void *", entry_value) elif entry_value and index == 0: return entry else: return NULL(prog, "void *") def mt_for_each( mt: Object, *, advanced: bool = False ) -> Iterator[Tuple[int, int, Object]]: """ Iterate over all of the entries and their ranges in a maple tree. >>> for first_index, last_index, entry in mt_for_each(task.mm.mm_mt.address_of_()): ... print(hex(first_index), hex(last_index), entry) ... 0x55d65cfaa000 0x55d65cfaafff (void *)0xffff97ad82bfc930 0x55d65cfab000 0x55d65cfabfff (void *)0xffff97ad82bfc0a8 0x55d65cfac000 0x55d65cfacfff (void *)0xffff97ad82bfc000 0x55d65cfad000 0x55d65cfadfff (void *)0xffff97ad82bfcb28 ... :param mt: ``struct maple_tree *`` :param advanced: Whether to return nodes only visible to the maple tree advanced API. If ``False``, zero entries (see :func:`~drgn.helpers.linux.xarray.xa_is_zero()`) will be skipped. :return: Iterator of (first_index, last_index, ``void *``) tuples. Both indices are inclusive. 
""" entry = mt.ma_root.read_() entry_value = entry.value_() if _xa_is_node(entry_value): prog = mt.prog_ queue = collections.deque(((entry_value, 0, _ulong_max(prog)),)) while queue: entry_value, min, max = queue.popleft() node, pivots, slots, end, leaf = _mte_to_node(prog, entry_value, max) if leaf: prev = min for offset in range(end): pivot = pivots[offset].value_() slot = slots[offset].read_() if slot and (advanced or not xa_is_zero(slot)): yield (prev, pivot, slot) prev = pivot + 1 slot = slots[end].read_() if slot and (advanced or not xa_is_zero(slot)): yield (prev, max, slot) else: prev = min for offset in range(end): pivot = pivots[offset].value_() queue.append((slots[offset].value_(), prev, pivot)) prev = pivot + 1 queue.append((slots[end].value_(), prev, max)) elif entry_value: yield (0, 0, entry) drgn-0.0.31/drgn/helpers/linux/mm.py000066400000000000000000001170511477777462700173040ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Memory Management ----------------- The ``drgn.helpers.linux.mm`` module provides helpers for working with the Linux memory management (MM) subsystem. .. _virtual address translation failures: Helpers that translate virtual addresses or read virtual memory may fail for multiple reasons: 1. If the address is invalid. 2. If the address is swapped or paged out. 3. If the address is in `high memory `_. High memory is only used for userspace memory by 32-bit systems with a lot of physical memory, and only if ``CONFIG_HIGHMEM`` is enabled. 3a. If the page table is in high memory. This is only possible if ``CONFIG_HIGHPTE`` is enabled. 
""" import operator import re from typing import Callable, Iterator, List, Optional from _drgn import ( _linux_helper_direct_mapping_offset, _linux_helper_follow_phys, _linux_helper_read_vm, ) from drgn import NULL, IntegerLike, Object, ObjectAbsentError, Program, cast from drgn.helpers.common.format import decode_enum_type_flags from drgn.helpers.common.prog import takes_program_or_default from drgn.helpers.linux.list import list_for_each_entry from drgn.helpers.linux.mapletree import mt_for_each, mtree_load from drgn.helpers.linux.rbtree import rb_find __all__ = ( "PFN_PHYS", "PHYS_PFN", "PageCompound", "PageHead", "PageSlab", "PageTail", "access_process_vm", "access_remote_vm", "cmdline", "compound_head", "compound_nr", "compound_order", "decode_page_flags", "environ", "find_vmap_area", "follow_page", "follow_pfn", "follow_phys", "for_each_page", "for_each_vma", "for_each_vmap_area", "page_size", "page_to_pfn", "page_to_phys", "page_to_virt", "pfn_to_page", "pfn_to_virt", "phys_to_page", "phys_to_virt", "totalram_pages", "virt_to_page", "virt_to_pfn", "virt_to_phys", "vma_find", "vmalloc_to_page", "vmalloc_to_pfn", # Generated by scripts/generate_page_flag_getters.py. "PageActive", "PageChecked", "PageDirty", "PageDoubleMap", "PageError", "PageForeign", "PageHWPoison", "PageHasHWPoisoned", "PageIdle", "PageIsolated", "PageLRU", "PageLocked", "PageMappedToDisk", "PageMlocked", "PageOwnerPriv1", "PagePinned", "PagePrivate", "PagePrivate2", "PageReadahead", "PageReclaim", "PageReferenced", "PageReported", "PageReserved", "PageSavePinned", "PageSkipKASanPoison", "PageSlobFree", "PageSwapBacked", "PageUncached", "PageUnevictable", "PageUptodate", "PageVmemmapSelfHosted", "PageWaiters", "PageWorkingset", "PageWriteback", "PageXenRemapped", "PageYoung", ) def PageActive(page: Object) -> bool: """ Return whether the ``PG_active`` flag is set on a page. 
:param page: ``struct page *`` """ try: flag = page.prog_["PG_active"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageChecked(page: Object) -> bool: """ Return whether the ``PG_checked`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_checked"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageDirty(page: Object) -> bool: """ Return whether the ``PG_dirty`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_dirty"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageDoubleMap(page: Object) -> bool: """ Return whether the ``PG_double_map`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_double_map"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageError(page: Object) -> bool: """ Return whether the ``PG_error`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_error"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageForeign(page: Object) -> bool: """ Return whether the ``PG_foreign`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_foreign"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageHWPoison(page: Object) -> bool: """ Return whether the ``PG_hwpoison`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_hwpoison"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageHasHWPoisoned(page: Object) -> bool: """ Return whether the ``PG_has_hwpoisoned`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_has_hwpoisoned"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageIdle(page: Object) -> bool: """ Return whether the ``PG_idle`` flag is set on a page. 
:param page: ``struct page *`` """ try: flag = page.prog_["PG_idle"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageIsolated(page: Object) -> bool: """ Return whether the ``PG_isolated`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_isolated"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageLRU(page: Object) -> bool: """ Return whether the ``PG_lru`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_lru"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageLocked(page: Object) -> bool: """ Return whether the ``PG_locked`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_locked"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageMappedToDisk(page: Object) -> bool: """ Return whether the ``PG_mappedtodisk`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_mappedtodisk"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageMlocked(page: Object) -> bool: """ Return whether the ``PG_mlocked`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_mlocked"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageOwnerPriv1(page: Object) -> bool: """ Return whether the ``PG_owner_priv_1`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_owner_priv_1"] except KeyError: return False return bool(page.flags & (1 << flag)) def PagePinned(page: Object) -> bool: """ Return whether the ``PG_pinned`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_pinned"] except KeyError: return False return bool(page.flags & (1 << flag)) def PagePrivate(page: Object) -> bool: """ Return whether the ``PG_private`` flag is set on a page. 
:param page: ``struct page *`` """ try: flag = page.prog_["PG_private"] except KeyError: return False return bool(page.flags & (1 << flag)) def PagePrivate2(page: Object) -> bool: """ Return whether the ``PG_private_2`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_private_2"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageReadahead(page: Object) -> bool: """ Return whether the ``PG_readahead`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_readahead"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageReclaim(page: Object) -> bool: """ Return whether the ``PG_reclaim`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_reclaim"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageReferenced(page: Object) -> bool: """ Return whether the ``PG_referenced`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_referenced"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageReported(page: Object) -> bool: """ Return whether the ``PG_reported`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_reported"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageReserved(page: Object) -> bool: """ Return whether the ``PG_reserved`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_reserved"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageSavePinned(page: Object) -> bool: """ Return whether the ``PG_savepinned`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_savepinned"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageSkipKASanPoison(page: Object) -> bool: """ Return whether the ``PG_skip_kasan_poison`` flag is set on a page. 
:param page: ``struct page *`` """ try: flag = page.prog_["PG_skip_kasan_poison"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageSlobFree(page: Object) -> bool: """ Return whether the ``PG_slob_free`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_slob_free"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageSwapBacked(page: Object) -> bool: """ Return whether the ``PG_swapbacked`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_swapbacked"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageUncached(page: Object) -> bool: """ Return whether the ``PG_uncached`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_uncached"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageUnevictable(page: Object) -> bool: """ Return whether the ``PG_unevictable`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_unevictable"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageUptodate(page: Object) -> bool: """ Return whether the ``PG_uptodate`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_uptodate"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageVmemmapSelfHosted(page: Object) -> bool: """ Return whether the ``PG_vmemmap_self_hosted`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_vmemmap_self_hosted"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageWaiters(page: Object) -> bool: """ Return whether the ``PG_waiters`` flag is set on a page. 
:param page: ``struct page *`` """ try: flag = page.prog_["PG_waiters"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageWorkingset(page: Object) -> bool: """ Return whether the ``PG_workingset`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_workingset"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageWriteback(page: Object) -> bool: """ Return whether the ``PG_writeback`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_writeback"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageXenRemapped(page: Object) -> bool: """ Return whether the ``PG_xen_remapped`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_xen_remapped"] except KeyError: return False return bool(page.flags & (1 << flag)) def PageYoung(page: Object) -> bool: """ Return whether the ``PG_young`` flag is set on a page. :param page: ``struct page *`` """ try: flag = page.prog_["PG_young"] except KeyError: return False return bool(page.flags & (1 << flag)) # End generated by scripts/generate_page_flag_getters.py. def _get_PageSlab_impl(prog: Program) -> Callable[[Object], bool]: # Since Linux kernel commit 46df8e73a4a3 ("mm: free up PG_slab") (in # v6.10), slab pages are identified by a page type, which is indicated by a # mapcount value matching a value in VMCOREINFO. Before that, they are # indicated by a page flag. 
try: return prog.cache["PageSlab"] except KeyError: pass vmcoreinfo = prog["VMCOREINFO"].string_() match = re.search( rb"^NUMBER\(PAGE_SLAB_MAPCOUNT_VALUE\)=(-?[0-9]+)$", vmcoreinfo, flags=re.M ) if match: PAGE_SLAB_MAPCOUNT_VALUE = int(match.group(1)) def PageSlab(page: Object) -> bool: return page._mapcount.counter.value_() == PAGE_SLAB_MAPCOUNT_VALUE else: mask = 1 << prog["PG_slab"] def PageSlab(page: Object) -> bool: return bool(page.flags & mask) prog.cache["PageSlab"] = PageSlab return PageSlab def PageSlab(page: Object) -> bool: """ Return whether a page belongs to the slab allocator. :param page: ``struct page *`` """ return _get_PageSlab_impl(page.prog_)(page) def PageCompound(page: Object) -> bool: """ Return whether a page is part of a `compound page `_. :param page: ``struct page *`` """ page = page.read_() # Since Linux kernel commit 1d798ca3f164 ("mm: make compound_head() # robust") (in v4.4), PG_head is always defined, and a tail page has the # least significant bit of compound_head set. Before that, there is no # compound_head (and no fake head pages). Instead, if # CONFIG_PAGEFLAGS_EXTENDED=y, then PG_head and PG_tail are defined. # Otherwise, there is only PG_compound, and PG_reclaim is set for tail # pages and clear for head pages. try: PG_head = page.prog_["PG_head"] except KeyError: return bool(page.flags & (1 << page.prog_["PG_compound"])) else: flags = page.flags.read_() if flags & (1 << PG_head): return True try: return bool(page.compound_head.read_() & 1) except AttributeError: return bool(flags & (1 << page.prog_["PG_tail"])) # HugeTLB Vmemmap Optimization (HVO) creates "fake" head pages that are # actually tail pages. See Linux kernel commit e7d324850bfc ("mm: hugetlb: free # the 2nd vmemmap page associated with each HugeTLB page") (in v5.18) and # https://www.kernel.org/doc/html/latest/mm/vmemmap_dedup.html. 
def _page_is_fake_head(page: Object) -> bool:
    # NOTE(review): inspects the struct page immediately following *page*
    # (page[1]). If that page's compound_head has its low tail bit set but
    # does not point back at *page*, then *page* is presumably an
    # HVO-style "fake" head page — TODO confirm against the HVO commit
    # referenced elsewhere in this file. Raises AttributeError when struct
    # page has no compound_head member (pre-v4.4 layouts).
    head = page[1].compound_head.value_()
    return bool(head & 1) and (head - 1) != page.value_()


def PageHead(page: Object) -> bool:
    """
    Return whether a page is a head page in a `compound page`_.

    :param page: ``struct page *``
    """
    page = page.read_()
    # See PageCompound() re: Linux kernel commit 1d798ca3f164 ("mm: make
    # compound_head() robust") (in v4.4).
    try:
        PG_head = page.prog_["PG_head"]
    except KeyError:
        # Old kernels without PG_head: a head page has PG_compound set and
        # PG_reclaim (reused as the tail marker) clear.
        PG_compound = page.prog_["PG_compound"]
        PG_head_mask = 1 << PG_compound
        PG_head_tail_mask = PG_head_mask | (1 << page.prog_["PG_reclaim"])
        return (page.flags & PG_head_tail_mask) == PG_head_mask
    else:
        if not (page.flags & (1 << PG_head)):
            return False
        try:
            # PG_head is set; it is only a real head if it is not a fake one.
            return not _page_is_fake_head(page)
        except AttributeError:
            # No compound_head member means there are no fake head pages.
            return True


def PageTail(page: Object) -> bool:
    """
    Return whether a page is a tail page in a `compound page`_.

    :param page: ``struct page *``
    """
    page = page.read_()
    # See PageCompound() re: Linux kernel commit 1d798ca3f164 ("mm: make
    # compound_head() robust") (in v4.4).
    try:
        # Modern layout: the low bit of compound_head marks a tail page.
        if page.compound_head.value_() & 1:
            return True
    except AttributeError:
        try:
            PG_tail = page.prog_["PG_tail"]
        except KeyError:
            # Oldest layout: a tail page has both PG_compound and PG_reclaim
            # set.
            PG_head_tail_mask = (1 << page.prog_["PG_compound"]) | (
                1 << page.prog_["PG_reclaim"]
            )
            return (page.flags & PG_head_tail_mask) == PG_head_tail_mask
        else:
            return bool(page.flags & (1 << PG_tail))
    # compound_head exists but its tail bit is clear: the page is still a
    # tail page if it is a fake head (see _page_is_fake_head()).
    if page.flags & (1 << page.prog_["PG_head"]):
        return _page_is_fake_head(page)
    return False


def compound_head(page: Object) -> Object:
    """
    Get the head page associated with a page.

    If *page* is a tail page, this returns the head page of the `compound
    page`_ it belongs to. Otherwise, it returns *page*.

    :param page: ``struct page *``
    :return: ``struct page *``
    """
    page = page.read_()
    try:
        head = page.compound_head.read_()
    except AttributeError:
        # Before Linux kernel commit 1d798ca3f164 ("mm: make compound_head()
        # robust") (in v4.4), the head page is in page->first_page, and there
        # are no fake head pages.
return page.first_page.read_() if PageTail(page) else page if head & 1: return cast(page.type_, head - 1) # Handle fake head pages (see _page_is_fake_head()). if page.flags & (1 << page.prog_["PG_head"]): head = page[1].compound_head.read_() if head & 1: return cast(page.type_, head - 1) return page def compound_order(page: Object) -> Object: """ Return the allocation order of a potentially `compound page`_. :param page: ``struct page *`` :return: ``unsigned int`` """ prog = page.prog_ if not PageHead(page): return Object(prog, "unsigned int", 0) # Since Linux kernel commit ebc1baf5c9b4 ("mm: free up a word in the first # tail page") (in v6.6), the compound order is in the low byte of struct # folio::_flags_1 (from_folio = 2). Between that and Linux kernel commit # Linux kernel commit 379708ffde1b ("mm: add the first tail page to struct # folio") (in v6.1), the compound order is in struct folio::_folio_order # (from_folio = 1). Before Linux kernel commit 1c5509be58f6 ("mm: remove # 'First tail page' members from struct page") (in v6.3), the compound # order is in struct page::compound_order of the first tail page # (from_folio = 0). try: from_folio = prog.cache["compound_order_from_folio"] except KeyError: from_folio = 0 try: struct_folio = prog.type("struct folio") except LookupError: pass else: if struct_folio.has_member("_folio_order"): from_folio = 1 elif struct_folio.has_member("_flags_1"): from_folio = 2 prog.cache["compound_order_from_folio"] = from_folio if from_folio == 2: return cast("unsigned int", cast("struct folio *", page)._flags_1 & 0xFF) elif from_folio == 1: return cast("unsigned int", cast("struct folio *", page)._folio_order) else: return cast("unsigned int", page[1].compound_order) def compound_nr(page: Object) -> Object: """ Return the number of pages in a potentially `compound page`_. 
:param page: ``struct page *`` :return: ``unsigned long`` """ return Object(page.prog_, "unsigned long", 1) << compound_order(page) def page_size(page: Object) -> Object: """ Return the number of bytes in a potentially `compound page`_. :param page: ``struct page *`` :return: ``unsigned long`` """ return page.prog_["PAGE_SIZE"] << compound_order(page) def decode_page_flags(page: Object) -> str: """ Get a human-readable representation of the flags set on a page. >>> decode_page_flags(page) 'PG_uptodate|PG_dirty|PG_lru|PG_reclaim|PG_swapbacked|PG_readahead|PG_savepinned|PG_isolated|PG_reported' :param page: ``struct page *`` """ NR_PAGEFLAGS = page.prog_["__NR_PAGEFLAGS"] PAGEFLAGS_MASK = (1 << NR_PAGEFLAGS.value_()) - 1 return decode_enum_type_flags( page.flags.value_() & PAGEFLAGS_MASK, NR_PAGEFLAGS.type_ ) # Get the struct page * for PFN 0. def _page0(prog: Program) -> Object: try: return prog.cache["page0"] except KeyError: pass try: # With CONFIG_SPARSEMEM_VMEMMAP=y, page 0 is vmemmap. page0 = prog["vmemmap"] except KeyError: contig_page_data = prog["contig_page_data"] # With CONFIG_FLATMEM=y, page 0 is mem_map - ARCH_PFN_OFFSET, but we # can't determine ARCH_PFN_OFFSET easily. Alternatively, # contig_page_data.node_mem_map is the struct page * for # contig_page_data.node_start_pfn, therefore page 0 is: page0 = contig_page_data.node_mem_map - contig_page_data.node_start_pfn # The struct page array is not contiguous for CONFIG_SPARSEMEM=y with # CONFIG_SPARSEMEM_VMEMMAP=n or CONFIG_DISCONTIGMEM=y, so those are not # supported yet. prog.cache["page0"] = page0 return page0 @takes_program_or_default def for_each_page(prog: Program) -> Iterator[Object]: """ Iterate over every ``struct page *`` from the minimum to the maximum page. .. note:: This may include offline pages which don't have a valid ``struct page``. Wrap accesses in a ``try`` ... ``except`` :class:`drgn.FaultError`: >>> for page in for_each_page(): ... try: ... if PageLRU(page): ... print(hex(page)) ... 
    ...     except drgn.FaultError:
    ...         continue
    0xfffffb4a000c0000
    0xfffffb4a000c0040
    ...

    This may be fixed in the future.

    :return: Iterator of ``struct page *`` objects.
    """
    # Pages are yielded as successive elements of the flat struct page array
    # starting at the struct page * for PFN 0.
    page0 = _page0(prog)
    for i in range(prog["min_low_pfn"], prog["max_pfn"]):
        yield page0 + i


@takes_program_or_default
def PFN_PHYS(prog: Program, pfn: IntegerLike) -> Object:
    """
    Get the physical address of a page frame number (PFN).

    :param pfn: ``unsigned long``
    :return: ``phys_addr_t``
    """
    return Object(prog, "phys_addr_t", pfn) << prog["PAGE_SHIFT"]


@takes_program_or_default
def PHYS_PFN(prog: Program, addr: IntegerLike) -> Object:
    """
    Get the page frame number (PFN) of a physical address.

    :param addr: ``phys_addr_t``
    :return: ``unsigned long``
    """
    return Object(prog, "unsigned long", addr) >> prog["PAGE_SHIFT"]


def page_to_pfn(page: Object) -> Object:
    """
    Get the page frame number (PFN) of a page.

    :param page: ``struct page *``
    :return: ``unsigned long``
    """
    # The PFN is the index of the page within the struct page array for PFN 0
    # (pointer subtraction yields the element distance).
    return cast("unsigned long", page - _page0(page.prog_))


def page_to_phys(page: Object) -> Object:
    """
    Get the physical address of a page.

    :param page: ``struct page *``
    :return: ``phys_addr_t``
    """
    return PFN_PHYS(page_to_pfn(page))


def page_to_virt(page: Object) -> Object:
    """
    Get the directly mapped virtual address of a page.

    :param page: ``struct page *``
    :return: ``void *``
    """
    return pfn_to_virt(page_to_pfn(page))


@takes_program_or_default
def pfn_to_page(prog: Program, pfn: IntegerLike) -> Object:
    """
    Get the page with a page frame number (PFN).

    :param pfn: ``unsigned long``
    :return: ``struct page *``
    """
    return _page0(prog) + pfn


@takes_program_or_default
def pfn_to_virt(prog: Program, pfn: IntegerLike) -> Object:
    """
    Get the directly mapped virtual address of a page frame number (PFN).

    :param pfn: ``unsigned long``
    :return: ``void *``
    """
    return phys_to_virt(PFN_PHYS(prog, pfn))


@takes_program_or_default
def phys_to_page(prog: Program, addr: IntegerLike) -> Object:
    """
    Get the page containing a physical address.
:param addr: ``phys_addr_t`` :return: ``struct page *`` """ return pfn_to_page(PHYS_PFN(prog, addr)) @takes_program_or_default def phys_to_virt(prog: Program, addr: IntegerLike) -> Object: """ Get the directly mapped virtual address of a physical address. :param addr: ``phys_addr_t`` :return: ``void *`` """ return Object( prog, "void *", operator.index(addr) + _linux_helper_direct_mapping_offset(prog) ) @takes_program_or_default def virt_to_page(prog: Program, addr: IntegerLike) -> Object: """ Get the page containing a directly mapped virtual address. .. _mm-helpers-direct-map: .. note:: This only works for virtual addresses from the "direct map". This includes address from: * kmalloc * Slab allocator * Page allocator But not: * vmalloc * vmap * ioremap * Symbols (function pointers, global variables) For vmalloc or vmap addresses, use :func:`vmalloc_to_page(addr) `. For arbitrary kernel addresses, use :func:`follow_page(prog["init_mm"].address_of_(), addr) `. :param addr: ``void *`` :return: ``struct page *`` """ return pfn_to_page(virt_to_pfn(prog, addr)) @takes_program_or_default def virt_to_pfn(prog: Program, addr: IntegerLike) -> Object: """ Get the page frame number (PFN) of a directly mapped virtual address. .. note:: This only works for virtual addresses from the :ref:`"direct map" `. For vmalloc or vmap addresses, use :func:`vmalloc_to_pfn(addr) `. For arbitrary kernel addresses, use :func:`follow_pfn(prog["init_mm"].address_of_(), addr) `. :param addr: ``void *`` :return: ``unsigned long`` """ return PHYS_PFN(virt_to_phys(prog, addr)) @takes_program_or_default def virt_to_phys(prog: Program, addr: IntegerLike) -> Object: """ Get the physical address of a directly mapped virtual address. .. note:: This only works for virtual addresses from the :ref:`"direct map" `. For arbitrary kernel addresses, use :func:`follow_phys(prog["init_mm"].address_of_(), addr) `. 
:param addr: ``void *`` :return: ``phys_addr_t`` """ return Object( prog, "unsigned long", operator.index(addr) - _linux_helper_direct_mapping_offset(prog), ) def follow_page(mm: Object, addr: IntegerLike) -> Object: """ Get the page that a virtual address maps to in a virtual address space. >>> task = find_task(113) >>> follow_page(task.mm, 0x7fffbbb6d4d0) *(struct page *)0xffffbe4bc0337b80 = { ... } :param mm: ``struct mm_struct *`` :param addr: ``void *`` :return: ``struct page *`` :raises FaultError: if the virtual address :ref:`cannot be translated ` :raises NotImplementedError: if virtual address translation is :ref:`not supported ` for this architecture yet """ return phys_to_page(follow_phys(mm, addr)) def follow_pfn(mm: Object, addr: IntegerLike) -> Object: """ Get the page frame number (PFN) that a virtual address maps to in a virtual address space. >>> task = find_task(113) >>> follow_pfn(task.mm, 0x7fffbbb6d4d0) (unsigned long)52718 :param mm: ``struct mm_struct *`` :param addr: ``void *`` :return: ``unsigned long`` :raises FaultError: if the virtual address :ref:`cannot be translated ` :raises NotImplementedError: if virtual address translation is :ref:`not supported ` for this architecture yet """ return PHYS_PFN(follow_phys(mm, addr)) def follow_phys(mm: Object, addr: IntegerLike) -> Object: """ Get the physical address that a virtual address maps to in a virtual address space. 
    >>> task = find_task(113)
    >>> follow_phys(task.mm, 0x7fffbbb6d4d0)
    (phys_addr_t)215934160

    :param mm: ``struct mm_struct *``
    :param addr: ``void *``
    :return: ``phys_addr_t``
    :raises FaultError: if the virtual address :ref:`cannot be translated `
    :raises NotImplementedError: if virtual address translation is :ref:`not
        supported ` for this architecture yet
    """
    prog = mm.prog_
    return Object(prog, "phys_addr_t", _linux_helper_follow_phys(prog, mm.pgd, addr))


@takes_program_or_default
def vmalloc_to_page(prog: Program, addr: IntegerLike) -> Object:
    """
    Get the page containing a vmalloc or vmap address.

    >>> task = find_task(113)
    >>> vmalloc_to_page(task.stack)
    *(struct page *)0xffffbe4bc00a2200 = {
        ...
    }

    :param addr: ``void *``
    :return: ``struct page *``
    """
    # vmalloc/vmap mappings live in the kernel's own address space, so walk
    # the init_mm page tables.
    return follow_page(prog["init_mm"].address_of_(), addr)


@takes_program_or_default
def vmalloc_to_pfn(prog: Program, addr: IntegerLike) -> Object:
    """
    Get the page frame number (PFN) containing a vmalloc or vmap address.

    >>> task = find_task(113)
    >>> vmalloc_to_pfn(task.stack)
    (unsigned long)10376

    :param addr: ``void *``
    :return: ``unsigned long``
    """
    return page_to_pfn(vmalloc_to_page(prog, addr))


def _vmap_area_rb_cmp(addr: int, va: Object) -> int:
    # rb_find() comparator: -1/1/0 for addr below, above, or within the
    # half-open range [va.va_start, va.va_end).
    if addr < va.va_start.value_():
        return -1
    elif addr >= va.va_end.value_():
        return 1
    else:
        return 0


def _vmap_nodes(prog: Program) -> Object:
    # Read the vmap_nodes pointer, falling back to the static "single" node
    # when the variable has no location in the debug info.
    vmap_nodes = prog["vmap_nodes"]
    try:
        return vmap_nodes.read_()
    except ObjectAbsentError:
        # On !SMP and 32-bit kernels, vmap_nodes is initialized to &single and
        # never reassigned. GCC as of version 12.2 doesn't generate a location
        # for vmap_nodes description in that case.
        return prog.variable("single", "mm/vmalloc.c").address_of_()


@takes_program_or_default
def find_vmap_area(prog: Program, addr: IntegerLike) -> Object:
    """
    Return the ``struct vmap_area *`` containing an address.

    >>> find_vmap_area(0xffffa2b680081000)
    *(struct vmap_area *)0xffffa16541046b40 = {
        ...
    }

    :param addr: Address to look up.
:return: ``struct vmap_area *`` (``NULL`` if not found) """ addr = operator.index(addr) # Since Linux kernel commit d093602919ad ("mm: vmalloc: remove global # vmap_area_root rb-tree") (in v6.9), vmap areas are split up in multiple # red-black trees in separate "nodes". Before that, they're in a single # red-black tree. try: vmap_nodes = _vmap_nodes(prog) except KeyError: return rb_find( "struct vmap_area", prog["vmap_area_root"].address_of_(), "rb_node", addr, _vmap_area_rb_cmp, ) else: nr_vmap_nodes = prog["nr_vmap_nodes"].value_() i = j = (addr // prog["vmap_zone_size"].value_()) % nr_vmap_nodes while True: vn = vmap_nodes[i] va = rb_find( "struct vmap_area", vn.busy.root.address_of_(), "rb_node", addr, _vmap_area_rb_cmp, ) if va: return va # As noted in the kernel implementation, the given address may be # in a different node than the start address, so we have to loop. i = (i + 1) % nr_vmap_nodes if i == j: return NULL(prog, "struct vmap_area *") @takes_program_or_default def for_each_vmap_area(prog: Program) -> Iterator[Object]: """ Iterate over every ``struct vmap_area *`` on the system. >>> for va in for_each_vmap_area(): ... caller = "" ... if va.vm: ... sym = prog.symbol(va.vm.caller) ... if sym: ... caller = f" {sym.name}" ... print(f"{hex(va.va_start)}-{hex(va.va_end)}{caller}") ... 0xffffa2b680000000-0xffffa2b680005000 irq_init_percpu_irqstack 0xffffa2b680005000-0xffffa2b680007000 acpi_os_map_iomem 0xffffa2b68000b000-0xffffa2b68000d000 hpet_enable 0xffffa2b680080000-0xffffa2b680085000 kernel_clone ... :return: Iterator of ``struct vmap_area *`` objects. """ # Since Linux kernel commit d093602919ad ("mm: vmalloc: remove global # vmap_area_root rb-tree") (in v6.9), vmap areas are split up in multiple # lists in separate "nodes". Before that, they're in a single list. 
try: vmap_nodes = _vmap_nodes(prog) except KeyError: yield from list_for_each_entry( "struct vmap_area", prog["vmap_area_list"].address_of_(), "list" ) else: for i in range(prog["nr_vmap_nodes"]): yield from list_for_each_entry( "struct vmap_area", vmap_nodes[i].busy.head.address_of_(), "list" ) def access_process_vm(task: Object, address: IntegerLike, size: IntegerLike) -> bytes: """ Read memory from a task's virtual address space. >>> task = find_task(1490152) >>> access_process_vm(task, 0x7f8a62b56da0, 12) b'hello, world' :param task: ``struct task_struct *`` :param address: Starting address. :param size: Number of bytes to read. :raises FaultError: if the virtual address :ref:`cannot be translated ` :raises NotImplementedError: if virtual address translation is :ref:`not supported ` for this architecture yet """ return _linux_helper_read_vm(task.prog_, task.mm.pgd, address, size) def access_remote_vm(mm: Object, address: IntegerLike, size: IntegerLike) -> bytes: """ Read memory from a virtual address space. This is similar to :func:`access_process_vm()`, but it takes a ``struct mm_struct *`` instead of a ``struct task_struct *``. >>> task = find_task(1490152) >>> access_remote_vm(task.mm, 0x7f8a62b56da0, 12) b'hello, world' :param mm: ``struct mm_struct *`` :param address: Starting address. :param size: Number of bytes to read. :raises FaultError: if the virtual address :ref:`cannot be translated ` :raises NotImplementedError: if virtual address translation is :ref:`not supported ` for this architecture yet """ return _linux_helper_read_vm(mm.prog_, mm.pgd, address, size) def cmdline(task: Object) -> Optional[List[bytes]]: """ Get the list of command line arguments of a task, or ``None`` for kernel tasks. >>> cmdline(find_task(1495216)) [b'vim', b'drgn/helpers/linux/mm.py'] .. 
    code-block:: console

        $ tr '\\0' ' ' < /proc/1495216/cmdline
        vim drgn/helpers/linux/mm.py

    :param task: ``struct task_struct *``
    :raises FaultError: if the virtual address containing the command line
        :ref:`cannot be translated `
    :raises NotImplementedError: if virtual address translation is :ref:`not
        supported ` for this architecture yet
    """
    mm = task.mm.read_()
    # Kernel tasks have no mm, so there is no command line to read.
    if not mm:
        return None
    arg_start = mm.arg_start.value_()
    arg_end = mm.arg_end.value_()
    # Split on NUL separators; drop the empty element produced by the
    # trailing NUL.
    return access_remote_vm(mm, arg_start, arg_end - arg_start).split(b"\0")[:-1]


def environ(task: Object) -> Optional[List[bytes]]:
    """
    Get the list of environment variables of a task, or ``None`` for kernel
    tasks.

    >>> environ(find_task(1497797))
    [b'HOME=/root', b'PATH=/usr/local/sbin:/usr/local/bin:/usr/bin', b'LOGNAME=root']

    .. code-block:: console

        $ tr '\\0' '\\n' < /proc/1497797/environ
        HOME=/root
        PATH=/usr/local/sbin:/usr/local/bin:/usr/bin
        LOGNAME=root

    :param task: ``struct task_struct *``
    :raises FaultError: if the virtual address containing the environment
        :ref:`cannot be translated `
    :raises NotImplementedError: if virtual address translation is :ref:`not
        supported ` for this architecture yet
    """
    mm = task.mm.read_()
    # Kernel tasks have no mm, so there is no environment to read.
    if not mm:
        return None
    env_start = mm.env_start.value_()
    env_end = mm.env_end.value_()
    # Split on NUL separators; drop the empty element produced by the
    # trailing NUL.
    return access_remote_vm(mm, env_start, env_end - env_start).split(b"\0")[:-1]


def _vma_rb_cmp(addr: int, vma: Object) -> int:
    # rb_find() comparator: -1/1/0 for addr below, above, or within the
    # half-open range [vma.vm_start, vma.vm_end).
    if addr < vma.vm_start.value_():
        return -1
    elif addr >= vma.vm_end.value_():
        return 1
    else:
        return 0


def vma_find(mm: Object, addr: IntegerLike) -> Object:
    """
    Return the virtual memory area (VMA) containing an address.

    :param mm: ``struct mm_struct *``
    :param addr: Address to look up.
    :return: ``struct vm_area_struct *`` (``NULL`` if not found)
    """
    try:
        # Since Linux kernel commit 524e00b36e8c ("mm: remove rb tree.") (in
        # v6.1), VMAs are stored in a maple tree.
        mt = mm.mm_mt.address_of_()
    except AttributeError:
        # Before that, they are in a red-black tree.
return rb_find( "struct vm_area_struct", mm.mm_rb.address_of_(), "vm_rb", operator.index(addr), _vma_rb_cmp, ) else: return cast("struct vm_area_struct *", mtree_load(mt, addr)) def for_each_vma(mm: Object) -> Iterator[Object]: """ Iterate over every virtual memory area (VMA) in a virtual address space. >>> for vma in for_each_vma(task.mm): ... print(vma) ... *(struct vm_area_struct *)0xffff97ad82bfc930 = { ... } *(struct vm_area_struct *)0xffff97ad82bfc0a8 = { ... } ... :param mm: ``struct mm_struct *`` :return: Iterator of ``struct vm_area_struct *`` objects. """ try: # Since Linux kernel commit 763ecb035029 ("mm: remove the vma linked # list") (in v6.1), VMAs are stored in a maple tree. mt = mm.mm_mt.address_of_() except AttributeError: # Before that, they are in a linked list. vma = mm.mmap while vma: yield vma vma = vma.vm_next else: type = mm.prog_.type("struct vm_area_struct *") for _, _, entry in mt_for_each(mt): yield cast(type, entry) @takes_program_or_default def totalram_pages(prog: Program) -> int: """Return the total number of RAM pages.""" try: # The variable is present since Linux kernel commit ca79b0c211af63fa32 # ("mm: convert totalram_pages and totalhigh_pages variables # to atomic") (in v5.0). return prog["_totalram_pages"].counter.value_() except KeyError: return prog["totalram_pages"].value_() @takes_program_or_default def in_direct_map(prog: Program, addr: IntegerLike) -> bool: """ Return whether an address is within the kernel's direct memory mapping. 
:param addr: address to check """ addr = operator.index(addr) start_addr = pfn_to_virt(prog["min_low_pfn"]).value_() end_addr = (pfn_to_virt(prog["max_low_pfn"]) + prog["PAGE_SIZE"]).value_() return start_addr <= addr < end_addr drgn-0.0.31/drgn/helpers/linux/module.py000066400000000000000000000201571477777462700201600ustar00rootroot00000000000000# Copyright (c) 2024 Oracle and/or its affiliates # SPDX-License-Identifier: LGPL-2.1-or-later """ Modules ------- The ``drgn.helpers.linux.module`` module contains helpers for working with loaded kernel modules. """ import operator from typing import Iterable, List, Tuple, Union from drgn import NULL, IntegerLike, Object, Program from drgn.helpers.common.prog import takes_program_or_default from drgn.helpers.linux.list import list_for_each_entry from drgn.helpers.linux.rbtree import rb_find __all__ = ( "address_to_module", "find_module", "for_each_module", "module_address_regions", "module_percpu_region", ) @takes_program_or_default def for_each_module(prog: Program) -> Iterable[Object]: """ Returns all loaded kernel modules :returns: Iterable of ``struct module *`` objects """ return list_for_each_entry("struct module", prog["modules"].address_of_(), "list") @takes_program_or_default def find_module(prog: Program, name: Union[str, bytes]) -> Object: """ Lookup a kernel module by name, or return NULL if not found :param name: name to search for :returns: the ``struct module *`` by that name, or NULL """ if isinstance(name, str): name = name.encode() for module in for_each_module(prog): if module.name.string_() == name: return module return NULL(prog, "struct module *") def module_percpu_region(mod: Object) -> Tuple[int, int]: """ Lookup the percpu memory region of a module. Given a ``struct module *``, return the address (as a an int) and the length of the percpu memory region. Modules may have a NULL percpu region, in which case (0, 0) is returned. 
    Rarely, on kernels without ``CONFIG_SMP``, there is no percpu region at
    all, and this function returns (0, 0)

    :param mod: Object of type ``struct module *``
    :returns: (base, size) of the module percpu region
    """
    try:
        return mod.percpu.value_(), mod.percpu_size.value_()
    except AttributeError:
        # struct module has no percpu/percpu_size members (the !CONFIG_SMP
        # case described above): report an empty region.
        return 0, 0


def _range_from_module_layout(layout: Object) -> Tuple[int, int]:
    # For "struct module_layout" (old) or "struct module_memory"
    return layout.base.value_(), layout.size.value_()


def _range_from_module(module: Object, kind: str) -> Tuple[int, int]:
    # For reading a range directly from "struct module" (old kernels)
    # *kind* selects the member pair, e.g. "core" -> module_core/core_size.
    return (
        module.member_(f"module_{kind}").value_(),
        module.member_(f"{kind}_size").value_(),
    )


def _ranges_from_module_memory(mod: Object) -> List[Tuple[int, int]]:
    # For reading all ranges from a modules "struct module_memory"
    return [_range_from_module_layout(mem) for mem in mod.mem]


def module_address_regions(mod: Object) -> List[Tuple[int, int]]:
    """
    Returns a list of address ranges for a module

    Given a ``struct module *``, return every address range associated with
    the module. Note that the number of address ranges and their
    interpretations vary across kernel versions. Some kernel versions provide
    additional information about some regions (e.g. text, data, R/O, init).
    This API doesn't distinguish. However, this API does not provide the
    module's percpu region: use ``module_percpu_region()`` for that.

    :param mod: Object of type ``struct module *``
    :returns: list of tuples: (starting memory address, length of address range)
    """
    try:
        # Since Linux 6.4, ac3b432839234 ("module: replace module_layout with
        # module_memory"), module address regions are broken into several types,
        # each with their own base and size.
mod.prog_.constant("MOD_MEM_NUM_TYPES") except LookupError: pass else: return _ranges_from_module_memory(mod) try: # Prior to 6.4, there were two "struct module_layout" objects, # core_layout and init_layout, which contained the module's memory # layout and any memory which could be freed after init. The init_layout # is usually NULL / size 0. The module_layout structure has more # information to say where text ends, where rodata ends, etc. We ignore # these. core = _range_from_module_layout(mod.core_layout) init = _range_from_module_layout(mod.init_layout) except AttributeError: # Prior to 4.5, 7523e4dc5057 ("module: use a structure to encapsulate # layout."), the layout information was stored as variables directly in # the struct module. They were prefixed with "core_" and "init_". core = _range_from_module(mod, "core") init = _range_from_module(mod, "init") ret = [core] if init: ret.append(init) return ret def _addrmod_tree(mod_tree: Object, addr: int) -> Object: prog = mod_tree.prog_ # The module tree is "latched": there are two parallel trees. Which one is # in use depends on the seqcount, which gets incremented for each # modification. This is a really neat approach that allows reads in parallel # with a writer. In our use case, it's probably not worth verifying the # seqcount after the fact. What we do need is the index (0 or 1). This may # be a seqcount_latch_t, or before 24bf401cebfd6 ("rbtree_latch: Use # seqcount_latch_t"), a regular seqcount_t. try: idx = mod_tree.root.seq.seqcount.sequence.value_() & 1 except AttributeError: idx = mod_tree.root.seq.sequence.value_() & 1 # In ac3b432839234 ("module: replace module_layout with module_memory"), # struct module_layout was replaced by module_memory. The module_layout # encoded the separate regions (text, data, rodata, etc) in a single # structure, whereas module_memory is a simple base pointer followed by a # size: one module_memory structure is used per kind of memory. 
However, # both of them contain a "base" pointer that indicates the start of the # region, a "size" that indicates its total size, and a "mtn.mod" pointer # which refers to the relevant module. So for our use case, they are # interchangeable, except for their names. try: tp = prog.type("struct module_memory") except LookupError: tp = prog.type("struct module_layout") def cmp(v: int, node: Object) -> int: start = node.base.value_() end = start + node.size.value_() if v < start: return -1 elif v >= end: return 1 else: return 0 mem = rb_find( tp, mod_tree.root.tree[idx].address_of_(), f"mtn.node.node[{idx}]", # container_of allows array indices! addr, cmp, ) if mem: return mem.mtn.mod else: return NULL(prog, "struct module *") @takes_program_or_default def address_to_module(prog: Program, addr: IntegerLike) -> Object: """ Return the ``struct module *`` associated with a memory address If the address is a text, data, or read-only data address associated with a kernel module, then this function returns the module it is associated with. Otherwise, returns NULL. Note that dynamic memory (e.g. slab objects) generally can't be associated with the module that allocated it. Further, static & dynamic per-cpu address cannot be associated with their associated module either. Normally, this lookup is efficient, thanks to ``CONFIG_MODULES_TREE_LOOKUP``, which provides a red-black tree of module address ranges, and is `very commonly`__ enabled. However, on some uncommon configurations the rbtree may not be present. In those cases, we fall back to a linear search of each kernel module's memory regions. .. 
__: https://oracle.github.io/kconfigs/?config=MODULES_TREE_LOOKUP&config=UTS_RELEASE :param addr: memory address to lookup :returns: the ``struct module *`` associated with the memory, or NULL """ addr = operator.index(addr) try: mod_tree = prog["mod_tree"] except LookupError: pass else: return _addrmod_tree(mod_tree, addr) for module in for_each_module(prog): for start, length in module_address_regions(module): if start <= addr < start + length: return module return NULL(prog, "struct module *") drgn-0.0.31/drgn/helpers/linux/net.py000066400000000000000000000175251477777462700174660ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Networking ---------- The ``drgn.helpers.linux.net`` module provides helpers for working with the Linux kernel networking subsystem. """ import operator from typing import Iterator, Optional, Union from drgn import NULL, IntegerLike, Object, Program, Type, cast, container_of, sizeof from drgn.helpers.common.prog import ( takes_object_or_program_or_default, takes_program_or_default, ) from drgn.helpers.linux.fs import fget from drgn.helpers.linux.list import hlist_for_each_entry, list_for_each_entry from drgn.helpers.linux.list_nulls import hlist_nulls_for_each_entry __all__ = ( "SOCK_INODE", "SOCKET_I", "for_each_net", "get_net_ns_by_inode", "get_net_ns_by_fd", "netdev_for_each_tx_queue", "netdev_get_by_index", "netdev_get_by_name", "netdev_priv", "sk_fullsock", "sk_nulls_for_each", "skb_shinfo", ) _S_IFMT = 0o170000 _S_IFSOCK = 0o140000 def SOCKET_I(inode: Object) -> Object: """ Get a socket from an inode referring to the socket. 
    :param inode: ``struct inode *``
    :return: ``struct socket *``
    :raises ValueError: If *inode* does not refer to a socket
    """
    # The file-type bits of i_mode must identify a socket inode.
    if inode.i_mode & _S_IFMT != _S_IFSOCK:
        raise ValueError("not a socket inode")
    # A socket inode is embedded in a struct socket_alloc alongside its
    # struct socket.
    return container_of(inode, "struct socket_alloc", "vfs_inode").socket.address_of_()


def SOCK_INODE(sock: Object) -> Object:
    """
    Get the inode of a socket.

    :param sock: ``struct socket *``
    :return: ``struct inode *``
    """
    return container_of(sock, "struct socket_alloc", "socket").vfs_inode.address_of_()


@takes_program_or_default
def for_each_net(prog: Program) -> Iterator[Object]:
    """
    Iterate over all network namespaces in the system.

    :return: Iterator of ``struct net *`` objects.
    """
    return list_for_each_entry(
        "struct net", prog["net_namespace_list"].address_of_(), "list"
    )


# Value of the CLONE_NEWNET flag, used to identify network namespace ops.
_CLONE_NEWNET = 0x40000000


def get_net_ns_by_inode(inode: Object) -> Object:
    """
    Get a network namespace from a network namespace NSFS inode, e.g.
    ``/proc/$PID/ns/net`` or ``/var/run/netns/$NAME``.

    :param inode: ``struct inode *``
    :return: ``struct net *``
    :raises ValueError: if *inode* is not a network namespace inode
    """
    # Namespace inodes use ns_file_operations; anything else is not an NSFS
    # inode at all.
    if inode.i_fop != inode.prog_["ns_file_operations"].address_of_():
        raise ValueError("not a namespace inode")
    ns = cast("struct ns_common *", inode.i_private)
    # The ops type distinguishes network namespaces from other namespace
    # kinds.
    if ns.ops.type != _CLONE_NEWNET:
        raise ValueError("not a network namespace inode")
    return container_of(ns, "struct net", "ns")


def get_net_ns_by_fd(task: Object, fd: IntegerLike) -> Object:
    """
    Get a network namespace from a task and a file descriptor referring to a
    network namespace NSFS inode, e.g. ``/proc/$PID/ns/net`` or
    ``/var/run/netns/$NAME``.

    :param task: ``struct task_struct *``
    :param fd: File descriptor.
    :return: ``struct net *``
    :raises ValueError: If *fd* does not refer to a network namespace inode
    """
    return get_net_ns_by_inode(fget(task, fd).f_inode)


def netdev_for_each_tx_queue(dev: Object) -> Iterator[Object]:
    """
    Iterate over all TX queues for a network device.
:param dev: ``struct net_device *`` :return: Iterator of ``struct netdev_queue *`` objects. """ for i in range(dev.num_tx_queues): yield dev._tx + i _NETDEV_HASHBITS = 8 _NETDEV_HASHENTRIES = 1 << _NETDEV_HASHBITS @takes_object_or_program_or_default def netdev_get_by_index( prog: Program, net: Optional[Object], ifindex: IntegerLike ) -> Object: """ Get the network device with the given interface index number. :param net: ``struct net *``. Defaults to the initial network namespace if given a :class:`~drgn.Program` or :ref:`omitted `. :param ifindex: Network interface index number. :return: ``struct net_device *`` (``NULL`` if not found) """ if net is None: net = prog["init_net"] ifindex = operator.index(ifindex) head = net.dev_index_head[ifindex & (_NETDEV_HASHENTRIES - 1)] for netdev in hlist_for_each_entry("struct net_device", head, "index_hlist"): if netdev.ifindex.value_() == ifindex: return netdev return NULL(prog, "struct net_device *") @takes_object_or_program_or_default def netdev_get_by_name( prog: Program, net: Optional[Object], name: Union[str, bytes] ) -> Object: """ Get the network device with the given interface name. :param net: ``struct net *``. Defaults to the initial network namespace if given a :class:`~drgn.Program` or :ref:`omitted `. :param name: Network interface name. :return: ``struct net_device *`` (``NULL`` if not found) """ if net is None: net = prog["init_net"] if isinstance(name, str): name = name.encode() # Since Linux kernel commit ff92741270bf ("net: introduce name_node struct # to be used in hashlist") (in v5.5), the device name hash table contains # struct netdev_name_node entries. Before that, it contained the struct # net_device directly. 
try: entry_type = prog.type("struct netdev_name_node") member = "hlist" entry_is_name_node = True except LookupError: entry_type = prog.type("struct net_device") member = "name_hlist" entry_is_name_node = False for i in range(_NETDEV_HASHENTRIES): head = net.dev_name_head[i] for entry in hlist_for_each_entry(entry_type, head, member): if entry.name.string_() == name: if entry_is_name_node: return entry.dev else: return entry return NULL(prog, "struct net_device *") def netdev_priv(dev: Object, type: Union[str, Type] = "void") -> Object: """ Return the private data of a network device. >>> dev = netdev_get_by_name("wlp0s20f3") >>> netdev_priv(dev) (void *)0xffff9419c9dec9c0 >>> netdev_priv(dev, "struct ieee80211_sub_if_data") *(struct ieee80211_sub_if_data *)0xffff9419c9dec9c0 = { ... } :param dev: ``struct net_device *`` :param type: Type of private data. :return: ``type *`` """ prog = dev.prog_ try: offset = prog.cache["net_device_aligned_size"] except KeyError: # 31 is NETDEV_ALIGN - 1 offset = (sizeof(prog.type("struct net_device")) + 31) & ~31 prog.cache["net_device_aligned_size"] = offset return Object(prog, prog.pointer_type(prog.type(type)), dev.value_() + offset) def sk_fullsock(sk: Object) -> bool: """ Check whether a socket is a full socket, i.e., not a time-wait or request socket. :param sk: ``struct sock *`` """ prog = sk.prog_ state = sk.__sk_common.skc_state.value_() return state != prog["TCP_SYN_RECV"] and state != prog["TCP_TIME_WAIT"] def sk_nulls_for_each(head: Object) -> Iterator[Object]: """ Iterate over all the entries in a nulls hash list of sockets specified by ``struct hlist_nulls_head`` head. :param head: ``struct hlist_nulls_head *`` :return: Iterator of ``struct sock *`` objects. """ return hlist_nulls_for_each_entry("struct sock", head, "__sk_common.skc_nulls_node") def skb_shinfo(skb: Object) -> Object: """ Get the shared info for a socket buffer. 
:param skb: ``struct sk_buff *`` :return: ``struct skb_shared_info *`` """ prog = skb.prog_ try: NET_SKBUFF_DATA_USES_OFFSET = prog.cache["NET_SKBUFF_DATA_USES_OFFSET"] except KeyError: NET_SKBUFF_DATA_USES_OFFSET = sizeof(prog.type("long")) > 4 prog.cache["NET_SKBUFF_DATA_USES_OFFSET"] = NET_SKBUFF_DATA_USES_OFFSET if NET_SKBUFF_DATA_USES_OFFSET: return cast("struct skb_shared_info *", skb.head + skb.end) else: return cast("struct skb_shared_info *", skb.end) drgn-0.0.31/drgn/helpers/linux/nodemask.py000066400000000000000000000035541477777462700204760ustar00rootroot00000000000000# Copyright (c) ByteDance, Inc. and its affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ NUMA Node Masks --------------- The ``drgn.helpers.linux.nodemask`` module provides helpers for working with NUMA node masks from :linux:`include/linux/nodemask.h`. """ from typing import Iterator from drgn import IntegerLike, Object, Program from drgn.helpers.common.prog import takes_program_or_default from drgn.helpers.linux.bitops import for_each_set_bit, test_bit __all__ = ( "for_each_node", "for_each_node_mask", "for_each_node_state", "for_each_online_node", "node_state", ) def for_each_node_mask(mask: Object) -> Iterator[int]: """ Iterate over all of the NUMA nodes in the given mask. :param mask: ``nodemask_t`` """ try: nr_node_ids = mask.prog_["nr_node_ids"].value_() except KeyError: nr_node_ids = 1 return for_each_set_bit(mask.bits, nr_node_ids) @takes_program_or_default def for_each_node_state(prog: Program, state: IntegerLike) -> Iterator[int]: """ Iterate over all NUMA nodes in the given state. 
:param state: ``enum node_states`` (e.g., ``N_NORMAL_MEMORY``) """ return for_each_node_mask(prog["node_states"][state]) @takes_program_or_default def for_each_node(prog: Program) -> Iterator[int]: """Iterate over all possible NUMA nodes.""" return for_each_node_state(prog["N_POSSIBLE"]) @takes_program_or_default def for_each_online_node(prog: Program) -> Iterator[int]: """Iterate over all online NUMA nodes.""" return for_each_node_state(prog["N_ONLINE"]) def node_state(node: IntegerLike, state: Object) -> bool: """ Return whether the given NUMA node has the given state. :param node: NUMA node number. :param state: ``enum node_states`` (e.g., ``N_NORMAL_MEMORY``) """ return test_bit(node, state.prog_["node_states"][state].bits) drgn-0.0.31/drgn/helpers/linux/percpu.py000066400000000000000000000025361477777462700201720ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Per-CPU ------- The ``drgn.helpers.linux.percpu`` module provides helpers for working with per-CPU allocations from :linux:`include/linux/percpu.h` and per-CPU counters from :linux:`include/linux/percpu_counter.h`. """ from _drgn import _linux_helper_per_cpu_ptr as per_cpu_ptr from drgn import IntegerLike, Object from drgn.helpers.linux.cpumask import for_each_online_cpu __all__ = ( "per_cpu", "per_cpu_ptr", "percpu_counter_sum", ) def per_cpu(var: Object, cpu: IntegerLike) -> Object: """ Return the per-CPU variable for a given CPU. >>> print(repr(prog["runqueues"])) Object(prog, 'struct rq', address=0x278c0) >>> per_cpu(prog["runqueues"], 6).curr.comm (char [16])"python3" :param var: Per-CPU variable, i.e., ``type __percpu`` (not a pointer; use :func:`per_cpu_ptr()` for that). :param cpu: CPU number. :return: ``type`` object. """ return per_cpu_ptr(var.address_of_(), cpu)[0] def percpu_counter_sum(fbc: Object) -> int: """ Return the sum of a per-CPU counter. 
:param fbc: ``struct percpu_counter *`` """ ret = fbc.count.value_() ptr = fbc.counters for cpu in for_each_online_cpu(fbc.prog_): ret += per_cpu_ptr(ptr, cpu)[0].value_() return ret drgn-0.0.31/drgn/helpers/linux/pid.py000066400000000000000000000063641477777462700174530ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Process IDS ----------- The ``drgn.helpers.linux.pid`` module provides helpers for looking up process IDs and processes. """ from typing import Iterator, Optional from _drgn import ( _linux_helper_find_pid, _linux_helper_find_task, _linux_helper_pid_task as pid_task, ) from drgn import IntegerLike, Object, Program, cast, container_of from drgn.helpers.common.prog import takes_object_or_program_or_default from drgn.helpers.linux.idr import idr_for_each from drgn.helpers.linux.list import hlist_for_each_entry __all__ = ( "find_pid", "find_task", "for_each_pid", "for_each_task", "pid_task", ) @takes_object_or_program_or_default def find_pid(prog: Program, ns: Optional[Object], pid: IntegerLike) -> Object: """ Return the ``struct pid *`` for the given PID number. :param ns: ``struct pid_namespace *``. Defaults to the initial PID namespace if given a :class:`~drgn.Program` or :ref:`omitted `. :return: ``struct pid *`` """ if ns is None: ns = prog["init_pid_ns"].address_of_() return _linux_helper_find_pid(ns, pid) @takes_object_or_program_or_default def for_each_pid(prog: Program, ns: Optional[Object]) -> Iterator[Object]: """ Iterate over all PIDs in a namespace. :param ns: ``struct pid_namespace *``. Defaults to the initial PID namespace if given a :class:`~drgn.Program` or :ref:`omitted `. :return: Iterator of ``struct pid *`` objects. 
""" if ns is None: ns = prog["init_pid_ns"].address_of_() if hasattr(ns, "idr"): for nr, entry in idr_for_each(ns.idr): yield cast("struct pid *", entry) else: pid_hash = prog["pid_hash"] for i in range(1 << prog["pidhash_shift"].value_()): for upid in hlist_for_each_entry( "struct upid", pid_hash[i].address_of_(), "pid_chain" ): if upid.ns == ns: yield container_of(upid, "struct pid", f"numbers[{int(ns.level)}]") @takes_object_or_program_or_default def find_task(prog: Program, ns: Optional[Object], pid: IntegerLike) -> Object: """ Return the task with the given PID. :param ns: ``struct pid_namespace *``. Defaults to the initial PID namespace if given a :class:`~drgn.Program` or :ref:`omitted `. :return: ``struct task_struct *`` """ if ns is None: ns = prog["init_pid_ns"].address_of_() return _linux_helper_find_task(ns, pid) @takes_object_or_program_or_default def for_each_task(prog: Program, ns: Optional[Object]) -> Iterator[Object]: """ Iterate over all of the tasks visible in a namespace. :param ns: ``struct pid_namespace *``. Defaults to the initial PID namespace if given a :class:`~drgn.Program` or :ref:`omitted `. :return: Iterator of ``struct task_struct *`` objects. """ PIDTYPE_PID = prog["PIDTYPE_PID"].value_() for pid in for_each_pid( prog if ns is None else ns # type: ignore # python/mypy#12056 ): task = pid_task(pid, PIDTYPE_PID) if task: yield task drgn-0.0.31/drgn/helpers/linux/plist.py000066400000000000000000000052351477777462700200260ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Priority-Sorted Lists --------------------- The ``drgn.helpers.linux.plist`` module provides helpers for working with descending-priority-sorted doubly-linked lists (``struct plist_head`` and ``struct plist_node``) from :linux:`include/linux/plist.h`. 
""" from typing import Iterator, Union from drgn import Object, Type, container_of from drgn.helpers.linux.list import list_empty, list_for_each_entry __all__ = ( "plist_first_entry", "plist_for_each", "plist_for_each_entry", "plist_head_empty", "plist_last_entry", "plist_node_empty", ) def plist_head_empty(head: Object) -> bool: """ Return whether a plist is empty. :param head: ``struct plist_head *`` """ return list_empty(head.node_list.address_of_()) def plist_node_empty(node: Object) -> bool: """ Return whether a plist node is empty (i.e., not on a list). :param node: ``struct plist_node *`` """ return list_empty(node.node_list.address_of_()) def plist_first_entry(head: Object, type: Union[str, Type], member: str) -> Object: """ Return the first (highest priority) entry in a plist. The list is assumed to be non-empty. :param head: ``struct plist_head *`` :param type: Entry type. :param member: Name of list node member in entry type. :return: ``type *`` """ return container_of(head.node_list.next, type, member + ".node_list") def plist_last_entry(head: Object, type: Union[str, Type], member: str) -> Object: """ Return the last (lowest priority) entry in a plist. The list is assumed to be non-empty. :param head: ``struct plist_head *`` :param type: Entry type. :param member: Name of list node member in entry type. :return: ``type *`` """ return container_of(head.node_list.prev, type, member + ".node_list") def plist_for_each(head: Object) -> Iterator[Object]: """ Iterate over all of the nodes in a plist. :param head: ``struct plist_head *`` :return: Iterator of ``struct plist_node *`` objects. """ return list_for_each_entry( "struct plist_node", head.node_list.address_of_(), "node_list" ) def plist_for_each_entry( type: Union[str, Type], head: Object, member: str ) -> Iterator[Object]: """ Iterate over all of the entries in a plist. :param type: Entry type. :param head: ``struct plist_head *`` :param member: Name of plist node member in entry type. 
:return: Iterator of ``type *`` objects. """ return list_for_each_entry( type, head.node_list.address_of_(), member + ".node_list" ) drgn-0.0.31/drgn/helpers/linux/printk.py000066400000000000000000000230511477777462700201760ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Log Buffer ---------- The ``drgn.helpers.linux.printk`` module provides helpers for reading the Linux kernel log buffer. """ import sys from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple if TYPE_CHECKING: from _typeshed import SupportsWrite from drgn import Object, Program, cast, sizeof from drgn.helpers.common.prog import takes_program_or_default __all__ = ( "get_dmesg", "get_printk_records", "print_dmesg", ) class PrintkRecord(NamedTuple): """Kernel log record.""" text: bytes """Message text.""" facility: int """:manpage:`syslog(3)` facility.""" level: int """Log level.""" seq: int """Sequence number.""" timestamp: int """Timestamp in nanoseconds.""" caller_tid: Optional[int] """ Thread ID of thread that logged this record, if available. This is available if the message was logged from task context and if the kernel saves the ``printk()`` caller ID. As of Linux 5.10, the kernel always saves the caller ID. From Linux 5.1 through 5.9, it is saved only if the kernel was compiled with ``CONFIG_PRINTK_CALLER``. Before that, it is never saved. """ caller_cpu: Optional[int] """ Processor ID of CPU that logged this record, if available. This is available only if the message was logged when not in task context (e.g., in an interrupt handler) and if the kernel saves the ``printk()`` caller ID. See :attr:`caller_tid` for when the kernel saves the caller ID. """ continuation: bool """Whether this record is a continuation of a previous record.""" context: Dict[bytes, bytes] """ Additional metadata for the message. See the |/dev/kmsg documentation|_ for an explanation of the keys and values. .. 
|/dev/kmsg documentation| replace:: ``/dev/kmsg`` documentation .. _/dev/kmsg documentation: https://www.kernel.org/doc/Documentation/ABI/testing/dev-kmsg """ def _caller_id(caller_id: int) -> Tuple[Optional[int], Optional[int]]: if caller_id & 0x80000000: return None, caller_id & ~0x80000000 else: return caller_id, None def _get_printk_records_lockless(prog: Program, prb: Object) -> List[PrintkRecord]: ulong_size = sizeof(prog.type("unsigned long")) DESC_SV_BITS = ulong_size * 8 DESC_FLAGS_SHIFT = DESC_SV_BITS - 2 DESC_FLAGS_MASK = 3 << DESC_FLAGS_SHIFT DESC_ID_MASK = DESC_FLAGS_MASK ^ ((1 << DESC_SV_BITS) - 1) LOG_CONT = prog["LOG_CONT"].value_() desc_committed = prog["desc_committed"].value_() desc_finalized = prog["desc_finalized"].value_() def record_committed(current_id: int, state_var: int) -> bool: state_desc_id = state_var & DESC_ID_MASK state = 3 & (state_var >> DESC_FLAGS_SHIFT) return (current_id == state_desc_id) and ( state == desc_committed or state == desc_finalized ) desc_ring = prb.desc_ring descs = desc_ring.descs.read_() infos = desc_ring.infos.read_() desc_ring_mask = (1 << desc_ring.count_bits.value_()) - 1 text_data_ring = prb.text_data_ring text_data_ring_data = text_data_ring.data.read_() text_data_ring_mask = (1 << text_data_ring.size_bits) - 1 result = [] def add_record(current_id: int) -> None: idx = current_id & desc_ring_mask desc = descs[idx].read_() if not record_committed(current_id, desc.state_var.counter.value_()): return lpos_begin = desc.text_blk_lpos.begin & text_data_ring_mask lpos_next = desc.text_blk_lpos.next & text_data_ring_mask lpos_begin += ulong_size if lpos_begin == lpos_next: # Data-less record. return if lpos_begin > lpos_next: # Data wrapped. lpos_begin -= lpos_begin info = infos[idx].read_() text_len = info.text_len if lpos_next - lpos_begin < text_len: # Truncated record. 
text_len = lpos_next - lpos_begin caller_tid, caller_cpu = _caller_id(info.caller_id.value_()) context = {} subsystem = info.dev_info.subsystem.string_() device = info.dev_info.device.string_() if subsystem: context[b"SUBSYSTEM"] = subsystem if device: context[b"DEVICE"] = device result.append( PrintkRecord( text=prog.read(text_data_ring_data + lpos_begin, text_len), facility=info.facility.value_(), level=info.level.value_(), seq=info.seq.value_(), timestamp=info.ts_nsec.value_(), caller_tid=caller_tid, caller_cpu=caller_cpu, continuation=bool(info.flags.value_() & LOG_CONT), context=context, ) ) head_id = desc_ring.head_id.counter.value_() current_id = desc_ring.tail_id.counter.value_() while current_id != head_id: add_record(current_id) current_id = (current_id + 1) & DESC_ID_MASK add_record(current_id) return result def _get_printk_records_structured(prog: Program) -> List[PrintkRecord]: try: printk_logp_type = prog.type("struct printk_log *") except LookupError: # Before Linux kernel commit 62e32ac3505a ("printk: rename struct log # to struct printk_log") (in v3.11), records were "struct log" instead # of "struct printk_log". RHEL 7 kernel still uses old naming. printk_logp_type = prog.type("struct log *") have_caller_id = printk_logp_type.type.has_member("caller_id") LOG_CONT = prog["LOG_CONT"].value_() result = [] # Between Linux kernel commits cbd357008604 ("bpf: verifier (add ability to # receive verification log)") (in v3.18) and e7bf8249e8f1 ("bpf: # encapsulate verifier log state into a structure") (in v4.15), # kernel/bpf/verifier.c also contains a variable named log_buf. 
log_buf = prog.object("log_buf", filename="printk.c").read_() current_idx = prog["log_first_idx"].read_() next_idx = prog["log_next_idx"].read_() seq = prog["log_first_seq"].value_() while current_idx != next_idx: logp = cast(printk_logp_type, log_buf + current_idx) log = logp[0].read_() text_len = log.text_len.value_() dict_len = log.dict_len.value_() text_dict = prog.read(logp + 1, text_len + dict_len) if have_caller_id: caller_tid, caller_cpu = _caller_id(log.caller_id.value_()) else: caller_tid = caller_cpu = None context = {} if dict_len: for elmt in text_dict[text_len:].split(b"\0"): key, value = elmt.split(b"=", 1) context[key] = value result.append( PrintkRecord( text=text_dict[:text_len], facility=log.facility.value_(), level=log.level.value_(), seq=seq, timestamp=log.ts_nsec.value_(), caller_tid=caller_tid, caller_cpu=caller_cpu, continuation=bool(log.flags.value_() & LOG_CONT), context=context, ) ) log_len = log.len.read_() if log_len: current_idx += log_len else: # Zero means the buffer wrapped around. if current_idx < next_idx: # Avoid getting into an infinite loop if the buffer is # corrupted. break current_idx -= current_idx seq += 1 return result @takes_program_or_default def get_printk_records(prog: Program) -> List[PrintkRecord]: """Get a list of records in the kernel log buffer.""" # Linux kernel commit 896fbe20b4e2 ("printk: use the lockless ringbuffer") # (in v5.10) changed the ring buffer structure completely. try: prb = prog["prb"] except KeyError: return _get_printk_records_structured(prog) else: return _get_printk_records_lockless(prog, prb) @takes_program_or_default def get_dmesg(prog: Program) -> bytes: """ Get the contents of the kernel log buffer formatted like :manpage:`dmesg(1)`. If you just want to print the log buffer, use :func:`print_dmesg()`. The format of each line is: .. code-block:: [ timestamp] message If you need to format the log buffer differently, use :func:`get_printk_records()` and format it yourself. 
""" lines = [ b"[% 5d.%06d] %s" % ( record.timestamp // 1000000000, record.timestamp % 1000000000 // 1000, record.text, ) for record in get_printk_records(prog) ] lines.append(b"") # So we get a trailing newline. return b"\n".join(lines) @takes_program_or_default def print_dmesg(prog: Program, *, file: "Optional[SupportsWrite[str]]" = None) -> None: """ Print the contents of the kernel log buffer. >>> print_dmesg() [ 0.000000] Linux version 6.8.0-vmtest28.1default (drgn@drgn) (x86_64-linux-gcc (GCC) 12.2.0, GNU ld (GNU Binutils) 2.39) #1 SMP PREEMPT_DYNAMIC Mon Mar 11 06:38:45 UTC 2024 [ 0.000000] Command line: rootfstype=9p rootflags=trans=virtio,cache=loose,msize=1048576 ro console=ttyS0,115200 panic=-1 crashkernel=256M init=/tmp/drgn-vmtest-rudzppeo/init [ 0.000000] BIOS-provided physical RAM map: ... :param file: File to print to. Defaults to :data:`sys.stdout`. """ (sys.stdout if file is None else file).write( get_dmesg(prog).decode(errors="replace") ) drgn-0.0.31/drgn/helpers/linux/radixtree.py000066400000000000000000000022021477777462700206510ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Radix Trees ----------- The ``drgn.helpers.linux.radixtree`` module provides helpers for working with radix trees from :linux:`include/linux/radix-tree.h`. .. seealso:: `XArrays`_, which were introduced in Linux 4.20 as a replacement for radix trees. """ from typing import Iterator, Tuple from drgn import IntegerLike, Object from drgn.helpers.linux.xarray import xa_for_each, xa_load __all__ = ( "radix_tree_for_each", "radix_tree_lookup", ) def radix_tree_lookup(root: Object, index: IntegerLike) -> Object: """ Look up the entry at a given index in a radix tree. :param root: ``struct radix_tree_root *`` :param index: Entry index. :return: ``void *`` found entry, or ``NULL`` if not found. 
""" return xa_load(root, index) def radix_tree_for_each(root: Object) -> Iterator[Tuple[int, Object]]: """ Iterate over all of the entries in a radix tree. :param root: ``struct radix_tree_root *`` :return: Iterator of (index, ``void *``) tuples. """ return xa_for_each(root) drgn-0.0.31/drgn/helpers/linux/rbtree.py000066400000000000000000000273721477777462700201640ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Red-Black Trees --------------- The ``drgn.helpers.linux.rbtree`` module provides helpers for working with red-black trees from :linux:`include/linux/rbtree.h`. """ from typing import Callable, Generator, Iterator, Tuple, TypeVar, Union from drgn import NULL, Object, Type, container_of from drgn.helpers import ValidationError __all__ = ( "RB_EMPTY_ROOT", "RB_EMPTY_NODE", "rb_find", "rb_first", "rb_last", "rb_next", "rb_parent", "rb_prev", "rbtree_inorder_for_each", "rbtree_inorder_for_each_entry", "validate_rbtree", "validate_rbtree_inorder_for_each_entry", ) def RB_EMPTY_ROOT(root: Object) -> bool: """ Return whether a red-black tree is empty. :param node: ``struct rb_root *`` """ return not root.rb_node def RB_EMPTY_NODE(node: Object) -> bool: """ Return whether a red-black tree node is empty, i.e., not inserted in a tree. :param node: ``struct rb_node *`` """ return node.__rb_parent_color.value_() == node.value_() def rb_parent(node: Object) -> Object: """ Return the parent node of a red-black tree node. :param node: ``struct rb_node *`` :return: ``struct rb_node *`` """ return Object(node.prog_, node.type_, value=node.__rb_parent_color.value_() & ~3) # Return parent node and whether the node is black. 
def _rb_parent_color(node: Object) -> Tuple[Object, bool]: value = node.__rb_parent_color.value_() return Object(node.prog_, node.type_, value=value & ~3), (value & 1) != 0 def rb_first(root: Object) -> Object: """ Return the first node (in sort order) in a red-black tree, or ``NULL`` if the tree is empty. :param root: ``struct rb_root *`` :return: ``struct rb_node *`` """ node = root.rb_node.read_() if not node: return node while True: next = node.rb_left.read_() if not next: return node node = next def rb_last(root: Object) -> Object: """ Return the last node (in sort order) in a red-black tree, or ``NULL`` if the tree is empty. :param root: ``struct rb_root *`` :return: ``struct rb_node *`` """ node = root.rb_node.read_() if not node: return node while True: next = node.rb_right.read_() if not next: return node node = next def rb_next(node: Object) -> Object: """ Return the next node (in sort order) after a red-black node, or ``NULL`` if the node is the last node in the tree or is empty. :param node: ``struct rb_node *`` :return: ``struct rb_node *`` """ node = node.read_() if RB_EMPTY_NODE(node): return NULL(node.prog_, node.type_) next = node.rb_right.read_() if next: node = next while True: next = node.rb_left.read_() if not next: return node node = next parent = rb_parent(node).read_() while parent and node == parent.rb_right: node = parent parent = rb_parent(node).read_() return parent def rb_prev(node: Object) -> Object: """ Return the previous node (in sort order) before a red-black node, or ``NULL`` if the node is the first node in the tree or is empty. 
:param node: ``struct rb_node *`` :return: ``struct rb_node *`` """ node = node.read_() if RB_EMPTY_NODE(node): return NULL(node.prog_, node.type_) next = node.rb_left.read_() if next: node = next while True: next = node.rb_right.read_() if not next: return node node = next parent = rb_parent(node).read_() while parent and node == parent.rb_left: node = parent parent = rb_parent(node).read_() return parent def rbtree_inorder_for_each(root: Object) -> Iterator[Object]: """ Iterate over all of the nodes in a red-black tree, in sort order. :param root: ``struct rb_root *`` :return: Iterator of ``struct rb_node *`` objects. """ def aux(node: Object) -> Iterator[Object]: if node: yield from aux(node.rb_left.read_()) yield node yield from aux(node.rb_right.read_()) yield from aux(root.rb_node.read_()) def rbtree_inorder_for_each_entry( type: Union[str, Type], root: Object, member: str ) -> Iterator[Object]: """ Iterate over all of the entries in a red-black tree in sorted order. :param type: Entry type. :param root: ``struct rb_root *`` :param member: Name of ``struct rb_node`` member in entry type. :return: Iterator of ``type *`` objects. """ type = root.prog_.type(type) for node in rbtree_inorder_for_each(root): yield container_of(node, type, member) KeyType = TypeVar("KeyType") def rb_find( type: Union[str, Type], root: Object, member: str, key: KeyType, cmp: Callable[[KeyType, Object], int], ) -> Object: """ Find an entry in a red-black tree given a key and a comparator function. Note that this function does not have an analogue in the Linux kernel source code, as tree searches are all open-coded. :param type: Entry type. :param root: ``struct rb_root *`` :param member: Name of ``struct rb_node`` member in entry type. :param key: Key to find. :param cmp: Callback taking key and entry that returns < 0 if the key is less than the entry, > 0 if the key is greater than the entry, and 0 if the key matches the entry. 
:return: ``type *`` found entry, or ``NULL`` if not found. """ prog = root.prog_ type = prog.type(type) node = root.rb_node.read_() while node: entry = container_of(node, type, member) ret = cmp(key, entry) if ret < 0: node = node.rb_left.read_() elif ret > 0: node = node.rb_right.read_() else: return entry return NULL(prog, prog.pointer_type(type)) def validate_rbtree( type: Union[str, Type], root: Object, member: str, cmp: Callable[[Object, Object], int], allow_equal: bool, ) -> None: """ Validate a red-black tree. This checks that: 1. The tree is a valid binary search tree ordered according to *cmp*. 2. If *allow_equal* is ``False``, there are no nodes that compare equal according to *cmp*. 3. The ``rb_parent`` pointers are consistent. 4. The red-black tree requirements are satisfied: the root node is black, no red node has a red child, and every path from any node to any of its descendant leaf nodes goes through the same number of black nodes. :param type: Entry type. :param root: ``struct rb_root *`` :param member: Name of ``struct rb_node`` member in entry type. :param cmp: Callback taking two ``type *`` entry objects that returns < 0 if the first entry is less than the second entry, > 0 if the first entry is greater than the second entry, and 0 if they are equal. :param allow_equal: Whether the tree may contain entries that compare equal to each other. :raises ValidationError: if the tree is invalid """ for _ in validate_rbtree_inorder_for_each_entry( type, root, member, cmp, allow_equal ): pass def validate_rbtree_inorder_for_each_entry( type: Union[str, Type], root: Object, member: str, cmp: Callable[[Object, Object], int], allow_equal: bool, ) -> Iterator[Object]: """ Like :func:`rbtree_inorder_for_each_entry()`, but validates the red-black tree like :func:`validate_rbtree()` while iterating. :param type: Entry type. :param root: ``struct rb_root *`` :param member: Name of ``struct rb_node`` member in entry type. 
:param cmp: Callback taking two ``type *`` entry objects that returns < 0 if the first entry is less than the second entry, > 0 if the first entry is greater than the second entry, and 0 if they are equal. :param allow_equal: Whether the tree may contain entries that compare equal to each other. :raises ValidationError: if the tree is invalid """ prog = root.prog_ type = prog.type(type) def visit( node: Object, parent_node: Object, parent_entry: Object, parent_is_red: bool, is_left: bool, ) -> Generator[Object, None, int]: if node: node_rb_parent, black = _rb_parent_color(node) if node_rb_parent != parent_node: raise ValidationError( f"{parent_node.format_(dereference=False, symbolize=False)}" f" rb_{'left' if is_left else 'right'}" f" {node.format_(dereference=False, symbolize=False, type_name=False)}" f" has rb_parent {node_rb_parent.format_(dereference=False, symbolize=False, type_name=False)}" ) if parent_is_red and not black: raise ValidationError( f"red node {parent_node.format_(dereference=False, symbolize=False)}" f" has red child {node.format_(dereference=False, symbolize=False, type_name=False)}" ) entry = container_of(node, type, member) r = cmp(entry, parent_entry) if r > 0: if is_left: raise ValidationError( f"{parent_entry.format_(dereference=False, symbolize=False)}" f" left child {entry.format_(dereference=False, symbolize=False, type_name=False)}" " compares greater than it" ) elif r < 0: if not is_left: raise ValidationError( f"{parent_entry.format_(dereference=False, symbolize=False)}" f" right child {entry.format_(dereference=False, symbolize=False, type_name=False)}" " compares less than it" ) elif not allow_equal: raise ValidationError( f"{parent_entry.format_(dereference=False, symbolize=False)}" f" {'left' if is_left else 'right'}" f" child {entry.format_(dereference=False, symbolize=False, type_name=False)}" " compares equal to it" ) return (yield from descend(node, entry, black)) else: return 0 def descend( node: Object, entry: Object, 
black: bool ) -> Generator[Object, None, int]: left_black_height = yield from visit( node.rb_left.read_(), node, entry, parent_is_red=not black, is_left=True ) yield entry right_black_height = yield from visit( node.rb_right.read_(), node, entry, parent_is_red=not black, is_left=False ) if left_black_height != right_black_height: raise ValidationError( f"left and right subtrees of {node.format_(dereference=False, symbolize=False)}" f" have unequal black heights ({left_black_height} != {right_black_height})" ) return left_black_height + black root_node = root.rb_node.read_() if root_node: parent, black = _rb_parent_color(root_node) if parent: raise ValidationError( f"root node {root_node.format_(dereference=False, symbolize=False)}" f" has parent {parent.format_(dereference=False, symbolize=False, type_name=False)}" ) if not black: raise ValidationError( f"root node {root_node.format_(dereference=False, symbolize=False)} is red" ) yield from descend(root_node, container_of(root_node, type, member), black) drgn-0.0.31/drgn/helpers/linux/sched.py000066400000000000000000000075621477777462700177660ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ CPU Scheduler ------------- The ``drgn.helpers.linux.sched`` module provides helpers for working with the Linux CPU scheduler. """ from typing import Tuple from _drgn import ( _linux_helper_cpu_curr, _linux_helper_idle_task, _linux_helper_task_cpu as task_cpu, _linux_helper_task_thread_info as task_thread_info, ) from drgn import IntegerLike, Object, Program from drgn.helpers.common.prog import takes_program_or_default __all__ = ( "cpu_curr", "idle_task", "loadavg", "task_cpu", "task_state_to_char", "task_thread_info", ) _TASK_NOLOAD = 0x400 @takes_program_or_default def cpu_curr(prog: Program, cpu: IntegerLike) -> Object: """ Return the task running on the given CPU. >>> cpu_curr(7).comm (char [16])"python3" :param cpu: CPU number. 
:return: ``struct task_struct *`` """ return _linux_helper_cpu_curr(prog, cpu) @takes_program_or_default def idle_task(prog: Program, cpu: IntegerLike) -> Object: """ Return the idle thread (PID 0, a.k.a swapper) for the given CPU. >>> idle_task(1).comm (char [16])"swapper/1" :param cpu: CPU number. :return: ``struct task_struct *`` """ return _linux_helper_idle_task(prog, cpu) def task_state_to_char(task: Object) -> str: """ Get the state of the task as a character (e.g., ``'R'`` for running). See `ps(1) `_ for a description of the process state codes. :param task: ``struct task_struct *`` """ prog = task.prog_ task_state_chars: str TASK_REPORT: int try: task_state_chars, TASK_REPORT, task_state_name = prog.cache[ "task_state_to_char" ] except KeyError: task_state_array = prog["task_state_array"] # Walk through task_state_array backwards looking for the largest state # that we know is in TASK_REPORT, then populate the task state mapping. chars = None for i in range(len(task_state_array) - 1, -1, -1): c: int = task_state_array[i][0].value_() if chars is None and c in b"RSDTtXZP": chars = bytearray(i + 1) TASK_REPORT = (1 << i) - 1 if chars is not None: chars[i] = c if chars is None: raise Exception("could not parse task_state_array") task_state_chars = chars.decode("ascii") # Since Linux kernel commit 2f064a59a11f ("sched: Change # task_struct::state") (in v5.14), the task state is named "__state". # Before that, it is named "state". try: task_state = task.__state task_state_name = "__state" except AttributeError: task_state = task.state task_state_name = "state" prog.cache["task_state_to_char"] = ( task_state_chars, TASK_REPORT, task_state_name, ) else: task_state = getattr(task, task_state_name) task_state = task_state.value_() exit_state = task.exit_state.value_() state = (task_state | exit_state) & TASK_REPORT char = task_state_chars[state.bit_length()] # States beyond TASK_REPORT are special. 
As of Linux v5.14, TASK_IDLE is # the only one; it is defined as TASK_UNINTERRUPTIBLE | TASK_NOLOAD. if char == "D" and (task_state & ~state) == _TASK_NOLOAD: return "I" else: return char @takes_program_or_default def loadavg(prog: Program) -> Tuple[float, float, float]: """ Return system load averaged over 1, 5 and 15 minutes as tuple of three float values. >>> loadavg() (2.34, 0.442, 1.33) """ avenrun = prog["avenrun"] vals = [avenrun[i].value_() / (1 << 11) for i in range(3)] return (vals[0], vals[1], vals[2]) drgn-0.0.31/drgn/helpers/linux/slab.py000066400000000000000000000542631477777462700176210ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Slab Allocator -------------- The ``drgn.helpers.linux.slab`` module provides helpers for working with the Linux slab allocator. .. warning:: Beware of slab merging when using these helpers. See :func:`slab_cache_is_merged() `. """ import operator from os import fsdecode from typing import Callable, Dict, Iterator, Optional, Set, Tuple, Union from drgn import ( NULL, FaultError, IntegerLike, Object, Program, Type, cast, container_of, sizeof, ) from drgn.helpers import ValidationError from drgn.helpers.common.format import escape_ascii_string from drgn.helpers.common.prog import takes_program_or_default from drgn.helpers.linux.cpumask import for_each_online_cpu from drgn.helpers.linux.list import list_for_each_entry from drgn.helpers.linux.mm import ( PageSlab, _get_PageSlab_impl, compound_head, for_each_page, in_direct_map, page_to_virt, virt_to_page, ) from drgn.helpers.linux.percpu import per_cpu_ptr from drgn.helpers.linux.rbtree import rbtree_inorder_for_each_entry __all__ = ( "find_containing_slab_cache", "find_slab_cache", "for_each_slab_cache", "get_slab_cache_aliases", "print_slab_caches", "slab_cache_for_each_allocated_object", "slab_cache_is_merged", "slab_object_info", ) # Get the type containing slab information. 
# # Linux kernel commit d122019bf061cccc4583eb9ad40bf58c2fe517be ("mm: Split slab # into its own type") (in v5.17) moved slab information from struct page to # struct slab. The former can be casted to the latter. def _get_slab_type(prog: Program) -> Type: try: return prog.type("struct slab *") except LookupError: return prog.type("struct page *") def slab_cache_is_merged(slab_cache: Object) -> bool: """ Return whether a slab cache has been merged with any other slab caches. Unless configured otherwise, the kernel may merge slab caches of similar sizes together. See the `SLUB users guide `_ and ``slab_merge``/``slab_nomerge`` in the `kernel parameters documentation `_. This can cause confusion, as only the name of the first cache will be found, and objects of different types will be mixed in the same slab cache. For example, suppose that we have two types, ``struct foo`` and ``struct bar``, which have the same size but are otherwise unrelated. If the kernel creates a slab cache named ``foo`` for ``struct foo``, then another slab cache named ``bar`` for ``struct bar``, then slab cache ``foo`` will be reused instead of creating another cache for ``bar``. So the following will fail:: find_slab_cache("bar") And the following will also return ``struct bar *`` objects errantly casted to ``struct foo *``:: slab_cache_for_each_allocated_object(find_slab_cache("foo"), "struct foo") Unfortunately, these issues are difficult to work around generally, so one must be prepared to handle them on a case-by-case basis (e.g., by looking up the slab cache by its variable name and by checking that members of the structure make sense for the expected type). :param slab_cache: ``struct kmem_cache *`` """ return slab_cache.refcount > 1 @takes_program_or_default def get_slab_cache_aliases(prog: Program) -> Dict[str, str]: """ Return a dict mapping slab cache name to the cache it was merged with. 
The SLAB and SLUB subsystems can merge caches with similar settings and object sizes, as described in the documentation of :func:`slab_cache_is_merged()`. In some cases, the information about which caches were merged is lost, but in other cases, we can reconstruct the info. This function reconstructs the mapping, but requires that the kernel is configured with ``CONFIG_SLUB`` and ``CONFIG_SYSFS``. The returned dict maps from original cache name, to merged cache name. You can use this mapping to discover the correct cache to lookup via :func:`find_slab_cache()`. The dict contains an entry only for caches which were merged into a cache of a different name. >>> cache_to_merged = get_slab_cache_aliases() >>> cache_to_merged["dnotify_struct"] 'avc_xperms_data' >>> "avc_xperms_data" in cache_to_merged False >>> find_slab_cache("dnotify_struct") is None True >>> find_slab_cache("avc_xperms_data") is None False :warning: This function will only work on kernels which are built with ``CONFIG_SLUB`` and ``CONFIG_SYSFS`` enabled. :returns: Mapping of slab cache name to final merged name :raises LookupError: If the helper fails because the debugged kernel doesn't have the required configuration """ try: slab_kset = prog["slab_kset"] except KeyError: raise LookupError( "Couldn't find SLUB sysfs information: get_slab_cache_aliases() " "requires CONFIG_SLUB and CONFIG_SYSFS enabled in the debugged " "kernel." 
) from None link_flag = prog.constant("KERNFS_LINK") name_map = {} for child in rbtree_inorder_for_each_entry( "struct kernfs_node", slab_kset.kobj.sd.dir.children.address_of_(), "rb", ): if child.flags & link_flag: cache = container_of( cast("struct kobject *", child.symlink.target_kn.priv), "struct kmem_cache", "kobj", ) original_name = fsdecode(child.name.string_()) target_name = fsdecode(cache.name.string_()) if original_name != target_name: name_map[original_name] = target_name return name_map @takes_program_or_default def for_each_slab_cache(prog: Program) -> Iterator[Object]: """ Iterate over all slab caches. :return: Iterator of ``struct kmem_cache *`` objects. """ return list_for_each_entry( "struct kmem_cache", prog["slab_caches"].address_of_(), "list" ) @takes_program_or_default def find_slab_cache(prog: Program, name: Union[str, bytes]) -> Optional[Object]: """ Return the slab cache with the given name. :param name: Slab cache name. :return: ``struct kmem_cache *`` """ if isinstance(name, str): name = name.encode() for s in for_each_slab_cache(prog): if s.name.string_() == name: return s return None @takes_program_or_default def print_slab_caches(prog: Program) -> None: """Print the name and ``struct kmem_cache *`` value of all slab caches.""" for s in for_each_slab_cache(prog): name = escape_ascii_string(s.name.string_(), escape_backslash=True) print(f"{name} ({s.type_.type_name()})0x{s.value_():x}") class SlabCorruptionError(ValidationError): """ Error raised when a corruption is encountered in a slab allocator data structure. """ class SlabFreelistCycleError(SlabCorruptionError): """ Error raised when a cycle is encountered in a slab allocator freelist. """ # Between SLUB, SLAB, their respective configuration options, and the # differences between kernel versions, there is a lot of state that we need to # keep track of to inspect the slab allocator. 
It isn't pretty, but this class # and its subclasses track all of that complexity so that we can share code # between slab helpers. class _SlabCacheHelper: def __init__(self, slab_cache: Object) -> None: self._prog = slab_cache.prog_ self._slab_cache = slab_cache.read_() self._freelist_error: Optional[Exception] = None def _page_objects( self, page: Object, slab: Object, pointer_type: Type ) -> Iterator[Object]: raise NotImplementedError() def for_each_allocated_object(self, type: Union[str, Type]) -> Iterator[Object]: if self._freelist_error: raise self._freelist_error pointer_type = self._prog.pointer_type(self._prog.type(type)) slab_type = _get_slab_type(self._prog) # Get the underlying implementation directly to avoid overhead on each # page. PageSlab = _get_PageSlab_impl(self._prog) for page in for_each_page(self._prog): try: if not PageSlab(page): continue except FaultError: continue slab = cast(slab_type, page) if slab.slab_cache == self._slab_cache: yield from self._page_objects(page, slab, pointer_type) def object_info( self, page: Object, slab: Object, addr: int ) -> "Optional[SlabObjectInfo]": raise NotImplementedError() class _SlabCacheHelperSlub(_SlabCacheHelper): def __init__(self, slab_cache: Object) -> None: super().__init__(slab_cache) self._slab_cache_size = slab_cache.size.value_() try: self._red_left_pad = slab_cache.red_left_pad.value_() except AttributeError: self._red_left_pad = 0 # In SLUB, the freelist is a linked list with the next pointer located # at ptr + slab_cache->offset. freelist_offset = slab_cache.offset.value_() # If CONFIG_SLAB_FREELIST_HARDENED is enabled, then the next pointer is # obfuscated using slab_cache->random. 
try: freelist_random = slab_cache.random.value_() except AttributeError: self._freelist_dereference: Callable[[int], int] = self._prog.read_word else: ulong_size = sizeof(self._prog.type("unsigned long")) # Since Linux kernel commit 1ad53d9fa3f6 ("slub: improve bit # diffusion for freelist ptr obfuscation") in v5.7, a swab() was # added to the freelist dereferencing calculation. This commit was # backported to all stable branches which have # CONFIG_SLAB_FREELIST_HARDENED, but you can still encounter some # older stable kernels which don't have it. Unfortunately, there's # no easy way to detect whether it is in effect, since the commit # adds no struct field or other detectable difference. # # To handle this, we implement both methods, and we start out with a # "trial" function. On the first time we encounter a non-NULL # freelist, we try using the method with the swab(), and test # whether the resulting pointer may be dereferenced. If it can, we # commit to using that method forever. If it cannot, we switch to # the version without swab() and commit to using that. 
def _freelist_dereference_swab(ptr_addr: int) -> int: # *ptr_addr ^ slab_cache->random ^ byteswap(ptr_addr) return ( self._prog.read_word(ptr_addr) ^ freelist_random ^ int.from_bytes(ptr_addr.to_bytes(ulong_size, "little"), "big") ) def _freelist_dereference_no_swab(ptr_addr: int) -> int: # *ptr_addr ^ slab_cache->random ^ ptr_addr return self._prog.read_word(ptr_addr) ^ freelist_random ^ ptr_addr def _try_hardened_freelist_dereference(ptr_addr: int) -> int: result = _freelist_dereference_swab(ptr_addr) if result: try: self._prog.read_word(result) self._freelist_dereference = _freelist_dereference_swab except FaultError: result = _freelist_dereference_no_swab(ptr_addr) self._freelist_dereference = _freelist_dereference_no_swab return result self._freelist_dereference = _try_hardened_freelist_dereference def _slub_get_freelist( freelist_name: Callable[[], str], freelist: Object, freelist_set: Set[int] ) -> None: ptr = freelist.value_() while ptr: if ptr in freelist_set: raise SlabFreelistCycleError( f"{fsdecode(slab_cache.name.string_())} {freelist_name()} " "freelist contains cycle; " "may be corrupted or in the middle of update" ) freelist_set.add(ptr) ptr = self._freelist_dereference(ptr + freelist_offset) cpu_freelists: Set[int] = set() try: # cpu_slab doesn't exist for CONFIG_SLUB_TINY. cpu_slab = slab_cache.cpu_slab.read_() except AttributeError: pass else: # Since Linux kernel commit bb192ed9aa71 ("mm/slub: Convert most # struct page to struct slab by spatch") (in v5.17), the current # slab for a CPU is `struct slab *slab`. Before that, it is `struct # page *page`. 
cpu_slab_attr = "slab" if hasattr(cpu_slab, "slab") else "page" try: for cpu in for_each_online_cpu(self._prog): this_cpu_slab = per_cpu_ptr(cpu_slab, cpu) slab = getattr(this_cpu_slab, cpu_slab_attr).read_() if slab and slab.slab_cache == slab_cache: _slub_get_freelist( lambda: f"cpu {cpu}", this_cpu_slab.freelist, cpu_freelists ) except (SlabCorruptionError, FaultError) as e: self._freelist_error = e self._slub_get_freelist = _slub_get_freelist self._cpu_freelists = cpu_freelists def _page_objects( self, page: Object, slab: Object, pointer_type: Type ) -> Iterator[Object]: freelist: Set[int] = set() self._slub_get_freelist(lambda: f"slab {hex(slab)}", slab.freelist, freelist) addr = page_to_virt(page).value_() + self._red_left_pad end = addr + self._slab_cache_size * slab.objects while addr < end: if addr not in freelist and addr not in self._cpu_freelists: yield Object(self._prog, pointer_type, value=addr) addr += self._slab_cache_size def object_info(self, page: Object, slab: Object, addr: int) -> "SlabObjectInfo": first_addr = page_to_virt(page).value_() + self._red_left_pad address = ( first_addr + (addr - first_addr) // self._slab_cache_size * self._slab_cache_size ) if address in self._cpu_freelists: allocated: Optional[bool] = False else: freelist: Set[int] = set() try: self._slub_get_freelist( lambda: f"slab {hex(slab)}", slab.freelist, freelist ) except (SlabCorruptionError, FaultError): allocated = False if address in freelist else None else: if address in freelist: allocated = False elif self._freelist_error: allocated = None else: allocated = True return SlabObjectInfo(self._slab_cache, slab, address, allocated) class _SlabCacheHelperSlab(_SlabCacheHelper): def __init__(self, slab_cache: Object) -> None: super().__init__(slab_cache) self._slab_cache_size = slab_cache.size.value_() self._freelist_type = self._prog.type("freelist_idx_t *") try: self._obj_offset = slab_cache.obj_offset.value_() except AttributeError: self._obj_offset = 0 
self._slab_cache_num = slab_cache.num.value_() cpu_cache = slab_cache.cpu_cache.read_() cpu_caches_avail: Set[int] = set() for cpu in for_each_online_cpu(self._prog): ac = per_cpu_ptr(cpu_cache, cpu) for i in range(ac.avail): cpu_caches_avail.add(ac.entry[i].value_()) self._cpu_caches_avail = cpu_caches_avail def _slab_freelist(self, slab: Object) -> Set[int]: # In SLAB, the freelist is an array of free object indices. freelist = cast(self._freelist_type, slab.freelist) return {freelist[i].value_() for i in range(slab.active, self._slab_cache_num)} def _page_objects( self, page: Object, slab: Object, pointer_type: Type ) -> Iterator[Object]: freelist = self._slab_freelist(slab) s_mem = slab.s_mem.value_() for i in range(self._slab_cache_num): if i in freelist: continue addr = s_mem + i * self._slab_cache_size + self._obj_offset if addr in self._cpu_caches_avail: continue yield Object(self._prog, pointer_type, value=addr) def object_info(self, page: Object, slab: Object, addr: int) -> "SlabObjectInfo": s_mem = slab.s_mem.value_() object_index = (addr - s_mem) // self._slab_cache_size object_address = s_mem + object_index * self._slab_cache_size return SlabObjectInfo( self._slab_cache, slab, object_address, allocated=object_address not in self._cpu_caches_avail and object_index not in self._slab_freelist(slab), ) class _SlabCacheHelperSlob(_SlabCacheHelper): def for_each_allocated_object(self, type: Union[str, Type]) -> Iterator[Object]: raise ValueError("SLOB is not supported") def object_info(self, page: Object, slab: Object, addr: int) -> None: return None def _get_slab_cache_helper(slab_cache: Object) -> _SlabCacheHelper: prog = slab_cache.prog_ try: type = prog.cache["slab_cache_helper_type"] except KeyError: try: prog.type("freelist_idx_t *") type = _SlabCacheHelperSlab except LookupError: if hasattr(slab_cache, "offset"): type = _SlabCacheHelperSlub else: type = _SlabCacheHelperSlob prog.cache["slab_cache_helper_type"] = type return type(slab_cache) def 
slab_cache_for_each_allocated_object( slab_cache: Object, type: Union[str, Type] ) -> Iterator[Object]: """ Iterate over all allocated objects in a given slab cache. Only the SLUB and SLAB allocators are supported; SLOB does not store enough information to identify objects in a slab cache. >>> dentry_cache = find_slab_cache("dentry") >>> next(slab_cache_for_each_allocated_object(dentry_cache, "struct dentry")) *(struct dentry *)0xffff905e41404000 = { ... } :param slab_cache: ``struct kmem_cache *`` :param type: Type of object in the slab cache. :return: Iterator of ``type *`` objects. """ return _get_slab_cache_helper(slab_cache).for_each_allocated_object(type) def _find_containing_slab( prog: Program, addr: int ) -> Optional[Tuple[Object, Object, Object]]: page = virt_to_page(prog, addr) try: page = compound_head(page) if not PageSlab(page): return None except FaultError: # Page does not exist return None slab = cast(_get_slab_type(prog), page) try: return slab.slab_cache, page, slab except AttributeError: # SLOB return None @takes_program_or_default def slab_object_info(prog: Program, addr: IntegerLike) -> "Optional[SlabObjectInfo]": """ Get information about an address if it is in a slab object. >>> ptr = find_task(1).comm.address_of_() >>> info = slab_object_info(ptr) >>> info SlabObjectInfo(slab_cache=Object(prog, 'struct kmem_cache *', address=0xffffdb93c0045e18), slab=Object(prog, 'struct slab *', value=0xffffdb93c0045e00), address=0xffffa2bf81178000, allocated=True) Note that :attr:`SlabObjectInfo.address` is the start address of the object, which may be less than *addr* if *addr* points to a member inside of the object: >>> ptr.value_() - info.address 1496 >>> offsetof(prog.type("struct task_struct"), "comm") 1496 Note that SLOB does not store enough information to identify slab objects, so if the kernel is configured to use SLOB, this will always return ``None``. 
:param addr: ``void *`` :return: :class:`SlabObjectInfo` if *addr* is in a slab object, or ``None`` if not. """ addr = operator.index(addr) if not in_direct_map(prog, addr): return None result = _find_containing_slab(prog, addr) if result is None: return None slab_cache, page, slab = result return _get_slab_cache_helper(slab_cache).object_info(page, slab, addr) class SlabObjectInfo: """Information about an object in the slab allocator.""" slab_cache: Object """``struct kmem_cache *`` that the slab object is from.""" slab: Object """ Slab containing the slab object. Since Linux v5.17, this is a ``struct slab *``. Before that, it is a ``struct page *``. """ address: int """Address of the slab object.""" allocated: Optional[bool] """ ``True`` if the object is allocated, ``False`` if it is free, or ``None`` if not known because the slab cache is corrupted. """ def __init__( self, slab_cache: Object, slab: Object, address: int, allocated: Optional[bool] ) -> None: self.slab_cache = slab_cache self.slab = slab self.address = address self.allocated = allocated def __repr__(self) -> str: return f"SlabObjectInfo(slab_cache={self.slab_cache!r}, slab={self.slab!r}, address={hex(self.address)}, allocated={self.allocated})" @takes_program_or_default def find_containing_slab_cache(prog: Program, addr: IntegerLike) -> Object: """ Get the slab cache that an address was allocated from, if any. Note that SLOB does not store enough information to identify objects in a slab cache, so if the kernel is configured to use SLOB, this will always return ``NULL``. :param addr: ``void *`` :return: ``struct kmem_cache *`` containing *addr*, or ``NULL`` if *addr* is not from a slab cache. 
""" if not in_direct_map(prog, addr): return NULL(prog, "struct kmem_cache *") result = _find_containing_slab(prog, operator.index(addr)) if result is None: return NULL(prog, "struct kmem_cache *") return result[0].read_() drgn-0.0.31/drgn/helpers/linux/stackdepot.py000066400000000000000000000040501477777462700210260ustar00rootroot00000000000000# Copyright (c) Google LLC # SPDX-License-Identifier: LGPL-2.1-or-later """ Stack Depot ----------- The ``drgn.helpers.linux.stackdepot`` module provides helpers for working with the stack trace storage from :linux:`include/linux/stackdepot.h` used by KASAN and other kernel debugging tools. """ from typing import Optional from drgn import Object, StackTrace, cast, reinterpret __all__ = ("stack_depot_fetch",) def stack_depot_fetch(handle: Object) -> Optional[StackTrace]: """ Returns a stack trace for the given stack handle. :param handle: ``depot_stack_handle_t`` :return: The stack trace, or ``None`` if not available. """ prog = handle.prog_ handle_parts = reinterpret("union handle_parts", handle) # Renamed in Linux kernel commit 961c949b012f ("lib/stackdepot: rename slab # to pool") (in v6.3). try: stack_pools = prog["stack_pools"] except KeyError: pool = prog["stack_slabs"][handle_parts.slabindex] else: # Linux kernel commit 3ee34eabac2a ("lib/stackdepot: fix first entry # having a 0-handle") (in v6.9-rc1) changed the meaning of pool_index. # Linux kernel commit a6c1d9cb9a68 ("stackdepot: rename pool_index to # pool_index_plus_1") (in v6.9-rc3) renamed pool_index to reflect the # new meaning. This will therefore be wrong for v6.9-rc[1-2] and # v6.8.[3-4]. try: pool_index = handle_parts.pool_index_plus_1 - 1 except AttributeError: pool_index = handle_parts.pool_index pool = stack_pools[pool_index] if not pool: return None # This has remained the same since the stack depot was introduced in Linux # kernel commit cd11016e5f52 ("mm, kasan: stackdepot implementation. 
Enable # stackdepot for SLAB") (in v4.6), when it was known as STACK_ALLOC_ALIGN. DEPOT_STACK_ALIGN = 4 record = cast( "struct stack_record *", pool + (handle_parts.offset << DEPOT_STACK_ALIGN) ) return prog.stack_trace_from_pcs([record.entries[x] for x in range(record.size)]) drgn-0.0.31/drgn/helpers/linux/tc.py000066400000000000000000000037111477777462700172760ustar00rootroot00000000000000# Copyright (c) ByteDance, Inc. and its affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Traffic Control (TC) -------------------- The ``drgn.helpers.linux.tc`` module provides helpers for working with the Linux kernel Traffic Control (TC) subsystem. """ import operator from drgn import NULL, IntegerLike, Object from drgn.helpers.linux.list import hlist_for_each_entry, list_for_each_entry __all__ = ("qdisc_lookup",) def qdisc_lookup(dev: Object, major: IntegerLike) -> Object: """ Get a Qdisc from a device and a major handle number. It is worth noting that conventionally handles are hexadecimal, e.g. ``10:`` in a ``tc`` command means major handle 0x10. :param dev: ``struct net_device *`` :param major: Qdisc major handle number. :return: ``struct Qdisc *`` (``NULL`` if not found) """ major = operator.index(major) << 16 roots = [dev.qdisc] if dev.ingress_queue: roots.append(dev.ingress_queue.qdisc_sleeping) # Since Linux kernel commit 59cc1f61f09c ("net: sched: convert qdisc linked # list to hashtable") (in v4.7), a device's child Qdiscs are maintained in # a hashtable in its struct net_device. Before that, they are maintained in # a linked list in their root Qdisc. 
use_hashtable = dev.prog_.type("struct net_device").has_member("qdisc_hash") for root in roots: if root.handle == major: return root if use_hashtable: for head in root.dev_queue.dev.qdisc_hash: for qdisc in hlist_for_each_entry( "struct Qdisc", head.address_of_(), "hash" ): if qdisc.handle == major: return qdisc else: for qdisc in list_for_each_entry( "struct Qdisc", root.list.address_of_(), "list" ): if qdisc.handle == major: return qdisc return NULL(dev.prog_, "struct Qdisc *") drgn-0.0.31/drgn/helpers/linux/tcp.py000066400000000000000000000010271477777462700174540ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ TCP --- The ``drgn.helpers.linux.tcp`` module provides helpers for working with the TCP protocol in the Linux kernel. """ from drgn import Object, cast __all__ = ("sk_tcpstate",) def sk_tcpstate(sk: Object) -> Object: """ Return the TCP protocol state of a socket. :param sk: ``struct sock *`` :return: TCP state enum value. """ return cast(sk.prog_["TCP_ESTABLISHED"].type_, sk.__sk_common.skc_state) drgn-0.0.31/drgn/helpers/linux/user.py000066400000000000000000000037241477777462700176520ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Users ----- The ``drgn.helpers.linux.user`` module provides helpers for working with users in the Linux kernel. 
""" import operator from typing import Iterator, Union from drgn import NULL, IntegerLike, Object, Program from drgn.helpers.common.prog import takes_program_or_default from drgn.helpers.linux.list import hlist_for_each_entry __all__ = ( "find_user", "for_each_user", ) def _kuid_val(uid: Union[Object, IntegerLike]) -> int: if isinstance(uid, Object) and uid.type_.type_name() == "kuid_t": uid = uid.val return operator.index(uid) @takes_program_or_default def find_user(prog: Program, uid: Union[Object, IntegerLike]) -> Object: """ Return the user structure with the given UID. :param uid: ``kuid_t`` object or integer. :return: ``struct user_struct *`` (``NULL`` if not found) """ try: uidhashentry = prog.cache["uidhashentry"] except KeyError: uidhash_table = prog["uidhash_table"] uidhash_sz = len(uidhash_table) uidhash_bits = uidhash_sz.bit_length() - 1 uidhash_mask = uidhash_sz - 1 def uidhashentry(uid: int) -> Object: hash = ((uid >> uidhash_bits) + uid) & uidhash_mask return uidhash_table + hash prog.cache["uidhashentry"] = uidhashentry uid = _kuid_val(uid) for user in hlist_for_each_entry( "struct user_struct", uidhashentry(uid), "uidhash_node" ): if user.uid.val == uid: return user return NULL(prog, "struct user_struct *") @takes_program_or_default def for_each_user(prog: Program) -> Iterator[Object]: """ Iterate over all users in the system. :return: Iterator of ``struct user_struct *`` objects. """ for hash_entry in prog["uidhash_table"]: yield from hlist_for_each_entry( "struct user_struct", hash_entry, "uidhash_node" ) drgn-0.0.31/drgn/helpers/linux/wait.py000066400000000000000000000053101477777462700176310ustar00rootroot00000000000000# Copyright (c) 2023, Oracle and/or its affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ Wait Queues ----------- The ``drgn.helpers.linux.wait`` module provides helpers for working with wait queues (``wait_queue_head_t`` and ``wait_queue_entry_t``) from :linux:`include/linux/wait.h`. .. 
note:: Since Linux 4.13, entries in a wait queue have type ``wait_queue_entry_t``. Before that, the type was named ``wait_queue_t``. """ from typing import Iterator from drgn import Object, cast from drgn.helpers.linux.list import list_empty, list_for_each_entry __all__ = ( "waitqueue_active", "waitqueue_for_each_entry", "waitqueue_for_each_task", ) def _get_wait_queue_head(wq: Object) -> Object: # Linux kernel commit 2055da97389a ("sched/wait: Disambiguate # wq_entry->task_list and wq_head->task_list naming") (in v4.13) renamed # the task_list member to head. try: return wq.head except AttributeError: return wq.task_list def waitqueue_active(wq: Object) -> bool: """ Return whether a wait queue has any waiters. :param wq: ``wait_queue_head_t *`` """ head = _get_wait_queue_head(wq) return not list_empty(head.address_of_()) def waitqueue_for_each_entry(wq: Object) -> Iterator[Object]: """ Iterate over all entries in a wait queue. :param wq: ``wait_queue_head_t *`` :return: Iterator of ``wait_queue_entry_t *`` or ``wait_queue_t *`` objects depending on the kernel version. """ head_addr = _get_wait_queue_head(wq).address_of_() prog = wq.prog_ # Linux kernel commit ac6424b981bc ("sched/wait: Rename wait_queue_t => # wait_queue_entry_t") (in v4.13) renamed the entry type and commit # 2055da97389a ("sched/wait: Disambiguate wq_entry->task_list and # wq_head->task_list naming") (in v4.13) renamed .task_list to .entry. try: wait_queue_entry_type, link = prog.type("wait_queue_entry_t"), "entry" except LookupError: wait_queue_entry_type, link = prog.type("wait_queue_t"), "task_list" return list_for_each_entry(wait_queue_entry_type, head_addr, link) def waitqueue_for_each_task(wq: Object) -> Iterator[Object]: """ Iterate over all tasks waiting on a wait queue. .. warning:: This comes from ``wait_queue_entry_t::private``, which usually stores a task. However, some wait queue entries store a different pointer type, in which case this will return garbage. 
:param wq: ``wait_queue_head_t *`` :return: Iterator of ``struct task_struct *`` objects. """ task_structp_type = wq.prog_.type("struct task_struct *") for entry in waitqueue_for_each_entry(wq): yield cast(task_structp_type, entry.private) drgn-0.0.31/drgn/helpers/linux/xarray.py000066400000000000000000000175411477777462700202040ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ XArrays ------- The ``drgn.helpers.linux.xarray`` module provides helpers for working with the `XArray `_ data structure from :linux:`include/linux/xarray.h`. .. note:: XArrays were introduced in Linux 4.20 as a replacement for `radix trees`_. To make it easier to work with data structures that were changed from a radix tree to an XArray (like ``struct address_space::i_pages``), drgn treats XArrays and radix trees interchangeably in some cases. Specifically, :func:`~drgn.helpers.linux.xarray.xa_load()` is equivalent to :func:`~drgn.helpers.linux.radixtree.radix_tree_lookup()`, and :func:`~drgn.helpers.linux.xarray.xa_for_each()` is equivalent to :func:`~drgn.helpers.linux.radixtree.radix_tree_for_each()`, except that the radix tree helpers assume ``advanced=False``. (Therefore, :func:`~drgn.helpers.linux.xarray.xa_load()` and :func:`~drgn.helpers.linux.xarray.xa_for_each()` also accept a ``struct radix_tree_root *``, and :func:`~drgn.helpers.linux.radixtree.radix_tree_lookup()` and :func:`~drgn.helpers.linux.radixtree.radix_tree_for_each()` also accept a ``struct xarray *``.) 
""" from typing import Iterator, Optional, Tuple from _drgn import _linux_helper_xa_load from drgn import NULL, IntegerLike, Object, cast __all__ = ( "xa_for_each", "xa_is_value", "xa_is_zero", "xa_load", "xa_to_value", ) _XA_ZERO_ENTRY = 1030 # xa_mk_internal(257) def _xa_is_node(entry_value: int) -> bool: return (entry_value & 3) == 2 and entry_value > 4096 def xa_load(xa: Object, index: IntegerLike, *, advanced: bool = False) -> Object: """ Look up the entry at a given index in an XArray. >>> entry = xa_load(inode.i_mapping.i_pages.address_of_(), 2) >>> cast("struct page *", entry) *(struct page *)0xffffed6980306f40 = { ... } :param xa: ``struct xarray *`` :param index: Entry index. :param advanced: Whether to return nodes only visible to the XArray advanced API. If ``False``, zero entries (see :func:`xa_is_zero()`) will be returned as ``NULL``. :return: ``void *`` found entry, or ``NULL`` if not found. """ entry = _linux_helper_xa_load(xa, index) if not advanced and entry.value_() == _XA_ZERO_ENTRY: return NULL(xa.prog_, "void *") return entry class _XAIteratorNode: def __init__(self, node: Object, index: int) -> None: self.slots = node.slots self.shift = node.shift.value_() self.index = index self.next_slot = 0 def xa_for_each(xa: Object, *, advanced: bool = False) -> Iterator[Tuple[int, Object]]: """ Iterate over all of the entries in an XArray. >>> for index, entry in xa_for_each(inode.i_mapping.i_pages.address_of_()): ... print(index, entry) ... 0 (void *)0xffffed6980356140 1 (void *)0xffffed6980306f80 2 (void *)0xffffed6980306f40 3 (void *)0xffffed6980355b40 :param xa: ``struct xarray *`` :param advanced: Whether to return nodes only visible to the XArray advanced API. If ``False``, zero entries (see :func:`xa_is_zero()`) will be skipped. :return: Iterator of (index, ``void *``) tuples. """ prog = xa.prog_ def should_yield(entry_value: int) -> bool: return entry_value != 0 # This handles three cases: # # 1. XArrays. # 2. 
Radix trees since Linux kernel commit f8d5d0cc145c ("xarray: Add # definition of struct xarray") (in v4.20) redefined them in terms of # XArrays. These reuse the XArray structures and are close enough to # case 1 that the same code handles both. # 3. Radix trees before that commit. These are similar to cases 1 and 2, # but they have different type and member names, use different flags in # the lower bits (see Linux kernel commit 3159f943aafd ("xarray: Replace # exceptional entries") (in v4.20)), and represent sibling entries # differently (see Linux kernel commit 02c02bf12c5d ("xarray: Change # definition of sibling entries") (in v4.20)). try: entry = xa.xa_head.read_() except AttributeError: entry = xa.rnode node_type = entry.type_ entry = cast("void *", entry) # Return > 0 if radix_tree_is_internal_node(), < 0 if # is_sibling_entry(), and 0 otherwise. def is_internal(slots: Optional[Object], entry_value: int) -> int: if (entry_value & 3) == 1: # slots must be a reference object, so address_ is never None. if slots is not None and ( slots.address_ <= entry_value < slots[len(slots)].address_ # type: ignore[operator] ): return -1 else: return 1 return 0 # entry_to_node() def to_node(entry_value: int) -> Object: return Object(prog, node_type, entry_value - 1) else: node_type = prog.type("struct xa_node *") # Return > 0 if xa_is_node(), < 0 if xa_is_sibling(), and 0 otherwise. def is_internal(slots: Optional[Object], entry_value: int) -> int: if _xa_is_node(entry_value): return 1 elif (entry_value & 3) == 2 and entry_value < 256: return -1 else: return 0 # xa_to_node() def to_node(entry_value: int) -> Object: return Object(prog, node_type, entry_value - 2) if not advanced: # We're intentionally redefining should_yield() for this case. 
def should_yield(entry_value: int) -> bool: # noqa: F811 return entry_value != 0 and entry_value != _XA_ZERO_ENTRY entry_value = entry.value_() internal = is_internal(None, entry_value) if internal > 0: stack = [_XAIteratorNode(to_node(entry_value), 0)] else: if internal == 0 and should_yield(entry_value): yield 0, entry return while stack: node = stack[-1] if node.next_slot >= len(node.slots): stack.pop() continue entry = node.slots[node.next_slot].read_() entry_value = entry.value_() index = node.index + (node.next_slot << node.shift) node.next_slot += 1 internal = is_internal(node.slots, entry_value) if internal > 0: stack.append(_XAIteratorNode(to_node(entry_value), index)) elif internal == 0 and should_yield(entry_value): yield index, entry def xa_is_value(entry: Object) -> bool: """ Return whether an XArray entry is a value. See :func:`xa_to_value()`. :param entry: ``void *`` """ return (entry.value_() & 1) != 0 def xa_to_value(entry: Object) -> Object: """ Return the value in an XArray entry. In addition to pointers, XArrays can store integers between 0 and ``LONG_MAX``. If :func:`xa_is_value()` returns ``True``, use this to get the stored integer. >>> entry = xa_load(xa, 9) >>> entry (void *)0xc9 >>> xa_is_value(entry) True >>> xa_to_value(entry) (unsigned long)100 :param entry: ``void *`` :return: ``unsigned long`` """ return cast("unsigned long", entry) >> 1 def xa_is_zero(entry: Object) -> bool: """ Return whether an XArray entry is a "zero" entry. A zero entry is an entry that was reserved but is not present. These are only visible to the XArray advanced API, so they are only returned by :func:`xa_load()` and :func:`xa_for_each()` when ``advanced = True``. 
>>> entry = xa_load(xa, 10, advanced=True) >>> entry (void *)0x406 >>> xa_is_zero(entry) True >>> xa_load(xa, 10) (void *)0 :param entry: ``void *`` """ return entry.value_() == _XA_ZERO_ENTRY drgn-0.0.31/drgn/internal/000077500000000000000000000000001477777462700153275ustar00rootroot00000000000000drgn-0.0.31/drgn/internal/__init__.py000066400000000000000000000003041477777462700174350ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """ drgn internals This package contains modules internal to drgn. You should not use them. """ drgn-0.0.31/drgn/internal/repl.py000066400000000000000000000037001477777462700166430ustar00rootroot00000000000000# Copyright (c) 2024, Oracle and/or its affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """Compatibility shim between drgn and the pyrepl/code modules""" import os import sys from typing import Any, Dict __all__ = ("interact", "readline") # Python 3.13 introduces a new REPL implemented by the "_pyrepl" internal # module. It includes features such as colored output and multiline editing. # Unfortunately, there is no public API exposing these abilities to users, even # in the "code" module. We'd like to give the best experience possible, so we'll # detect _pyrepl and try to use it where possible. try: # The official Python interpreter honors this environment variable to # disable the new REPL. We do the same, which also gives users an escape # hatch if any of the internals we're messing with change. if os.environ.get("PYTHON_BASIC_REPL"): raise ModuleNotFoundError() # Unfortunately, the typeshed library behind mypy explicitly removed type # stubs for these modules. This makes sense as they are private APIs, but it # means we need to disable mypy checks. 
from _pyrepl import readline # type: ignore from _pyrepl.console import InteractiveColoredConsole # type: ignore from _pyrepl.simple_interact import ( # type: ignore run_multiline_interactive_console, ) # This _setup() function clobbers the readline completer, but it is # protected so it only runs once. Call it early so that our overridden # completer doesn't get clobbered. readline._setup({}) def interact(local: Dict[str, Any], banner: str) -> None: console = InteractiveColoredConsole(local) print(banner, file=sys.stderr) run_multiline_interactive_console(console) except (ModuleNotFoundError, ImportError, AttributeError): import code import readline def interact(local: Dict[str, Any], banner: str) -> None: code.interact(banner=banner, exitmsg="", local=local) drgn-0.0.31/drgn/internal/rlcompleter.py000066400000000000000000000073301477777462700202340ustar00rootroot00000000000000# Copyright (c) Meta Platforms, Inc. and affiliates. # SPDX-License-Identifier: LGPL-2.1-or-later """Improved readline completer""" import builtins import keyword import re from typing import Any, Dict, List, Optional from drgn.internal.repl import readline _EXPR_RE = re.compile( r""" ( (?: # Expression allowing only .member and [key] (?:^|\.) # Either beginning of string or .member \w+ # Identifier (?: # [key], zero or more times \[ (?: \d+| # Integer key "(?:\\"|[^"])*"| # Double-quoted string key '(?:\\'|[^'])*' # Single-quoted string key ) \] )* )+ ) \.(\w*) # Attribute to complete """, re.VERBOSE, ) class Completer: """ This is a readline completer based on rlcompleter.Completer from the standard library. It allows expressions containing [key], where key is an integer or string. """ def __init__(self, namespace: Dict[str, Any]) -> None: self._namespace = namespace # _EXPR_RE can match these characters, so don't treat them as # delimiters. 
delims = re.sub("[]['\"\\\\]", "", readline.get_completer_delims()) readline.set_completer_delims(delims) def complete(self, text: str, state: int) -> Optional[str]: if not text.strip(): if state == 0: readline.insert_text("\t") readline.redisplay() return "" else: return None if state == 0: if "." in text: self._matches = self._expr_matches(text) else: self._matches = self._global_matches(text) if 0 <= state < len(self._matches): return self._matches[state] else: return None def _expr_matches(self, text: str) -> List[str]: m = _EXPR_RE.fullmatch(text) if not m: return [] expr, attr = m.group(1, 2) try: obj = eval(expr, self._namespace) except Exception: return [] noprefix: Optional[str] if attr == "": noprefix = "_" elif attr == "_": noprefix = "__" else: noprefix = None matches = set() for word in dir(obj): if word.startswith(attr) and not (noprefix and word.startswith(noprefix)): match = expr + "." + word try: value = getattr(obj, word) except Exception: pass else: if callable(value): match += "(" matches.add(match) return sorted(matches) def _global_matches(self, text: str) -> List[str]: matches = set() for word in keyword.kwlist: if word.startswith(text): if word in {"finally", "try"}: word += ":" elif word not in { "False", "None", "True", "break", "continue", "pass", "else", }: word += " " matches.add(word) for nspace in [self._namespace, builtins.__dict__]: for word, value in nspace.items(): if word.startswith(text): if callable(value): word += "(" matches.add(word) return sorted(matches) drgn-0.0.31/drgn/internal/sudohelper.py000066400000000000000000000041001477777462700200460ustar00rootroot00000000000000# Copyright (c) Stephen Brennan # SPDX-License-Identifier: LGPL-2.1-or-later """Helper for opening a file as root and transmitting it via unix socket""" import array from pathlib import Path import pickle import socket import subprocess import sys import tempfile from typing import Union _OPEN_VIA_SUDO_COMMAND = r""" import array import os import pickle 
import socket import sys sockpath = sys.argv[1] filename = sys.argv[2] flags = int(sys.argv[3]) mode = int(sys.argv[4]) sock = socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) sock.connect(sockpath) try: fd = os.open(filename, flags, mode) fds = array.array("i", [fd]) sock.sendmsg( [b"success"], [(socket.SOL_SOCKET, socket.SCM_RIGHTS, fds)], ) except Exception as e: sock.sendmsg([pickle.dumps(e)]) """ def open_via_sudo( path: Union[Path, str], flags: int, mode: int = 0o777, ) -> int: """Implements os.open() using sudo to get permissions""" # Currently does not support dir_fd argument with tempfile.TemporaryDirectory() as td: sockpath = Path(td) / "sock" with socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) as sock: sock.bind(str(sockpath)) subprocess.check_call( [ "sudo", "-p", f"[sudo] password for %p to open {path}: ", sys.executable, "-B", "-c", _OPEN_VIA_SUDO_COMMAND, sockpath, path, str(flags), str(mode), ], ) fds = array.array("i") msg, ancdata, flags, addr = sock.recvmsg( 4096, socket.CMSG_SPACE(fds.itemsize) ) for level, typ, data in ancdata: if level == socket.SOL_SOCKET and typ == socket.SCM_RIGHTS: data = data[: fds.itemsize] fds.frombytes(data) return fds[0] raise pickle.loads(msg) drgn-0.0.31/drgn/py.typed000066400000000000000000000000001477777462700152000ustar00rootroot00000000000000drgn-0.0.31/libdrgn/000077500000000000000000000000001477777462700142025ustar00rootroot00000000000000drgn-0.0.31/libdrgn/.gitignore000066400000000000000000000003611477777462700161720ustar00rootroot00000000000000*.la *.lo *.o .deps/ .dirstamp .libs/ /Makefile /Makefile.in /aclocal.m4 /autom4te.cache /config.log /config.status /configure /configure~ /html /libtool /c_keywords.inc /linux_kernel_object_find.inc /python/constants.c /python/docstrings.c drgn-0.0.31/libdrgn/Doxyfile000066400000000000000000003355321477777462700157230ustar00rootroot00000000000000# Doxyfile 1.9.1 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for 
a project. # # All text after a double hash (##) is considered a comment and is placed in # front of the TAG it is preceding. # # All text after a single hash (#) is considered a comment and will be ignored. # The format is: # TAG = value [value, ...] # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the configuration # file that follow. The default is UTF-8 which is also the encoding used for all # text before the first occurrence of this tag. Doxygen uses libiconv (or the # iconv built into libc) for the transcoding. See # https://www.gnu.org/software/libiconv/ for the list of possible encodings. # The default value is: UTF-8. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded by # double-quotes, unless you are using Doxywizard) that should identify the # project for which the documentation is generated. This name is used in the # title of most generated pages and in a few other places. # The default value is: My Project. PROJECT_NAME = libdrgn # The PROJECT_NUMBER tag can be used to enter a project or revision number. This # could be handy for archiving the generated documentation or if some version # control system is used. PROJECT_NUMBER = # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a # quick idea about the purpose of the project. Keep the description short. PROJECT_BRIEF = # With the PROJECT_LOGO tag one can specify a logo or an icon that is included # in the documentation. 
The maximum height of the logo should not exceed 55 # pixels and the maximum width should not exceed 200 pixels. Doxygen will copy # the logo to the output directory. PROJECT_LOGO = # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path # into which the generated documentation will be written. If a relative path is # entered, it will be relative to the location where doxygen was started. If # left blank the current directory will be used. OUTPUT_DIRECTORY = # If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- # directories (in 2 levels) under the output directory of each output format and # will distribute the generated files over these directories. Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes # performance problems for the file system. # The default value is: NO. CREATE_SUBDIRS = NO # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode # U+3044. # The default value is: NO. ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. 
# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, # Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), # Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, # Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), # Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, # Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, # Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, # Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English # The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all generated output in the proper direction. # Possible values are: None, LTR, RTL and Context. # The default value is: None. OUTPUT_TEXT_DIRECTION = None # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. # The default value is: YES. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief # description of a member or function before the detailed description # # Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. # The default value is: YES. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator that is # used to form the text in various listings. Each string in this list, if found # as the leading text of the brief description, will be stripped from the text # and the result, after processing the whole list, is used as the annotated # text. Otherwise, the brief description is used as-is. 
If left blank, the # following values are used ($name is automatically replaced with the name of # the entity):The $name class, The $name widget, The $name file, is, provides, # specifies, contains, represents, a, an and the. ABBREVIATE_BRIEF = "The $name class" \ "The $name widget" \ "The $name file" \ is \ provides \ specifies \ contains \ represents \ a \ an \ the # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # doxygen will generate a detailed section even if there is only a brief # description. # The default value is: NO. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. # The default value is: NO. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path # before files name in the file list and in the header files. If set to NO the # shortest path that makes the file name unique will be used # The default value is: YES. FULL_PATH_NAMES = YES # The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. # Stripping is only done if one of the specified strings matches the left-hand # part of the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the path to # strip. # # Note that you can specify absolute paths here, but also relative paths, which # will be relative from the directory where doxygen is started. # This tag requires that the tag FULL_PATH_NAMES is set to YES. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the # path mentioned in the documentation of a class, which tells the reader which # header file to include in order to use a class. 
If left blank only the name of # the header file containing the class definition is used. Otherwise one should # specify the list of include paths that are normally passed to the compiler # using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but # less readable) file names. This can be useful is your file systems doesn't # support long names like on DOS, Mac, or CD-ROM. # The default value is: NO. SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the # first line (until the first dot) of a Javadoc-style comment as the brief # description. If set to NO, the Javadoc-style will behave just like regular Qt- # style comments (thus requiring an explicit @brief command for a brief # description.) # The default value is: NO. JAVADOC_AUTOBRIEF = YES # If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line # such as # /*************** # as being the beginning of a Javadoc-style comment "banner". If set to NO, the # Javadoc-style will behave just like regular comments and it will not be # interpreted by doxygen. # The default value is: NO. JAVADOC_BANNER = NO # If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first # line (until the first dot) of a Qt-style comment as the brief description. If # set to NO, the Qt-style will behave just like regular Qt-style comments (thus # requiring an explicit \brief command for a brief description.) # The default value is: NO. QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a # multi-line C++ special comment block (i.e. a block of //! or /// comments) as # a brief description. This used to be the default behavior. The new default is # to treat a multi-line C++ comment block as a detailed description. Set this # tag to YES if you prefer the old behavior instead. 
# # Note that setting this tag to YES also means that rational rose comments are # not recognized any more. # The default value is: NO. MULTILINE_CPP_IS_BRIEF = NO # By default Python docstrings are displayed as preformatted text and doxygen's # special commands cannot be used. By setting PYTHON_DOCSTRING to NO the # doxygen's special commands can be used and the contents of the docstring # documentation blocks is shown as doxygen documentation. # The default value is: YES. PYTHON_DOCSTRING = YES # If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the # documentation from any documented member that it re-implements. # The default value is: YES. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new # page for each member. If set to NO, the documentation of a member will be part # of the file/class/namespace that contains it. # The default value is: NO. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen # uses this value to replace tabs by spaces in code fragments. # Minimum value: 1, maximum value: 16, default value: 4. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that act as commands in # the documentation. An alias has the form: # name=value # For example adding # "sideeffect=@par Side Effects:\n" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading # "Side Effects:". You can put \n's in the value part of an alias to insert # newlines (in the resulting output). You can put ^^ in the value part of an # alias to insert a newline as if a physical newline was in the original file. 
# When you need a literal { or } or , in the value part of an alias you have to # escape them by means of a backslash (\), this can lead to conflicts with the # commands \{ and \} for these it is advised to use the version @{ and @} or use # a double escape (\\{ and \\}) ALIASES = manpage{2}="\1(\2)" # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources # only. Doxygen will then generate output that is more tailored for C. For # instance, some of the names that are used will be different. The list of all # members will be omitted, etc. # The default value is: NO. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or # Python sources only. Doxygen will then generate output that is more tailored # for that language. For instance, namespaces will be presented as packages, # qualified scopes will look different, etc. # The default value is: NO. OPTIMIZE_OUTPUT_JAVA = NO # Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran # sources. Doxygen will then generate output that is tailored for Fortran. # The default value is: NO. OPTIMIZE_FOR_FORTRAN = NO # Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL # sources. Doxygen will then generate output that is tailored for VHDL. # The default value is: NO. OPTIMIZE_OUTPUT_VHDL = NO # Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice # sources only. Doxygen will then generate output that is more tailored for that # language. For instance, namespaces will be presented as modules, types will be # separated into more groups, etc. # The default value is: NO. OPTIMIZE_OUTPUT_SLICE = NO # Doxygen selects the parser to use depending on the extension of the files it # parses. With this tag you can assign which parser to use for a given # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. 
The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, # Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL, # Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files). For instance to make doxygen treat .inc files # as Fortran files (default is PHP), and .f files as C (default is Fortran), # use: inc=Fortran f=C. # # Note: For files without extension you can use no_extension as a placeholder. # # Note that for custom extensions you also need to set FILE_PATTERNS otherwise # the files are not read by doxygen. When specifying no_extension you should add # * to the FILE_PATTERNS. # # Note see also the list of default file extension mappings. EXTENSION_MAPPING = in=C # If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments # according to the Markdown format, which allows for more readable # documentation. See https://daringfireball.net/projects/markdown/ for details. # The output of markdown processing is further processed by doxygen, so you can # mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in # case of backward compatibilities issues. # The default value is: YES. MARKDOWN_SUPPORT = YES # When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up # to that level are automatically included in the table of contents, even if # they do not have an id attribute. # Note: This feature currently applies only to Markdown headings. # Minimum value: 0, maximum value: 99, default value: 5. # This tag requires that the tag MARKDOWN_SUPPORT is set to YES. 
TOC_INCLUDE_HEADINGS = 0 # When enabled doxygen tries to link words that correspond to documented # classes, or namespaces to their corresponding documentation. Such a link can # be prevented in individual cases by putting a % sign in front of the word or # globally by setting AUTOLINK_SUPPORT to NO. # The default value is: YES. AUTOLINK_SUPPORT = YES # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want # to include (a tag file for) the STL sources as input, then you should set this # tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); # versus func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. # The default value is: NO. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. # The default value is: NO. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip (see: # https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen # will parse them like normal C++ but will assume all classes use public instead # of private inheritance when no explicit protection keyword is present. # The default value is: NO. SIP_SUPPORT = NO # For Microsoft's IDL there are propget and propput attributes to indicate # getter and setter methods for a property. Setting this option to YES will make # doxygen to replace the get and set methods by a property in the documentation. # This will only work if the methods are indeed getting or setting a simple # type. If this is not the case, or you want to show the methods anyway, you # should set this option to NO. # The default value is: YES. 
IDL_PROPERTY_SUPPORT = YES # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. # The default value is: NO. DISTRIBUTE_GROUP_DOC = NO # If one adds a struct or class to a group and this option is enabled, then also # any nested class or struct is added to the same group. By default this option # is disabled and one has to add nested compounds explicitly via \ingroup. # The default value is: NO. GROUP_NESTED_COMPOUNDS = NO # Set the SUBGROUPING tag to YES to allow class member groups of the same type # (for instance a group of public functions) to be put as a subgroup of that # type (e.g. under the Public Functions section). Set it to NO to prevent # subgrouping. Alternatively, this can be done per class using the # \nosubgrouping command. # The default value is: YES. SUBGROUPING = YES # When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions # are shown inside the group in which they are included (e.g. using \ingroup) # instead of on a separate page (for HTML and Man pages) or section (for LaTeX # and RTF). # # Note that this feature does not work in combination with # SEPARATE_MEMBER_PAGES. # The default value is: NO. INLINE_GROUPED_CLASSES = NO # When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions # with only public data fields or simple typedef fields will be shown inline in # the documentation of the scope in which they are defined (i.e. file, # namespace, or group documentation), provided this scope is documented. If set # to NO, structs, classes, and unions are shown on a separate page (for HTML and # Man pages) or section (for LaTeX and RTF). # The default value is: NO. 
INLINE_SIMPLE_STRUCTS = NO # When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or # enum is documented as struct, union, or enum with the name of the typedef. So # typedef struct TypeS {} TypeT, will appear in the documentation as a struct # with name TypeT. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named TypeS. This can typically be # useful for C code in case the coding convention dictates that all compound # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. TYPEDEF_HIDES_STRUCT = NO # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be # an expensive process and often the same symbol appears multiple times in the # code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small # doxygen will become slower. If the cache is too large, memory is wasted. The # cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range # is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 # symbols. At the end of a run doxygen will report the cache usage and suggest # the optimal cache size from a speed point of view. # Minimum value: 0, maximum value: 9, default value: 0. LOOKUP_CACHE_SIZE = 0 # The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use # during processing. When set to 0 doxygen will based this on the number of # cores available in the system. You can set it explicitly to a value larger # than 0 to get more control over the balance between CPU load and processing # speed. At this moment only the input processing can be done using multiple # threads. Since this is still an experimental feature the default is set to 1, # which efficively disables parallel processing. Please report any issues you # encounter. 
Generating dot graphs in parallel is controlled by the # DOT_NUM_THREADS setting. # Minimum value: 0, maximum value: 32, default value: 1. NUM_PROC_THREADS = 1 #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in # documentation are documented, even if no documentation was available. Private # class members and static file members will be hidden unless the # EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. # Note: This will also disable the warnings about undocumented members that are # normally produced when WARNINGS is set to YES. # The default value is: NO. EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will # be included in the documentation. # The default value is: NO. EXTRACT_PRIVATE = NO # If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual # methods of a class will be included in the documentation. # The default value is: NO. EXTRACT_PRIV_VIRTUAL = NO # If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal # scope will be included in the documentation. # The default value is: NO. EXTRACT_PACKAGE = NO # If the EXTRACT_STATIC tag is set to YES, all static members of a file will be # included in the documentation. # The default value is: NO. EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, # only classes defined in header files are included. Does not have any effect # for Java sources. # The default value is: YES. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. 
If set to YES, local methods, # which are defined in the implementation section but not in the interface are # included in the documentation. If set to NO, only methods in the interface are # included. # The default value is: NO. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be # extracted and appear in the documentation as a namespace called # 'anonymous_namespace{file}', where file will be replaced with the base name of # the file that contains the anonymous namespace. By default anonymous namespace # are hidden. # The default value is: NO. EXTRACT_ANON_NSPACES = NO # If this flag is set to YES, the name of an unnamed parameter in a declaration # will be determined by the corresponding definition. By default unnamed # parameters remain unnamed in the output. # The default value is: YES. RESOLVE_UNNAMED_PARAMS = YES # If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all # undocumented members inside documented classes or files. If set to NO these # members will be included in the various overviews, but no documentation # section is generated. This option has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option # has no effect if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend # declarations. If set to NO, these declarations will be included in the # documentation. # The default value is: NO. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any # documentation blocks found inside the body of a function. If set to NO, these # blocks will be appended to the function's detailed documentation block. 
# The default value is: NO. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation that is typed after a # \internal command is included. If the tag is set to NO then the documentation # will be excluded. Set it to YES to include the internal documentation. # The default value is: NO. INTERNAL_DOCS = NO # With the correct setting of option CASE_SENSE_NAMES doxygen will better be # able to match the capabilities of the underlying filesystem. In case the # filesystem is case sensitive (i.e. it supports files in the same directory # whose names only differ in casing), the option must be set to YES to properly # deal with such files in case they appear in the input. For filesystems that # are not case sensitive the option should be be set to NO to properly deal with # output files written for symbols that only differ in casing, such as for two # classes, one named CLASS and the other named Class, and to also support # references to files without having to specify the exact matching casing. On # Windows (including Cygwin) and MacOS, users should typically set this option # to NO, whereas on Linux or other Unix flavors it should typically be set to # YES. # The default value is: system dependent. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with # their full class and namespace scopes in the documentation. If set to YES, the # scope will be hidden. # The default value is: NO. HIDE_SCOPE_NAMES = NO # If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will # append additional text to a page's title, such as Class Reference. If set to # YES the compound reference will be hidden. # The default value is: NO. HIDE_COMPOUND_REFERENCE= NO # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. 
SHOW_INCLUDE_FILES = YES # If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each # grouped member an include statement to the documentation, telling the reader # which file to include in order to use the member. # The default value is: NO. SHOW_GROUPED_MEMB_INC = NO # If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include # files with double quotes in the documentation rather than with sharp brackets. # The default value is: NO. FORCE_LOCAL_INCLUDES = NO # If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the # documentation for inline members. # The default value is: YES. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the # (detailed) documentation of file and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. # The default value is: YES. SORT_MEMBER_DOCS = YES # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief # descriptions of file, namespace and class members alphabetically by member # name. If set to NO, the members will appear in declaration order. Note that # this will also influence the order of the classes in the class list. # The default value is: NO. SORT_BRIEF_DOCS = NO # If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the # (brief and detailed) documentation of class members so that constructors and # destructors are listed first. If set to NO the constructors will appear in the # respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. # Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief # member documentation. # Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting # detailed member documentation. # The default value is: NO. SORT_MEMBERS_CTORS_1ST = NO # If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy # of group names into alphabetical order. 
If set to NO the group names will # appear in their defined order. # The default value is: NO. SORT_GROUP_NAMES = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by # fully-qualified names, including namespaces. If set to NO, the class list will # be sorted only by class name, not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the alphabetical # list. # The default value is: NO. SORT_BY_SCOPE_NAME = NO # If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper # type resolution of all parameters of a function it will reject a match between # the prototype and the implementation of a member function even if there is # only one candidate or it is obvious which candidate to choose by doing a # simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still # accept a match between prototype and implementation in such cases. # The default value is: NO. STRICT_PROTO_MATCHING = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo # list. This list is created by putting \todo commands in the documentation. # The default value is: YES. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test # list. This list is created by putting \test commands in the documentation. # The default value is: YES. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug # list. This list is created by putting \bug commands in the documentation. # The default value is: YES. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) # the deprecated list. This list is created by putting \deprecated commands in # the documentation. # The default value is: YES. 
GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional documentation # sections, marked by \if ... \endif and \cond # ... \endcond blocks. ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the # initial value of a variable or macro / define can have for it to appear in the # documentation. If the initializer consists of more lines than specified here # it will be hidden. Use a value of 0 to hide initializers completely. The # appearance of the value of individual variables and macros / defines can be # controlled using \showinitializer or \hideinitializer command in the # documentation regardless of this setting. # Minimum value: 0, maximum value: 10000, default value: 30. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated at # the bottom of the documentation of classes and structs. If set to YES, the # list will mention the files that were used to generate the documentation. # The default value is: YES. SHOW_USED_FILES = YES # Set the SHOW_FILES tag to NO to disable the generation of the Files page. This # will remove the Files entry from the Quick Index and from the Folder Tree View # (if specified). # The default value is: YES. SHOW_FILES = YES # Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces # page. This will remove the Namespaces entry from the Quick Index and from the # Folder Tree View (if specified). # The default value is: YES. SHOW_NAMESPACES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from # the version control system). Doxygen will invoke the program by executing (via # popen()) the command command input-file, where command is the value of the # FILE_VERSION_FILTER tag, and input-file is the name of an input file provided # by doxygen. 
Whatever the program writes to standard output is used as the file # version. For an example see the documentation. FILE_VERSION_FILTER = # The LAYOUT_FILE tag can be used to specify a layout file which will be parsed # by doxygen. The layout file controls the global structure of the generated # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml # will be used as the name of the layout file. # # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE # tag is left empty. LAYOUT_FILE = # The CITE_BIB_FILES tag can be used to specify one or more bib files containing # the reference definitions. This must be a list of .bib files. The .bib # extension is automatically appended if omitted. This requires the bibtex tool # to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. # For LaTeX the style of the bibliography can be controlled using # LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the # search path. See also \cite for info how to create references. CITE_BIB_FILES = #--------------------------------------------------------------------------- # Configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated to # standard output by doxygen. If QUIET is set to YES this implies that the # messages are off. # The default value is: NO. QUIET = NO # The WARNINGS tag can be used to turn on/off the warning messages that are # generated to standard error (stderr) by doxygen. If WARNINGS is set to YES # this implies that the warnings are on. # # Tip: Turn warnings on while writing the documentation. 
# The default value is: YES. WARNINGS = YES # If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate # warnings for undocumented members. If EXTRACT_ALL is set to YES then this flag # will automatically be disabled. # The default value is: YES. WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some parameters # in a documented function, or documenting parameters that don't exist or using # markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return # value. If set to NO, doxygen will only warn about wrong or incomplete # parameter documentation, but not about the absence of documentation. If # EXTRACT_ALL is set to YES then this flag will automatically be disabled. # The default value is: NO. WARN_NO_PARAMDOC = NO # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS # then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but # at the end of the doxygen process doxygen will return with a non-zero status. # Possible values are: NO, YES and FAIL_ON_WARNINGS. # The default value is: NO. WARN_AS_ERROR = NO # The WARN_FORMAT tag determines the format of the warning messages that doxygen # can produce. The string should contain the $file, $line, and $text tags, which # will be replaced by the file and line number from which the warning originated # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) # The default value is: $file:$line: $text. 
WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard # error (stderr). WARN_LOGFILE = #--------------------------------------------------------------------------- # Configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag is used to specify the files and/or directories that contain # documented source files. You may enter file names like myfile.cpp or # directories like /usr/src/myproject. Separate the files or directories with # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING # Note: If this tag is empty the current directory is searched. INPUT = . # This tag can be used to specify the character encoding of the source files # that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: # https://www.gnu.org/software/libiconv/) for the list of possible encodings. # The default value is: UTF-8. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # read by doxygen. # # Note the list of default checked file patterns might differ from the list of # default file extension mappings. 
# # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, # *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, # *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), # *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl, # *.ucf, *.qsf and *.ice. FILE_PATTERNS = *.c \ *.c.in \ *.h \ *.h.in # The RECURSIVE tag can be used to specify whether or not subdirectories should # be searched for input files as well. # The default value is: NO. RECURSIVE = YES # The EXCLUDE tag can be used to specify files and/or directories that should be # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. # # Note that relative paths are relative to the directory from which doxygen is # run. EXCLUDE = python # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or # directories that are symbolic links (a Unix file system feature) are excluded # from the input. # The default value is: NO. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring. 
Examples: ANamespace, AClass, # AClass::ANamespace, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include # command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and # *.h) to filter out the source-files in the directories. If left blank all # files are included. EXAMPLE_PATTERNS = * # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude commands # irrespective of the value of the RECURSIVE tag. # The default value is: NO. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or directories # that contain images that are to be included in the documentation (see the # \image command). IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command: # # # # where is the value of the INPUT_FILTER tag, and is the # name of an input file. Doxygen will then use the output that the filter # program writes to standard output. If FILTER_PATTERNS is specified, this tag # will be ignored. # # Note that the filter must not add or remove lines; it is applied before the # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. 
INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: pattern=filter # (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how # filters are used. If the FILTER_PATTERNS tag is empty or if none of the # patterns match the file name, INPUT_FILTER is applied. # # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will also be used to filter the input files that are used for # producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). # The default value is: NO. FILTER_SOURCE_FILES = NO # The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file # pattern. A pattern will override the setting for FILTER_PATTERN (if any) and # it is also possible to disable source filtering for a specific pattern using # *.ext= (so without naming a filter). # This tag requires that the tag FILTER_SOURCE_FILES is set to YES. FILTER_SOURCE_PATTERNS = # If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that # is part of the input, its contents will be placed on the main page # (index.html). This can be useful if you have a project on for instance GitHub # and want to reuse the introduction page also for the doxygen output. USE_MDFILE_AS_MAINPAGE = #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will be # generated. 
Documented entities will be cross-referenced with these sources. # # Note: To get rid of all source code in the generated output, make sure that # also VERBATIM_HEADERS is set to NO. # The default value is: NO. SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body of functions, # classes and enums directly into the documentation. # The default value is: NO. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any # special comment blocks from generated source code fragments. Normal C, C++ and # Fortran comments will always remain visible. # The default value is: YES. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES then for each documented # entity all documented functions referencing it will be listed. # The default value is: NO. REFERENCED_BY_RELATION = NO # If the REFERENCES_RELATION tag is set to YES then for each documented function # all documented entities called/used by that function will be listed. # The default value is: NO. REFERENCES_RELATION = NO # If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set # to YES then the hyperlinks from functions in REFERENCES_RELATION and # REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will # link to the documentation. # The default value is: YES. REFERENCES_LINK_SOURCE = YES # If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the # source code will show a tooltip with additional information such as prototype, # brief description and links to the definition and documentation. Since this # will make the HTML file larger and loading of large files a bit slower, you # can opt to disable this feature. # The default value is: YES. # This tag requires that the tag SOURCE_BROWSER is set to YES. 
SOURCE_TOOLTIPS = YES # If the USE_HTAGS tag is set to YES then the references to source code will # point to the HTML generated by the htags(1) tool instead of doxygen built-in # source browser. The htags tool is part of GNU's global source tagging system # (see https://www.gnu.org/software/global/global.html). You will need version # 4.8.6 or higher. # # To use it do the following: # - Install the latest version of global # - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file # - Make sure the INPUT points to the root of the source tree # - Run doxygen as normal # # Doxygen will invoke htags (and that will in turn invoke gtags), so these # tools must be available from the command line (i.e. in the search path). # # The result: instead of the source browser generated by doxygen, the links to # source code will now point to the output of htags. # The default value is: NO. # This tag requires that the tag SOURCE_BROWSER is set to YES. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a # verbatim copy of the header file for each class for which an include is # specified. Set to NO to disable this. # See also: Section \class. # The default value is: YES. VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # Configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all # compounds will be generated. Enable this if the project contains a lot of # classes, structs, unions or interfaces. # The default value is: YES. ALPHABETICAL_INDEX = YES # In case all classes in a project start with a common prefix, all classes will # be put under the same header in the alphabetical index. The IGNORE_PREFIX tag # can be used to specify a prefix (or a list of prefixes) that should be ignored # while generating the index headers. 
# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output # The default value is: YES. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a # relative path is entered the value of OUTPUT_DIRECTORY will be put in front of # it. # The default directory is: html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for each # generated HTML page (for example: .htm, .php, .asp). # The default value is: .html. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a user-defined HTML header file for # each generated HTML page. If the tag is left blank doxygen will generate a # standard header. # # To get valid HTML the header file that includes any scripts and style sheets # that doxygen needs, which is dependent on the configuration options used (e.g. # the setting GENERATE_TREEVIEW). It is highly recommended to start with a # default header using # doxygen -w html new_header.html new_footer.html new_stylesheet.css # YourConfigFile # and then modify the file new_header.html. See also section "Doxygen usage" # for information on how to generate the default header that doxygen normally # uses. # Note: The header is subject to change so you typically have to regenerate the # default header when upgrading to a newer version of doxygen. For a description # of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_HEADER = # The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each # generated HTML page. If the tag is left blank doxygen will generate a standard # footer. See HTML_HEADER for more information on how to generate a default # footer and what special commands can be used inside the footer. See also # section "Doxygen usage" for information on how to generate the default footer # that doxygen normally uses. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading style # sheet that is used by each HTML page. It can be used to fine-tune the look of # the HTML output. If left blank doxygen will generate a default style sheet. # See also section "Doxygen usage" for information on how to generate the style # sheet that doxygen normally uses. # Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as # it is more robust and this tag (HTML_STYLESHEET) will in the future become # obsolete. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_STYLESHEET = # The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined # cascading style sheets that are included after the standard style sheets # created by doxygen. Using this option one can overrule certain style aspects. # This is preferred over using HTML_STYLESHEET since it does not replace the # standard style sheet and is therefore more robust against future updates. # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the # list). For an example see the documentation. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_EXTRA_STYLESHEET = # The HTML_EXTRA_FILES tag can be used to specify one or more extra images or # other source files which should be copied to the HTML output directory. Note # that these files will be copied to the base HTML output directory. Use the # $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these # files. In the HTML_STYLESHEET file, use the file name only. Also note that the # files will be copied as-is; there are no commands or markers available. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_FILES = # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to # this color. Hue is specified as an angle on a colorwheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. # Minimum value: 0, maximum value: 359, default value: 220. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors # in the HTML output. For a value of 0 the output will use grayscales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_SAT = 100 # The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the # luminance component of the colors in the HTML output. Values below 100 # gradually make the output lighter, whereas values above 100 make the output # darker. The value divided by 100 is the actual gamma applied, so 80 represents # a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not # change the gamma. # Minimum value: 40, maximum value: 240, default value: 80. 
# This tag requires that the tag GENERATE_HTML is set to YES. HTML_COLORSTYLE_GAMMA = 80 # If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML # page will contain the date and time when the page was generated. Setting this # to YES can help to show when doxygen was last run and thus if the # documentation is up to date. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_TIMESTAMP = NO # If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML # documentation will contain a main index with vertical navigation menus that # are dynamically created via JavaScript. If disabled, the navigation index will # consists of multiple levels of tabs that are statically embedded in every HTML # page. Disable this option to support browsers that do not have JavaScript, # like the Qt help browser. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_MENUS = YES # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_DYNAMIC_SECTIONS = NO # With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries # shown in the various tree structured indices initially; the user can expand # and collapse entries dynamically later on. Doxygen will expand the tree to # such a level that at most the specified number of entries are visible (unless # a fully collapsed tree already exceeds this amount). So setting the number of # entries 1 will produce a full collapsed tree by default. 0 is a special value # representing an infinite number of entries and will result in a full expanded # tree by default. # Minimum value: 0, maximum value: 9999, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. 
HTML_INDEX_NUM_ENTRIES = 100 # If the GENERATE_DOCSET tag is set to YES, additional index files will be # generated that can be used as input for Apple's Xcode 3 integrated development # environment (see: # https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To # create a documentation set, doxygen will generate a Makefile in the HTML # output directory. Running make will produce the docset in that directory and # running make install will install the docset in # ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at # startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy # genXcode/_index.html for more information. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_DOCSET = NO # This tag determines the name of the docset feed. A documentation feed provides # an umbrella under which multiple documentation sets from a single provider # (such as a company or product suite) can be grouped. # The default value is: Doxygen generated docs. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_FEEDNAME = "Doxygen generated docs" # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_BUNDLE_ID = org.doxygen.Project # The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify # the documentation publisher. This should be a reverse domain-name style # string, e.g. com.mycompany.MyDocSet.documentation. # The default value is: org.doxygen.Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_ID = org.doxygen.Publisher # The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. 
# The default value is: Publisher. # This tag requires that the tag GENERATE_DOCSET is set to YES. DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop # (see: # https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows. # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML # files are now used as the Windows 98 help format, and will replace the old # Windows help format (.hlp) on all Windows platforms in the future. Compressed # HTML files also contain an index, a table of contents, and you can search for # words in the documentation. The HTML workshop also contains a viewer for # compressed HTML files. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_HTMLHELP = NO # The CHM_FILE tag can be used to specify the file name of the resulting .chm # file. You can add a path in front of the file if the result should not be # written to the html output directory. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_FILE = # The HHC_LOCATION tag can be used to specify the location (absolute path # including file name) of the HTML help compiler (hhc.exe). If non-empty, # doxygen will try to run the HTML help compiler on the generated index.hhp. # The file has to be specified with full path. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. HHC_LOCATION = # The GENERATE_CHI flag controls if a separate .chi index file is generated # (YES) or that it should be included in the main .chm file (NO). # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. 
GENERATE_CHI = NO # The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) # and project file content. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. CHM_INDEX_ENCODING = # The BINARY_TOC flag controls whether a binary table of contents is generated # (YES) or a normal table of contents (NO) in the .chm file. Furthermore it # enables the Previous and Next buttons. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members to # the table of contents of the HTML help documentation and to the tree view. # The default value is: NO. # This tag requires that the tag GENERATE_HTMLHELP is set to YES. TOC_EXPAND = NO # If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and # QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that # can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help # (.qch) of the generated HTML documentation. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_QHP = NO # If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify # the file name of the resulting .qch file. The path specified is relative to # the HTML output folder. # This tag requires that the tag GENERATE_QHP is set to YES. QCH_FILE = # The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help # Project output. For more information please see Qt Help Project / Namespace # (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_NAMESPACE = org.doxygen.Project # The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt # Help Project output. 
For more information please see Qt Help Project / Virtual # Folders (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders). # The default value is: doc. # This tag requires that the tag GENERATE_QHP is set to YES. QHP_VIRTUAL_FOLDER = doc # If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom # filter to add. For more information please see Qt Help Project / Custom # Filters (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_NAME = # The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the # custom filter to add. For more information please see Qt Help Project / Custom # Filters (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_CUST_FILTER_ATTRS = # The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this # project's filter section matches. Qt Help Project / Filter Attributes (see: # https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). # This tag requires that the tag GENERATE_QHP is set to YES. QHP_SECT_FILTER_ATTRS = # The QHG_LOCATION tag can be used to specify the location (absolute path # including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to # run qhelpgenerator on the generated .qhp file. # This tag requires that the tag GENERATE_QHP is set to YES. QHG_LOCATION = # If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be # generated, together with the HTML files, they form an Eclipse help plugin. To # install this plugin and make it available under the help contents menu in # Eclipse, the contents of the directory containing the HTML and XML files needs # to be copied into the plugins directory of eclipse. The name of the directory # within the plugins directory should be the same as the ECLIPSE_DOC_ID value. 
# After copying Eclipse needs to be restarted before the help appears. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_ECLIPSEHELP = NO # A unique identifier for the Eclipse help plugin. When installing the plugin # the directory name containing the HTML and XML files should also have this # name. Each documentation set should have its own identifier. # The default value is: org.doxygen.Project. # This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. ECLIPSE_DOC_ID = org.doxygen.Project # If you want full control over the layout of the generated HTML pages it might # be necessary to disable the index and replace it with your own. The # DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top # of each HTML page. A value of NO enables the index and the value YES disables # it. Since the tabs in the index contain the same information as the navigation # tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. DISABLE_INDEX = NO # The GENERATE_TREEVIEW tag is used to specify whether a tree-like index # structure should be generated to display hierarchical information. If the tag # value is set to YES, a side panel will be generated containing a tree-like # index structure (just like the one that is generated for HTML Help). For this # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can # further fine-tune the look of the index. As an example, the default style # sheet generated by doxygen has an example that shows how to put an image at # the root of the tree instead of the PROJECT_NAME. 
Since the tree basically has # the same information as the tab index, you could consider setting # DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. GENERATE_TREEVIEW = NO # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # # Note that a value of 0 will completely suppress the enum values from appearing # in the overview section. # Minimum value: 0, maximum value: 20, default value: 4. # This tag requires that the tag GENERATE_HTML is set to YES. ENUM_VALUES_PER_LINE = 4 # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used # to set the initial width (in pixels) of the frame in which the tree is shown. # Minimum value: 0, maximum value: 1500, default value: 250. # This tag requires that the tag GENERATE_HTML is set to YES. TREEVIEW_WIDTH = 250 # If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to # external symbols imported via tag files in a separate window. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. EXT_LINKS_IN_WINDOW = NO # If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg # tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see # https://inkscape.org) to generate formulas as SVG images instead of PNGs for # the HTML output. These images will generally look nicer at scaled resolutions. # Possible values are: png (the default) and svg (looks nicer but requires the # pdf2svg or inkscape tool). # The default value is: png. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_FORMULA_FORMAT = png # Use this tag to change the font size of LaTeX formulas included as images in # the HTML documentation. 
When you change the font size after a successful # doxygen run you need to manually remove any form_*.png images from the HTML # output directory to force them to be regenerated. # Minimum value: 8, maximum value: 50, default value: 10. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_FONTSIZE = 10 # Use the FORMULA_TRANSPARENT tag to determine whether or not the images # generated for formulas are transparent PNGs. Transparent PNGs are not # supported properly for IE 6.0, but are supported on all modern browsers. # # Note that when changing this option you need to delete any form_*.png files in # the HTML output directory before the changes have effect. # The default value is: YES. # This tag requires that the tag GENERATE_HTML is set to YES. FORMULA_TRANSPARENT = YES # The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands # to create new LaTeX commands to be used in formulas as building blocks. See # the section "Including formulas" for details. FORMULA_MACROFILE = # Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see # https://www.mathjax.org) which uses client side JavaScript for the rendering # instead of using pre-rendered bitmaps. Use this if you do not have LaTeX # installed or if you want to formulas look prettier in the HTML output. When # enabled you may also need to install MathJax separately and configure the path # to it using the MATHJAX_RELPATH option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES. USE_MATHJAX = NO # When MathJax is enabled you can set the default output format to be used for # the MathJax output. See the MathJax site (see: # http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. # Possible values are: HTML-CSS (which is slower, but has the best # compatibility), NativeMML (i.e. MathML) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. 
MATHJAX_FORMAT = HTML-CSS # When MathJax is enabled you need to specify the location relative to the HTML # output directory using the MATHJAX_RELPATH option. The destination directory # should contain the MathJax.js script. For instance, if the mathjax directory # is located at the same level as the HTML output directory, then # MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of # MathJax from https://www.mathjax.org before deployment. # The default value is: https://cdn.jsdelivr.net/npm/mathjax@2. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.2/ # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = # The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces # of code that will be used on startup of the MathJax code. See the MathJax site # (see: # http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an # example see the documentation. # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_CODEFILE = # When the SEARCHENGINE tag is enabled doxygen will generate a search box for # the HTML output. The underlying search engine uses javascript and DHTML and # should work on any modern browser. Note that when using HTML help # (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) # there is already a search function so this one should typically be disabled. # For large projects the javascript based search engine can be slow, then # enabling SERVER_BASED_SEARCH may provide a better solution. 
It is possible to # search using the keyboard; to jump to the search box use + S # (what the is depends on the OS and browser, but it is typically # , /