gforcada-haproxy_log_analysis-c5d274d/.coveragerc

[report]
omit = */python?.?/*

gforcada-haproxy_log_analysis-c5d274d/.flake8

[flake8]
doctests = 1
ignore =
    # coding magic comment not found
    C101,
    # missing docstring in public package
    D104,
    # missing docstring in magic method
    D105,
    # missing docstring in public nested class (e.g. Meta class)
    D106,
    # missing docstring in __init__ (against Google/NumPy guidelines)
    D107,
    # missing blank line after last section
    D413,
    # black takes care of whitespace before colons (:)
    E203,
    # black takes care of whitespace after commas
    E231,
    # black takes care of line length
    E501,
    # all-lowercase method names
    N802,
    # Change outer quotes to avoid escaping inner quotes
    Q003,
    # black takes care of where to break lines
    W503,

gforcada-haproxy_log_analysis-c5d274d/.github/workflows/release.yml

name: GitHub

on:
  push:
    tags: ["*"]

jobs:
  release:
    permissions: write-all
    runs-on: "ubuntu-latest"
    name: Create a Release
    env:
      GH_TOKEN: ${{ github.token }}
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python 3.11
        uses: actions/setup-python@v4
        with:
          python-version: "3.11"
      - name: Create a new GitHub release
        run: |
          # get the lines where the changelog for the last release starts and finishes
          first_line=$(grep -n "\-\-\-\-" CHANGES.rst | cut -d":" -f1 | head -n1)
          last_line=$(grep -n "\-\-\-\-" CHANGES.rst | cut -d":" -f1 | head -n2 | tail -n1)

          # do some math to adjust the line numbers
          first=$((${first_line}+1))
          last=$((${last_line}-2))
          end=$((${last_line}-1))

          # extract the changelog
          sed -n "${first},${last}p;${end}q" CHANGES.rst > body.txt
          cat body.txt

          gh release create ${{ github.ref_name }} -p -F body.txt
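The release workflow's ``run`` step locates the newest entry in ``CHANGES.rst`` by counting the ``----`` heading underlines. A minimal standalone sketch of that same extraction logic, assuming a changelog whose version headings are underlined with dashes::

    #!/usr/bin/env bash
    # Line numbers of the first two "----" underlines delimit the newest entry.
    # The "--" ends option parsing so the dash-only pattern is not read as a flag.
    first_line=$(grep -n -- "----" CHANGES.rst | cut -d":" -f1 | head -n1)
    last_line=$(grep -n -- "----" CHANGES.rst | cut -d":" -f1 | head -n2 | tail -n1)
    first=$((first_line + 1))  # first body line after the newest heading's underline
    last=$((last_line - 2))    # stop just before the next version heading
    sed -n "${first},${last}p" CHANGES.rst > body.txt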
gforcada-haproxy_log_analysis-c5d274d/.github/workflows/tests.yml

name: Testing

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

env:
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

jobs:
  test:
    name: Testing on
    runs-on: "ubuntu-latest"
    strategy:
      matrix:
        python-version: ["3.12", "3.11", "3.10", "3.9", "3.8", "pypy-3.9"]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Cache packages
        uses: actions/cache@v3
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('tox.ini') }}
          restore-keys: |
            ${{ runner.os }}-pip-${{ matrix.python-version }}-
      - name: Install dependencies
        run: python -m pip install tox tox-gh-actions
      - name: Test
        run: tox

  lint:
    name: Lint code
    runs-on: "ubuntu-latest"
    strategy:
      matrix:
        python-version: [3.8]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Cache packages
        uses: actions/cache@v3
        with:
          path: |
            ~/.cache/pre-commit
            ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ matrix.python-version }}-${{ hashFiles('tox.ini') }}
          restore-keys: |
            ${{ runner.os }}-pip-${{ matrix.python-version }}-
      - name: Install dependencies
        run: python -m pip install tox
      - name: Run linting
        run: tox -e lint

gforcada-haproxy_log_analysis-c5d274d/.gitignore

*.py[cod]
*.pickle

# C extensions
*.so

# Packages
*.egg
*.egg-info
dist
build
eggs
parts
bin
var
sdist
develop-eggs
.installed.cfg
lib
lib64
__pycache__

# Unit test / coverage reports
coverage

gforcada-haproxy_log_analysis-c5d274d/.pre-commit-config.yaml

ci:
  autofix_prs: false
  autoupdate_schedule: monthly

repos:
  - repo: https://github.com/asottile/pyupgrade
    rev: v3.14.0
    hooks:
      - id: pyupgrade
        args: [--py38-plus]
  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort
  - repo: https://github.com/psf/black
    rev: 23.9.1
    hooks:
      - id: black
  - repo: https://github.com/PyCQA/flake8
    rev: 6.1.0
    hooks:
      - id: flake8
        additional_dependencies:
          - flake8-bugbear
          - flake8-builtins
          - flake8-comprehensions
          - flake8-debugger
          - flake8-deprecated
          - flake8-isort
          - flake8-pep3101
          - flake8-quotes
          - flake8-pytest-style
  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.6
    hooks:
      - id: codespell
        additional_dependencies:
          - tomli
  - repo: https://github.com/mgedmin/check-manifest
    rev: "0.49"
    hooks:
      - id: check-manifest
  - repo: https://github.com/regebro/pyroma
    rev: "4.2"
    hooks:
      - id: pyroma
  - repo: https://github.com/mgedmin/check-python-versions
    rev: "0.21.3"
    hooks:
      - id: check-python-versions
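With this configuration in place, the checks above run locally through the ``pre-commit`` tool; a typical invocation looks like::

    $ pip install pre-commit
    $ pre-commit install           # run the hooks on every git commit
    $ pre-commit run --all-files   # or run them once over the whole tree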
gforcada-haproxy_log_analysis-c5d274d/CHANGES.rst

CHANGES
=======

6.0.0a4 (2023-11-25)
--------------------

- More GHA automation fixes.
  [gforcada]

6.0.0a3 (2023-11-25)
--------------------

- Play with gh command line tool.
  [gforcada]

6.0.0a2 (2023-11-12)
--------------------

- Test again a release.
  [gforcada]

6.0.0a1 (2023-11-12)
--------------------

- Make listing of commands and filters easier to read.
  [gforcada]

- Improve the filters' and commands' descriptions, with ready to use examples.
  [gforcada]

- Switch logic of `wait_on_queues` filter, count lines that are above the
  filter, e.g. the lines that took more than the specified time.
  [gforcada]

- move code to a `src` folder
  [gforcada]

- drop `pkg_resources` usage, default to native namespaces
  [gforcada]

- switch to `pyproject.toml` and remove `setup.py`
  [gforcada]

- use `tox`
  [gforcada]

- use `pre-commit`
  [gforcada]

- soft drop python 3.7 (it's EOL, and we stop testing against it)
  [gforcada]

- Pin dependencies installed in `tox.ini`
  [gforcada]

- Add support for Python 3.12
  [gforcada]

- Automatically create GitHub releases with GitHub Actions.
  [gforcada]

5.1.0 (2022-12-03)
------------------

- Only get the first IP from `X-Forwarded-For` header.
  [gforcada]

- Improve tests robustness.
  [gforcada]

- Fix `top_ips` and `top_request_paths` commands output. They were showing
  all output, rather than only the top 10.
  [gforcada]

- Move `tests` folder to the top-level.
  [gforcada]

5.0.0 (2022-11-27)
------------------

- Drop testing on travis-ci.
  [gforcada]

- Use GitHub Actions.
  [gforcada]

- Format the code with `pyupgrade`, `black` and `isort`.
  [gforcada]

- Use `pip-tools` to keep dependencies locked.
  [gforcada]

- Bump python versions supported to 3.7-3.11 and pypy.
  [gforcada]

- Drop python 3.6 (EOL).
  [gforcada]

4.1.0 (2020-01-06)
------------------

- **New command:** ``requests_per_hour``. Just like the
  ``requests_per_minute`` but with hour granularity. Idea and first
  implementation done by ``valleedelisle``.
  [gforcada]

- Fix parsing truncated requests. Idea and first implementation by ``vixns``.
  [gforcada]

4.0.0 (2020-01-06)
------------------

**BREAKING CHANGES:**

- Complete rewrite to use almost no memory usage even on huge files.
  [gforcada]

- Add parallelization to make parsing faster by parsing multiple lines in
  parallel.
  [gforcada]

- Rename command ``counter_slow_requests`` to ``slow_requests_counter``,
  so it is aligned with all other ``_counter`` commands.
  [gforcada]

- Changed the ``counter_invalid`` command to a new command line switch
  ``--invalid``.
  [gforcada]

**Regular changes:**

- Drop Python 2 support, and test on Python 3.8.
  [gforcada]

- Remove the pickling support.
  [gforcada]

- Add `--json` output command line option.
  [valleedelisle]

3.0.0 (2019-06-10)
------------------

- Fix spelling.
  [EdwardBetts]

- Make ip_counter use client_ip per default.
  [vixns]

- Overhaul testing environment. Test on python 3.7 as well. Use black to
  format.
  [gforcada]

2.1 (2017-07-06)
----------------

- Enforce QA checks (flake8) on code. All code has been updated to follow it.
  [gforcada]

- Support Python 3.6.
  [gforcada]

- Support different syslog timestamps (at least NixOS).
  [gforcada]

2.0.2 (2016-11-17)
------------------

- Improve performance for ``cmd_print``.
  [kevinjqiu]

2.0.1 (2016-10-29)
------------------

- Allow hostnames to have a dot in it.
  [gforcada]

2.0 (2016-07-06)
----------------

- Handle unparsable HTTP requests.
  [gforcada]

- Only test on python 2.7 and 3.5
  [gforcada]

2.0b0 (2016-04-18)
------------------

- Check the divisor before doing a division to not get
  ``ZeroDivisionError`` exceptions.
  [gforcada]

2.0a0 (2016-03-29)
------------------

- Major refactoring:

  # Rename modules and classes:

  - haproxy_logline -> line
  - haproxy_logfile -> logfile
  - HaproxyLogLine -> Line
  - HaproxyLogFile -> Log

  # Parse the log file on Log() creation (i.e. in its __init__)

  [gforcada]

1.3 (2016-03-29)
----------------

- New filter: ``filter_wait_on_queues``. Get all requests that waited at
  maximum X amount of milliseconds on HAProxy queues.
  [gforcada]

- Code/docs cleanups and add code analysis.
  [gforcada]

- Avoid using eval.
  [gforcada]

1.2.1 (2016-02-23)
------------------

- Support -1 as a status_code
  [Christopher Baines]

1.2 (2015-12-07)
----------------

- Allow a hostname on the syslog part (not only IPs)
  [danny crasto]

1.1 (2015-04-19)
----------------

- Make syslog optional. Fixes issue
  https://github.com/gforcada/haproxy_log_analysis/issues/10.
  [gforcada]

1.0 (2015-03-24)
----------------

- Fix issue #9. log line on the syslog part was too strict, it was
  expecting the hostname to be a string and was failing if it was an IP.
  [gforcada]

0.0.3.post2 (2015-01-05)
------------------------

- Finally really fixed issue #7. ``namespace_packages`` was not meant to be
  on setup.py at all. Silly copy&paste mistake.
  [gforcada]

0.0.3.post (2015-01-04)
-----------------------

- Fix release on PyPI. Solves GitHub issue #7.
  https://github.com/gforcada/haproxy_log_analysis/issues/7
  [gforcada]

0.0.3 (2014-07-09)
------------------

- Fix release on PyPI (again).
  [gforcada]

0.0.2 (2014-07-09)
------------------

- Fix release on PyPI.
  [gforcada]

0.0.1 (2014-07-09)
------------------

- Pickle :class::`.HaproxyLogFile` data for faster performance.
  [gforcada]
- Add a way to negate the filters, so that instead of being able to filter
  by IP, it can output all but that IP information.
  [gforcada]

- Add lots of filters: ip, path, ssl, backend, frontend, server, status_code
  and so on. See ``--list-filters`` for a complete list of them.
  [gforcada]

- Add :method::`.HaproxyLogFile.parse_data` method to get data from a data
  stream. It allows you to use it as a library.
  [bogdangi]

- Add ``--list-filters`` argument on the command line interface.
  [gforcada]

- Add ``--filter`` argument on the command line interface, inspired by
  Bogdan's early design.
  [bogdangi]
  [gforcada]

- Create a new module :module::`haproxy.filters` that holds all available
  filters.
  [gforcada]

- Improve :method::`.HaproxyLogFile.cmd_queue_peaks` output to not only
  show peaks but also when requests started to queue and when they finished
  and the amount of requests that had been queued.
  [gforcada]

- Show help when no argument is given.
  [gforcada]

- Polish documentation and docstrings here and there.
  [gforcada]

- Add a ``--list-commands`` argument on the command line interface.
  [gforcada]

- Generate an API doc for ``HaproxyLogLine`` and ``HaproxyLogFile``.
  [bogdangi]

- Create a ``console_script`` `haproxy_log_analysis` for ease of use.
  [bogdangi]

- Add Sphinx documentation system, still empty.
  [gforcada]

- Keep valid log lines sorted so that the exact order of connections is kept.
  [gforcada]

- Add quite a few commands, see `README.rst`_ for a complete list of them.
  [gforcada]

- Run commands passed as arguments (with -c flag).
  [gforcada]

- Add a requirements.txt file to keep track of dependencies and pin them.
  [gforcada]

- Add travis_ and coveralls_ support. See its badges on `README.rst`_.
  [gforcada]

- Add argument parsing and custom validation logic for all arguments.
  [gforcada]

- Add regular expressions for haproxy log lines (HTTP format) and to parse
  HTTP requests path. Added tests to ensure they work as expected.
  [gforcada]

- Create distribution.
  [gforcada]

.. _travis: https://travis-ci.org/
.. _coveralls: https://coveralls.io/
.. _README.rst: http://github.com/gforcada/haproxy_log_analysis

gforcada-haproxy_log_analysis-c5d274d/LICENSE

GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc.
Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.

Preamble

The GNU General Public License is a free, copyleft license for software and other kinds of works.

The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too.

When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things.
To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. 
Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. 
You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. 
A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. 
For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. 
Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 
If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. 
The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. 
It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found.

    {one line to give the program's name and a brief idea of what it does.}
    Copyright (C) {year} {name of author}

    This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along with this program. If not, see .

Also add information on how to contact you by electronic and paper mail.

If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode:

    {project} Copyright (C) {year} {fullname}
    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box".

You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see .

The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read .

gforcada-haproxy_log_analysis-c5d274d/MANIFEST.in

include CHANGES.rst
exclude .flake8
exclude *.in
exclude *.txt
exclude *.yaml
exclude .coveragerc
exclude tox.ini
recursive-exclude docs *.py
recursive-exclude docs *.rst
recursive-exclude docs Makefile
recursive-exclude tests *.log
recursive-exclude tests *.py

gforcada-haproxy_log_analysis-c5d274d/README.rst

.. -*- coding: utf-8 -*-

HAProxy log analyzer
====================

This Python package is a `HAProxy`_ log parser. It analyzes HAProxy log files in multiple ways (see the commands section below).

.. note:: Currently only the `HTTP log format`_ is supported.

Tests and coverage
------------------

No project is trustworthy if it does not have tests and decent coverage!

.. image:: https://github.com/gforcada/haproxy_log_analysis/actions/workflows/tests.yml/badge.svg?branch=master
   :target: https://github.com/gforcada/haproxy_log_analysis/actions/workflows/tests.yml

.. image:: https://coveralls.io/repos/github/gforcada/haproxy_log_analysis/badge.svg?branch=master
   :target: https://coveralls.io/github/gforcada/haproxy_log_analysis?branch=master

Documentation
-------------

See the `documentation and API`_ at ReadTheDocs_.
Command-line interface
----------------------

The current ``--help`` looks like this::

    usage: haproxy_log_analysis [-h] [-l LOG] [-s START] [-d DELTA] [-c COMMAND]
                                [-f FILTER] [-n] [--list-commands]
                                [--list-filters] [--json]

    Analyze HAProxy log files and outputs statistics about it

    optional arguments:
      -h, --help            show this help message and exit
      -l LOG, --log LOG     HAProxy log file to analyze
      -s START, --start START
                            Process log entries starting at this time, in HAProxy
                            date format (e.g. 11/Dec/2013 or
                            11/Dec/2013:19:31:41). At least provide the
                            day/month/year. Values not specified will use their
                            base value (e.g. 00 for hour). Use in conjunction
                            with -d to limit the number of entries to process.
      -d DELTA, --delta DELTA
                            Limit the number of entries to process. Express the
                            time delta as a number and a time unit, e.g.: 1s,
                            10m, 3h or 4d (for 1 second, 10 minutes, 3 hours or
                            4 days). Use in conjunction with -s to only analyze
                            certain time delta. If no start time is given, the
                            time on the first line will be used instead.
      -c COMMAND, --command COMMAND
                            List of commands, comma separated, to run on the log
                            file. See --list-commands to get a full list of them.
      -f FILTER, --filter FILTER
                            List of filters to apply on the log file. Passed as
                            comma separated and parameters within square
                            brackets, e.g ip[192.168.1.1],ssl,path[/some/path].
                            See --list-filters to get a full list of them.
      -n, --negate-filter   Make filters passed with -f work the other way
                            around, i.e. if the ``ssl`` filter is passed instead
                            of showing only ssl requests it will show non-ssl
                            traffic. If the ``ip`` filter is used, then all but
                            that ip passed to the filter will be used.
      --list-commands       Lists all commands available.
      --list-filters        Lists all filters available.
      --json                Output results in json.
      --invalid             Print the lines that could not be parsed. Be aware
                            that mixing it with the print command will mix their
                            output.

Commands
--------

Commands are small, purpose-specific programs in themselves that report specific statistics about the log file being analyzed. See them all with ``--list-commands`` or online at https://haproxy-log-analyzer.readthedocs.io/modules.html#module-haproxy.commands.

- ``average_response_time``
- ``average_waiting_time``
- ``connection_type``
- ``counter``
- ``http_methods``
- ``ip_counter``
- ``print``
- ``queue_peaks``
- ``request_path_counter``
- ``requests_per_hour``
- ``requests_per_minute``
- ``server_load``
- ``slow_requests``
- ``slow_requests_counter``
- ``status_codes_counter``
- ``top_ips``
- ``top_request_paths``

Filters
-------

Filters, contrary to commands, are a way to reduce the amount of log lines to be processed.

.. note:: The ``-n`` command line argument allows you to reverse the filters' output. This helps when looking for specific traces, like a certain IP, a path...

See them all with ``--list-filters`` or online at https://haproxy-log-analyzer.readthedocs.io/modules.html#module-haproxy.filters.

- ``backend``
- ``frontend``
- ``http_method``
- ``ip``
- ``ip_range``
- ``path``
- ``response_size``
- ``server``
- ``slow_requests``
- ``ssl``
- ``status_code``
- ``status_code_family``
- ``wait_on_queues``
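For example, a hypothetical invocation that combines filters with commands (the log path and the filter values are illustrative, not taken from the project's docs)::

    $ haproxy_log_analysis -l /var/log/haproxy.log \
          -f ip_range[192.168.1],status_code_family[5] \
          -c status_codes_counter,top_request_paths --json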
Installation
------------

After installation you will have a console script `haproxy_log_analysis`::

    $ pip install haproxy_log_analysis

TODO
----

- add more commands: *(help appreciated)*

  - reports on servers connection time
  - reports on termination state
  - reports around connections (active, frontend, backend, server)
  - *your ideas here*

- think of a way to show the commands output in a meaningful way

- be able to specify an output format. For any command that makes sense (slow requests for example) output the given fields for each log line (i.e. acceptance date, path, downstream server, load at that time...)

- *your ideas*

.. _HAProxy: http://haproxy.1wt.eu/
.. _HTTP log format: http://cbonte.github.io/haproxy-dconv/2.2/configuration.html#8.2.3
.. _documentation and API: https://haproxy-log-analyzer.readthedocs.io/
.. _ReadTheDocs: http://readthedocs.org
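The Sphinx ``Makefile`` below drives the documentation build. A typical invocation, assuming Sphinx is installed, would be::

    $ cd docs
    $ make html   # HTML output ends up in docs/build/html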
gforcada-haproxy_log_analysis-c5d274d/docs/Makefile

# Makefile for Sphinx documentation
#

# You can set these variables from the command line.
SPHINXOPTS    =
SPHINXBUILD   = sphinx-build
PAPER         =
BUILDDIR      = build

# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif

# Internal variables.
PAPEROPT_a4     = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source

.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext

help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo "  html       to make standalone HTML files"
	@echo "  dirhtml    to make HTML files named index.html in directories"
	@echo "  singlehtml to make a single large HTML file"
	@echo "  pickle     to make pickle files"
	@echo "  json       to make JSON files"
	@echo "  htmlhelp   to make HTML files and a HTML help project"
	@echo "  qthelp     to make HTML files and a qthelp project"
	@echo "  devhelp    to make HTML files and a Devhelp project"
	@echo "  epub       to make an epub"
	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo "  text       to make text files"
	@echo "  man        to make manual pages"
	@echo "  texinfo    to make Texinfo files"
	@echo "  info       to make Texinfo files and run them through makeinfo"
	@echo "  gettext    to make PO message catalogs"
	@echo "  changes    to make an overview of all changed/added/deprecated items"
	@echo "  xml        to make Docutils-native XML files"
	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
	@echo "  linkcheck  to check all external links for integrity"
	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"

clean:
	rm -rf $(BUILDDIR)/*

html:
	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

dirhtml:
	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."

singlehtml:
	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."

pickle:
	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/htmlhelp."

qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/HAProxyloganalyzer.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/HAProxyloganalyzer.qhc"

devhelp:
	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/HAProxyloganalyzer"
	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/HAProxyloganalyzer"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
latex:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/text."

man:
	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."

texinfo:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."

info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	make -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."

gettext:
	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
	@echo
	@echo "The overview file is in $(BUILDDIR)/changes."

linkcheck:
	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/linkcheck/output.txt."

doctest:
	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/doctest/output.txt."

xml:
	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."

pseudoxml:
	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."

gforcada-haproxy_log_analysis-c5d274d/docs/source/README_link.rst

.. include:: ../../README.rst

gforcada-haproxy_log_analysis-c5d274d/docs/source/changelog.rst

.. include:: ../../CHANGES.rst

gforcada-haproxy_log_analysis-c5d274d/docs/source/conf.py

#
# HAProxy log analyzer documentation build configuration file, created by
# sphinx-quickstart on Thu Dec 19 00:06:54 2013.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
# If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # sys.path.insert(0, os.path.abspath('.')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = 'HAProxy log analyzer' copyright = '2013, Gil Forcada' # noqa: A001 # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = '0.1' # The full version, including alpha/beta/rc tags. release = '0.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = [] # The reST default role (used for this markup: `text`) to use for all # documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. # keep_warnings = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". # html_title = None # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. 
This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. # html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'HAProxyloganalyzerdoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # 'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ ( 'index', 'HAProxyloganalyzer.tex', 'HAProxy log analyzer Documentation', 'Gil Forcada', 'manual', ), ] # The name of an image file (relative to this directory) to place at the top of # the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # latex_use_parts = False # If true, show page references after internal links. # latex_show_pagerefs = False # If true, show URL addresses after external links. # latex_show_urls = False # Documents to append as an appendix to all manuals. # latex_appendices = [] # If false, no module index is generated. # latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ( 'index', 'haproxyloganalyzer', 'HAProxy log analyzer Documentation', ['Gil Forcada'], 1, ) ] # If true, show URL addresses after external links. 
# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( 'index', 'HAProxyloganalyzer', 'HAProxy log analyzer Documentation', 'Gil Forcada', 'HAProxyloganalyzer', 'One line description of project.', 'Miscellaneous', ), ] # Documents to append as an appendix to all manuals. # texinfo_appendices = [] # If false, no module index is generated. # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. # texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. # texinfo_no_detailmenu = False gforcada-haproxy_log_analysis-c5d274d/docs/source/index.rst000066400000000000000000000010741453042166300242310ustar00rootroot00000000000000.. HAProxy log analyzer documentation master file, created by sphinx-quickstart on Thu Dec 19 00:06:54 2013. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. ================================================ Welcome to HAProxy log analyzer's documentation! ================================================ Contents: .. toctree:: :maxdepth: 2 README_link modules changelog ================== Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` gforcada-haproxy_log_analysis-c5d274d/docs/source/modules.rst000066400000000000000000000005151453042166300245710ustar00rootroot00000000000000Haproxy Modules =============== Log --- .. automodule:: haproxy.logfile .. autoclass:: Log :members: :private-members: Line ---- .. automodule:: haproxy.line .. autoclass:: Line :members: Filters ------- .. automodule:: haproxy.filters :members: Commands -------- .. 
automodule:: haproxy.commands :members: gforcada-haproxy_log_analysis-c5d274d/pyproject.toml000066400000000000000000000032211453042166300230500ustar00rootroot00000000000000[build-system] requires = ["setuptools"] build-backend = "setuptools.build_meta" [project] name = "haproxy_log_analysis" version = "6.0.0a4" authors = [ { name="Gil Forcada Codinachs", email="gil.gnome@gmail.com" }, ] description = "Analyze HAProxy log files" keywords = ["haproxy", "log", "sysadmin", "devops", "report" ] license = {file = "LICENSE"} readme = "README.rst" requires-python = ">=3.8" classifiers = [ "Development Status :: 5 - Production/Stable", "Environment :: Console", "Intended Audience :: System Administrators", "License :: OSI Approved :: GNU General Public License v3 (GPLv3)", "Operating System :: OS Independent", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Internet :: Log Analysis", ] [project.urls] "Homepage" = "https://github.com/gforcada/haproxy_log_analysis" "Bug Tracker" = "https://github.com/gforcada/haproxy_log_analysis/issues" "Changelog" = "https://github.com/gforcada/haproxy_log_analysis/blob/main/CHANGES.rst" [project.scripts] haproxy_log_analysis = "haproxy.main:console_script" [tool.isort] profile = "plone" [tool.black] target-version = ["py38"] skip-string-normalization = true gforcada-haproxy_log_analysis-c5d274d/requirements.in000066400000000000000000000000221453042166300232060ustar00rootroot00000000000000pytest pytest-cov gforcada-haproxy_log_analysis-c5d274d/requirements.txt000066400000000000000000000010741453042166300234240ustar00rootroot00000000000000# # This file is autogenerated by pip-compile with Python 3.8 # by the following command: # # pip-compile requirements.in # attrs==22.1.0 # via pytest coverage[toml]==6.5.0 # via # coverage # pytest-cov exceptiongroup==1.1.3 # via pytest iniconfig==1.1.1 # via pytest packaging==21.3 # via pytest pluggy==1.0.0 # via pytest pyparsing==3.0.9 # via packaging pytest==7.2.0 # via # -r requirements.in # pytest-cov pytest-cov==4.0.0 # via -r requirements.in tomli==2.0.1 # via # coverage # pytest gforcada-haproxy_log_analysis-c5d274d/setup.cfg000066400000000000000000000001321453042166300217530ustar00rootroot00000000000000[zest.releaser] create-wheel = yes [tool:pytest] testpaths = tests norecursedirs = .venv gforcada-haproxy_log_analysis-c5d274d/src/000077500000000000000000000000001453042166300207255ustar00rootroot00000000000000gforcada-haproxy_log_analysis-c5d274d/src/haproxy/000077500000000000000000000000001453042166300224175ustar00rootroot00000000000000gforcada-haproxy_log_analysis-c5d274d/src/haproxy/__init__.py000066400000000000000000000000001453042166300245160ustar00rootroot00000000000000gforcada-haproxy_log_analysis-c5d274d/src/haproxy/commands.py000066400000000000000000000270461453042166300246020ustar00rootroot00000000000000from collections import defaultdict from collections import OrderedDict from datetime import datetime import json import time class BaseCommandMixin: @classmethod def command_line_name(cls): """Convert class name to lowercase with underscores. e.g. turn HttpMethods to http_methods.
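For example, `TopIps.command_line_name()` returns 'top_ips', which is the name users type on the command line (see --list-commands).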
""" final_string = cls.__name__[0].lower() for character in cls.__name__[1:]: if character.isupper(): final_string += f'_{character.lower()}' else: final_string += character return final_string def raw_results(self): # pragma: no cover raise NotImplementedError def json_data(self): return self.raw_results() def print_data(self): return self.raw_results() def results(self, output=None): command_name = self.command_line_name().upper() if output == 'json': results = self.json_data() print(json.dumps({command_name: results})) else: results = self.print_data() underline = '=' * len(command_name) print(f'{command_name}\n{underline}\n{results}\n') class AttributeCounterMixin: attribute_name = None def __init__(self): self.stats = defaultdict(int) def __call__(self, line): self.stats[getattr(line, self.attribute_name)] += 1 def raw_results(self): return self.stats def print_data(self): result = '' data = self.raw_results() if isinstance(data, list): data = dict(data) data = sorted(data.items(), key=lambda data_info: data_info[1], reverse=True) for key, value in data: result += f'- {key}: {value}\n' return result def json_data(self): result = [] data = sorted( self.stats.items(), key=lambda data_info: data_info[1], reverse=True ) for key, value in data: result.append({key: value}) return result class SortTrimMixin: @staticmethod def _sort_and_trim(data, reverse=False): """Sorts a dictionary with at least two fields on each of them sorting by the second element. .. warning:: Right now is hardcoded to 10 elements, improve the command line interface to allow to send parameters to each command or globally. """ threshold = 10 data_list = data.items() data_list = sorted( data_list, key=lambda data_info: data_info[1], reverse=reverse ) return data_list[:threshold] class Counter(BaseCommandMixin): """Count valid lines.""" def __init__(self): self.counter = 0 def __call__(self, line): self.counter += 1 def raw_results(self): return self.counter class HttpMethods(AttributeCounterMixin, BaseCommandMixin): """Tally all requests per HTTP method (GET/POST...).""" attribute_name = 'http_request_method' class IpCounter(AttributeCounterMixin, BaseCommandMixin): """Report a breakdown of how many requests have been made per IP. For this to work you need to configure HAProxy to capture the `X-Forwarded-For` header. 
""" attribute_name = 'ip' class TopIps(IpCounter, SortTrimMixin): """Return the top most frequent IPs (10 items).""" def raw_results(self): return self._sort_and_trim(self.stats, reverse=True) class StatusCodesCounter(AttributeCounterMixin, BaseCommandMixin): """Tally requests per HTTP status (404, 500...)""" attribute_name = 'status_code' class RequestPathCounter(AttributeCounterMixin, BaseCommandMixin): """Tally requests per the request's path.""" attribute_name = 'http_request_path' class TopRequestPaths(RequestPathCounter, SortTrimMixin): """Returns the top most frequent paths (10 items).""" def raw_results(self): return self._sort_and_trim(self.stats, reverse=True) class SlowRequests(BaseCommandMixin): """List all requests that are considered slow to process (1 second).""" threshold = 1000 def __init__(self): self.slow_requests = [] def __call__(self, line): response_time = line.time_wait_response if response_time >= self.threshold: self.slow_requests.append(response_time) def raw_results(self): return sorted(self.slow_requests) class SlowRequestsCounter(SlowRequests): """Counts requests that are considered slow (1 second).""" def raw_results(self): return len(self.slow_requests) class AverageResponseTime(SlowRequests): """Global average response time it took downstream servers to answer requests.""" threshold = 0 def raw_results(self): total_requests = float(len(self.slow_requests)) if total_requests > 0: average = sum(self.slow_requests) / total_requests return round(average, 2) return 0.0 class AverageWaitingTime(BaseCommandMixin): """Return the average time valid requests wait on HAProxy before being dispatched to a backend server.""" def __init__(self): self.waiting_times = [] def __call__(self, line): waiting_time = line.time_wait_queues if waiting_time >= 0: self.waiting_times.append(waiting_time) def raw_results(self): total_requests = float(len(self.waiting_times)) if total_requests > 0: average = sum(self.waiting_times) / total_requests return round(average, 2) return 0.0 class ServerLoad(AttributeCounterMixin, BaseCommandMixin): """Tally requests per downstream server.""" attribute_name = 'server_name' class QueuePeaks(BaseCommandMixin): """Give stats about queue peaks in HAProxy. When servers can not handle all incoming requests, they have to wait on HAProxy. On every log line there is an account for how many requests have been piled up. A queue peak is defined by the biggest value on the backend queue on a series of log lines that are between log lines with the queue empty. 
""" def __init__(self): self.requests = {} self.threshold = 1 @staticmethod def _generate_key(date): """Create a suitable unique key out of a python datetime.datetime object.""" # get the unix timestamp out of the date, # after removing the microseconds from it no_microseconds = date.replace(microsecond=0) time_parts = no_microseconds.timetuple() unixtime = time.mktime(time_parts) # add back the microseconds to the key, as decimals microseconds = date.microsecond / (10 ** len(str(date.microsecond))) key = unixtime + microseconds return key def __call__(self, line): key = self._generate_key(line.accept_date) self.requests[key] = (line.queue_backend, line.accept_date) def raw_results(self): sorted_requests = OrderedDict(sorted(self.requests.items())) peaks = [] current_peak = 0 requests_on_queue = 0 timestamp = None current_span = 0 first_with_queue = None for requests_on_queue, timestamp in sorted_requests.values(): # set the peak if requests_on_queue > current_peak: current_peak = requests_on_queue # set the span if requests_on_queue > 0: current_span += 1 # set when the queue starts if first_with_queue is None: first_with_queue = timestamp # if the queue is already flushed, record it and reset values if requests_on_queue == 0 and current_peak > self.threshold: data = { 'peak': current_peak, 'span': current_span, 'started': first_with_queue, 'finished': timestamp, } peaks.append(data) current_peak = 0 current_span = 0 first_with_queue = None # case of a series that does not end if requests_on_queue > 0 and current_peak > self.threshold: data = { 'peak': current_peak, 'span': current_span, 'started': first_with_queue, 'finished': timestamp, } peaks.append(data) return peaks def print_data(self): data = '' for peak_info in self.raw_results(): data += f'- peak: {peak_info.get("peak")} ' # noqa: Q000 data += f'- span: {peak_info.get("span")} ' # noqa: Q000 data += f'- started: {peak_info.get("started").isoformat()} ' # noqa: Q000 data += ( f'- finished: {peak_info.get("finished").isoformat()}\n' # noqa: Q000 ) return data def json_data(self): data = self.raw_results() for peak_info in data: peak_info['started'] = peak_info['started'].isoformat() peak_info['finished'] = peak_info['finished'].isoformat() return data class ConnectionType(BaseCommandMixin): """Tally requests per their SSL usage (either yes or no). This only works if the request path contains the default port for SSL (443). """ def __init__(self): self.https = 0 self.non_https = 0 def __call__(self, line): if line.is_https: self.https += 1 else: self.non_https += 1 def raw_results(self): return self.https, self.non_https def print_data(self): https, http = self.raw_results() return f'- https: {https}\n- http: {http}' def json_data(self): https, http = self.raw_results() return [{'https': https}, {'http': http}] class RequestsPerMinute(BaseCommandMixin): """Report the count of requests per minute. Combine it with time constrains (`-s` and `-d`) otherwise the output will be long. 
def __init__(self): self.requests = defaultdict(int) def generate_key(self, accept_date): date_with_minute_precision = accept_date.replace(second=0, microsecond=0) unixtime = time.mktime(date_with_minute_precision.timetuple()) return unixtime def __call__(self, line): key = self.generate_key(line.accept_date) self.requests[key] += 1 def raw_results(self): """Return the list of requests sorted by the timestamp.""" data = sorted(self.requests.items(), key=lambda data_info: data_info[0]) return data def print_data(self): data = '' for date_info, count in self.raw_results(): date = datetime.fromtimestamp(date_info).isoformat() data += f'- {date}: {count}\n' return data def json_data(self): data = [] for date_info, count in self.raw_results(): date = datetime.fromtimestamp(date_info).isoformat() data.append({date: count}) return data class RequestsPerHour(RequestsPerMinute): """Report the count of requests per hour. Combine it with time constraints (`-s` and `-d`), otherwise the output will be long. """ def generate_key(self, accept_date): date_with_hour_precision = accept_date.replace( minute=0, second=0, microsecond=0 ) unixtime = time.mktime(date_with_hour_precision.timetuple()) return unixtime class Print(BaseCommandMixin): """Returns the raw lines to be printed.""" def __call__(self, line): print(line.raw_line) def raw_results(self): return def results(self, output=None): return gforcada-haproxy_log_analysis-c5d274d/src/haproxy/filters.py000066400000000000000000000100571453042166300244440ustar00rootroot00000000000000def filter_ip(ip): """Filter by IP. -f ip[192.168.1.2] # will return only lines that have this IP. Either the client IP, or, if present, the first IP captured in the X-Forwarded-For header. """ def filter_func(log_line): return log_line.ip == ip return filter_func def filter_ip_range(ip_range): """Filter by an IP range. -f ip_range[192.168.1] Rather than a proper IP range, this is a string prefix match. See the `ip` filter for which IP is used. """ def filter_func(log_line): ip = log_line.ip if ip: return ip.startswith(ip_range) return filter_func def filter_path(path): """Filter by the request path. -f path[/one/two] It checks that the given path is part of the requested path. """ def filter_func(log_line): return path in log_line.http_request_path return filter_func def filter_ssl(ignore=True): """Filter by SSL connection. -f ssl It checks that the request is made via the standard https port. """ def filter_func(log_line): return log_line.is_https return filter_func def filter_slow_requests(slowness): """Filter by response time. -f slow_requests[1000] # get all lines that took a second or more to process Filters by the time it took the downstream server to process the request. Time is in milliseconds. """ def filter_func(log_line): slowness_int = int(slowness) return slowness_int <= log_line.time_wait_response return filter_func def filter_wait_on_queues(max_waiting): """Filter by queue time in HAProxy. -f wait_on_queues[1000] # get all requests that waited a second or more in HAProxy Filters by the time a request had to wait in HAProxy prior to being sent to a downstream server to be processed. """ def filter_func(log_line): waiting = int(max_waiting) return waiting <= log_line.time_wait_queues return filter_func def filter_status_code(http_status): """Filter by a specific HTTP status code.
-f status_code[404] """ def filter_func(log_line): return log_line.status_code == http_status return filter_func def filter_status_code_family(family_number): """Filter by a family of HTTP status code. -f status_code_family[5] # get all 5xx status codes """ def filter_func(log_line): return log_line.status_code.startswith(family_number) return filter_func def filter_http_method(http_method): """Filter by HTTP method (GET, POST, PUT, HEAD...). -f http_method[GET] """ def filter_func(log_line): return log_line.http_request_method == http_method return filter_func def filter_backend(backend_name): """Filter by HAProxy backend. -f backend[specific_app] See HAProxy configuration, it can have multiple backends defined. """ def filter_func(log_line): return log_line.backend_name == backend_name return filter_func def filter_frontend(frontend_name): """Filter by which HAProxy frontend got the request. -f frontend[loadbalancer] See HAProxy configuration, it can have multiple frontends defined. """ def filter_func(log_line): return log_line.frontend_name == frontend_name return filter_func def filter_server(server_name): """Filter by downstream server. -f server[app01] """ def filter_func(log_line): return log_line.server_name == server_name return filter_func def filter_response_size(size): """Filter by how big (in bytes) the response was. -f response_size[50000] Specially useful when looking for big file downloads. """ if size.startswith('+'): size_value = int(size[1:]) else: size_value = int(size) def filter_func(log_line): bytes_read = log_line.bytes_read if bytes_read.startswith('+'): bytes_read = int(bytes_read[1:]) else: bytes_read = int(bytes_read) return bytes_read >= size_value return filter_func gforcada-haproxy_log_analysis-c5d274d/src/haproxy/line.py000066400000000000000000000211171453042166300237220ustar00rootroot00000000000000from datetime import datetime import re # Example log line, to understand the regex below (truncated to fit into # 80 chars): # # Dec 9 13:01:26 localhost haproxy[28029]: 127.0.0.1:39759 # [09/Dec/2013:12:59:46.633] loadbalancer default/instance8 # 0/51536/1/48082/99627 200 83285 - - ---- 87/87/87/1/0 0/67 # {77.24.148.74} "GET /path/to/image HTTP/1.1" HAPROXY_LINE_REGEX = re.compile( # Dec 9 13:01:26 localhost haproxy[28029]: # ignore the syslog prefix r'\A.*\]:\s+' # 127.0.0.1:39759 r'(?P[a-fA-F\d+\.:]+):(?P\d+)\s+' # [09/Dec/2013:12:59:46.633] r'\[(?P.+)\]\s+' # loadbalancer default/instance8 r'(?P.*)\s+(?P.*)/(?P.*)\s+' # 0/51536/1/48082/99627 r'(?P-?\d+)/(?P-?\d+)/(?P-?\d+)/' r'(?P-?\d+)/(?P\+?\d+)\s+' # 200 83285 r'(?P-?\d+)\s+(?P\+?\d+)\s+' # - - ---- r'.*\s+' # ignored by now, should capture cookies and termination state # 87/87/87/1/0 r'(?P\d+)/(?P\d+)/(?P\d+)/' r'(?P\d+)/(?P\+?\d+)\s+' # 0/67 r'(?P\d+)/(?P\d+)\s+' # {77.24.148.74} r'({(?P.*)}\s+{(?P.*)}\s+|{(?P.*)}\s+|)' # "GET /path/to/image HTTP/1.1" r'"(?P.*)"' r'\Z' # end of line ) HTTP_REQUEST_REGEX = re.compile( r'(?P\w+)\s+' r'(?P(/[`“\\<>/\w:,;.#$!?=&@%_+\'*^~|()\[\]{\}-]*)+)' r'(\s+(?P\w+/\d\.\d))?' ) class Line: """For a precise and more detailed description of every field see: http://cbonte.github.io/haproxy-dconv/2.2/configuration.html#8.2.3 """ #: IP of the upstream server that made the connection to HAProxy. client_ip = None #: Port used by the upstream server that made the connection to HAProxy. client_port = None # raw string from log line and its python datetime version raw_accept_date = None #: datetime object with the exact date when the connection to HAProxy was #: made. 
accept_date = None #: HAProxy frontend that received the connection. frontend_name = None #: HAProxy backend that the connection was sent to. backend_name = None #: Downstream server that HAProxy sent the connection to. server_name = None #: Time in milliseconds waiting for the client to send the full HTTP request #: (``Tq`` in HAProxy documentation). time_wait_request = None #: Time in milliseconds that the request spent on HAProxy queues #: (``Tw`` in HAProxy documentation). time_wait_queues = None #: Time in milliseconds to connect to the final server #: (``Tc`` in HAProxy documentation). time_connect_server = None #: Time in milliseconds waiting for the downstream server to send the full #: HTTP response (``Tr`` in HAProxy documentation). time_wait_response = None #: Total time in milliseconds between accepting the HTTP request and #: sending back the HTTP response (``Tt`` in HAProxy documentation). total_time = None #: HTTP status code returned to the client. status_code = None #: Total number of bytes sent back to the client. bytes_read = None # not used for now captured_request_cookie = None captured_response_cookie = None # not used for now termination_state = None #: Total number of concurrent connections on the process when the #: session was logged (``actconn`` in HAProxy documentation). connections_active = None #: Total number of concurrent connections on the frontend when the #: session was logged (``feconn`` in HAProxy documentation). connections_frontend = None #: Total number of concurrent connections handled by the backend when #: the session was logged (``beconn`` in HAProxy documentation). connections_backend = None #: Total number of concurrent connections still active on the server #: when the session was logged (``srv_conn`` in HAProxy documentation). connections_server = None #: Number of connection retries experienced by this session when #: trying to connect to the server. retries = None #: Total number of requests which were processed before this one in #: the server queue (``srv_queue`` in HAProxy documentation). queue_server = None #: Total number of requests which were processed before this one in #: the backend's global queue (``backend_queue`` in HAProxy documentation). queue_backend = None # List of headers captured in the request. captured_request_headers = None # List of headers captured in the response. captured_response_headers = None raw_http_request = None #: HTTP method (GET, POST...) used on this request. http_request_method = None #: Requested HTTP path. http_request_path = None #: HTTP version used on this request. http_request_protocol = None raw_line = None def __init__(self, line): self.raw_line = line self.is_valid = self._parse_line(line) @property def is_https(self): """Returns True if the log line is an SSL connection.
False otherwise.""" if ':443' in self.http_request_path: return True return False def is_within_time_frame(self, start, end): if not start: return True elif start > self.accept_date: return False if not end: return True elif end < self.accept_date: return False return True @property def ip(self): """Returns the IP provided on the log line, or the client_ip if absent/empty.""" if self.captured_request_headers is not None: ip = self.captured_request_headers.split('|')[0] if ip: # only get the first IP, if there are more usually # are the intermediate servers return ip.split(',')[0] return self.client_ip def _parse_line(self, line): matches = HAPROXY_LINE_REGEX.match(line) if matches is None: return False self.client_ip = matches.group('client_ip') self.client_port = int(matches.group('client_port')) self.raw_accept_date = matches.group('accept_date') self.accept_date = self._parse_accept_date() self.frontend_name = matches.group('frontend_name') self.backend_name = matches.group('backend_name') self.server_name = matches.group('server_name') self.time_wait_request = int(matches.group('tq')) self.time_wait_queues = int(matches.group('tw')) self.time_connect_server = int(matches.group('tc')) self.time_wait_response = int(matches.group('tr')) self.total_time = matches.group('tt') self.status_code = matches.group('status_code') self.bytes_read = matches.group('bytes_read') self.connections_active = matches.group('act') self.connections_frontend = matches.group('fe') self.connections_backend = matches.group('be') self.connections_server = matches.group('srv') self.retries = matches.group('retries') self.queue_server = int(matches.group('queue_server')) self.queue_backend = int(matches.group('queue_backend')) self.captured_request_headers = matches.group('request_headers') self.captured_response_headers = matches.group('response_headers') if matches.group('headers') is not None: self.captured_request_headers = matches.group('headers') self.raw_http_request = matches.group('http_request') self._parse_http_request() return True def _parse_accept_date(self): return datetime.strptime(self.raw_accept_date, '%d/%b/%Y:%H:%M:%S.%f') def _parse_http_request(self): matches = HTTP_REQUEST_REGEX.match(self.raw_http_request) if matches: self.http_request_method = matches.group('method') self.http_request_path = matches.group('path') self.http_request_protocol = matches.group('protocol') else: self.handle_bad_http_request() def handle_bad_http_request(self): self.http_request_method = 'invalid' self.http_request_path = 'invalid' self.http_request_protocol = 'invalid' if self.raw_http_request != '': print(f'Could not process HTTP request {self.raw_http_request}') # it is not coverage covered as this is executed by the multiprocessor module, # and setting it up on coverage just for two lines is not worth it def parse_line(line): # pragma: no cover return Line(line.strip()) gforcada-haproxy_log_analysis-c5d274d/src/haproxy/logfile.py000066400000000000000000000027701453042166300244200ustar00rootroot00000000000000from datetime import datetime from haproxy.line import parse_line from haproxy.utils import date_str_to_datetime from haproxy.utils import delta_str_to_timedelta from multiprocessing import Pool class Log: def __init__(self, logfile=None, start=None, delta=None, show_invalid=False): self.logfile = logfile self.show_invalid = show_invalid self.start = None self.end = None if start: self.start = date_str_to_datetime(start) if delta: delta = delta_str_to_timedelta(delta) if isinstance(self.start, datetime): 
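# e.g. '-s 11/Dec/2013 -d 3h' would analyze the first three hours of
# that day (illustrative values; unspecified time parts default to 0)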
self.end = self.start + delta self.invalid_lines = 0 self.valid_lines = 0 def __iter__(self): start = datetime.now() with open(self.logfile) as logfile, Pool() as pool: for index, line in enumerate(pool.imap(parse_line, logfile)): if line.is_valid: self.valid_lines += 1 if line.is_within_time_frame(self.start, self.end): yield line else: if self.show_invalid: print(line.raw_line) self.invalid_lines += 1 if index % 10000 == 0 and index > 0: # pragma: no cover print('.', end='', flush=True) end = datetime.now() print(f'\nIt took {end - start}') @property def total_lines(self): return self.valid_lines + self.invalid_lines gforcada-haproxy_log_analysis-c5d274d/src/haproxy/main.py000066400000000000000000000202161453042166300237160ustar00rootroot00000000000000from haproxy.logfile import Log from haproxy.utils import VALID_COMMANDS from haproxy.utils import VALID_FILTERS from haproxy.utils import validate_arg_date from haproxy.utils import validate_arg_delta import argparse import os def create_parser(): desc = 'Analyze HAProxy log files and outputs statistics about it' parser = argparse.ArgumentParser(description=desc) parser.add_argument('-l', '--log', help='HAProxy log file to analyze') parser.add_argument( '-s', '--start', help='Process log entries starting at this time, in HAProxy date ' 'format (e.g. 11/Dec/2013 or 11/Dec/2013:19:31:41). ' 'At least provide the day/month/year. Values not specified will ' 'use their base value (e.g. 00 for hour). Use in conjunction ' 'with -d to limit the number of entries to process.', ) parser.add_argument( '-d', '--delta', help='Limit the number of entries to process. Express the time delta ' 'as a number and a time unit, e.g.: 1s, 10m, 3h or 4d (for 1 ' 'second, 10 minutes, 3 hours or 4 days). Use in conjunction with ' '-s to only analyze certain time delta. If no start time is ' 'given, the time on the first line will be used instead.', ) parser.add_argument( '-c', '--command', help='List of commands, comma separated, to run on the log file. See ' '--list-commands to get a full list of them.', ) parser.add_argument( '-f', '--filter', help='List of filters to apply on the log file. Passed as comma ' 'separated and parameters within square brackets, e.g ' 'ip[192.168.1.1],ssl,path[/some/path]. See ' '--list-filters to get a full list of them.', ) parser.add_argument( '-n', '--negate-filter', help='Make filters passed with -f work the other way around, i.e. if ' 'the ``ssl`` filter is passed instead of showing only ssl ' 'requests it will show non-ssl traffic. If the ``ip`` filter is ' 'used, then all but that ip passed to the filter will be used.', action='store_true', ) parser.add_argument( '--list-commands', action='store_true', help='Lists all commands available.' ) parser.add_argument( '--list-filters', action='store_true', help='Lists all filters available.' ) parser.add_argument('--json', action='store_true', help='Output results in json.') parser.add_argument( '--invalid', action='store_false', help='Print the lines that could not be parsed. 
' 'Be aware that mixing it with the print command will mix their output.', ) return parser def parse_arguments(args): data = { 'start': None, 'delta': None, 'commands': None, 'filters': None, 'negate_filter': None, 'log': None, 'list_commands': None, 'list_filters': None, 'json': None, 'invalid_lines': None, } if args.list_commands: data['list_commands'] = True # no need to further process any other input parameter return data if args.list_filters: data['list_filters'] = True # no need to further process any other input parameter return data if args.negate_filter: data['negate_filter'] = True if args.start is not None: validate_arg_date(args.start) data['start'] = args.start if args.delta is not None: validate_arg_delta(args.delta) data['delta'] = args.delta if args.command is not None: data['commands'] = parse_arg_commands(args.command) if args.filter is not None: data['filters'] = parse_arg_filters(args.filter) if args.log is not None: _validate_arg_logfile(args.log) data['log'] = args.log if args.json is not None: data['json'] = args.json if args.invalid: data['invalid_lines'] = args.json return data def parse_arg_commands(commands_list): input_commands = commands_list.split(',') for cmd in input_commands: if cmd not in VALID_COMMANDS: raise ValueError( f'command "{cmd}" is not available. ' 'Use --list-commands to get a list of all available commands.' ) return input_commands def parse_arg_filters(filters_arg): input_filters = filters_arg.split(',') return_data = [] for filter_expression in input_filters: filter_name = filter_expression filter_arg = None if filter_expression.endswith(']'): if '[' not in filter_expression: raise ValueError( f'Error on filter "{filter_expression}". ' f'It is missing an opening square bracket.' ) filter_name, filter_arg = filter_expression.split('[') filter_arg = filter_arg[:-1] # remove the closing square bracket if filter_name not in VALID_FILTERS: raise ValueError( f'filter "{filter_name}" is not available. Use --list-filters to get a list of all available filters.' 
) return_data.append((filter_name, filter_arg)) return return_data def _validate_arg_logfile(filename): filepath = os.path.join(os.getcwd(), filename) if not os.path.exists(filepath): raise ValueError(f'filename {filepath} does not exist') def print_commands(): """Prints all commands available with their description.""" for command_name in sorted(VALID_COMMANDS.keys()): print(VALID_COMMANDS[command_name]['description']) def print_filters(): """Prints all filters available with their description.""" for filter_name in sorted(VALID_FILTERS.keys()): print(VALID_FILTERS[filter_name]['description']) def show_help(data): # make sure that if no arguments are passed the help is shown show = True ignore_keys = ('log', 'json', 'negate_filter', 'invalid_lines') for key in data: if data[key] is not None and key not in ignore_keys: show = False break if show: parser = create_parser() parser.print_help() return True return False def main(args): if show_help(args): return # show the command list if args['list_commands']: print_commands() # no need to process further return # show the filter list if args['list_filters']: print_filters() # no need to process further return # initialize the log file log_file = Log( logfile=args['log'], start=args['start'], delta=args['delta'], show_invalid=args['invalid_lines'], ) # get the commands and filters to use filters_to_use = requested_filters(args) cmds_to_use = requested_commands(args) # double negation: when a user wants to negate the filters, # the argument parsing sets `negate_filter` to True, # but the filtering logic (the `all()`) returns True if the line meets all filters # so reversing whatever `negate_filter` has is what the user wants :) expected_filtering = True if args['negate_filter']: expected_filtering = False # process all log lines for line in log_file: if all(f(line) for f in filters_to_use) is expected_filtering: for cmd in cmds_to_use: cmd(line) # print the results print('\nRESULTS\n') output = None if args['json']: output = 'json' for cmd in cmds_to_use: cmd.results(output=output) def requested_filters(args): filters_list = [] if args['filters']: for filter_name, arg in args['filters']: filter_func = VALID_FILTERS[filter_name]['obj'] filters_list.append(filter_func(arg)) return filters_list def requested_commands(args): cmds_list = [] for command in args['commands']: cmd_klass = VALID_COMMANDS[command]['klass'] cmds_list.append(cmd_klass()) return cmds_list def console_script(): # pragma: no cover parser = create_parser() arguments = parse_arguments(parser.parse_args()) main(arguments) gforcada-haproxy_log_analysis-c5d274d/src/haproxy/utils.py000066400000000000000000000070051453042166300241330ustar00rootroot00000000000000from datetime import datetime from datetime import timedelta import re DELTA_REGEX = re.compile(r'\A(?P\d+)(?P[smhd])\Z') START_REGEX = re.compile( r'(?P\d+)/(?P\w+)/(?P\d+)' r'(:(?P\d+)|)(:(?P\d+)|)(:(?P\d+)|)' ) DELTA_KEYS = {'s': 'seconds', 'm': 'minutes', 'h': 'hours', 'd': 'days'} def date_str_to_datetime(date): """Convert a string to a datetime object. The format is `day/month/year[[[:hour]:minute]:second]` being: - day a number - month a three letter representation of the month (i.e. 
Dec, Jan, etc) - year as a 4 digits value - hour/minute/second as 2 digits value, each of them being optional """ matches = START_REGEX.match(date) data = matches.group('day'), matches.group('month'), matches.group('year') raw_date_input = f'{data[0]}/{data[1]}/{data[2]}' date_format = '%d/%b/%Y' for variable, percent in (('hour', ':%H'), ('minute', ':%M'), ('second', ':%S')): match = matches.group(variable) if match: date_format += percent raw_date_input = f'{raw_date_input}:{match}' return datetime.strptime(raw_date_input, date_format) def delta_str_to_timedelta(delta): """Convert a string to a timedelta representation. Format is NUMBER followed by one of the following letters: `s`, `m`, `h`, `d`. Each of them meaning, second, minute, hour and day. """ matches = DELTA_REGEX.match(delta) value = int(matches.group('value')) time_unit = matches.group('time_unit') key = DELTA_KEYS[time_unit] return timedelta(**{key: value}) def validate_arg_date(start): """Check that date argument is valid.""" try: date_str_to_datetime(start) except (AttributeError, ValueError): raise ValueError('--start argument is not valid') def validate_arg_delta(delta): """Check that the delta argument is valid.""" try: delta_str_to_timedelta(delta) except (AttributeError, ValueError): raise ValueError('--delta argument is not valid') def list_filters(): """Return the information of existing filters. Data returned: - their names as the user is expected to use them from the command line - the object itself - its description """ from haproxy import filters data = {} for full_name in dir(filters): if not full_name.startswith('filter_'): continue name = full_name[7:] obj = getattr(filters, full_name) description = _strip_description(obj.__doc__) data[name] = {'obj': obj, 'description': f'{name}:\n\t{description}'} return data def list_commands(): """Return the information of existing commands. 
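Commands are discovered by introspecting the `haproxy.commands` module: mixins are skipped and any class providing `command_line_name()` is registered.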
Data returned: - their names as the user is expected to use them from the command line - the object itself - its description """ from haproxy import commands data = {} for cmd in dir(commands): if cmd.endswith('Mixin'): continue klass = getattr(commands, cmd) try: name = klass.command_line_name() except AttributeError: continue description = _strip_description(klass.__doc__) data[name] = {'klass': klass, 'description': f'{name}:\n\t{description}'} return data def _strip_description(raw_text): if not raw_text: return '' text = '\n\t'.join([line.strip() for line in raw_text.split('\n') if line.strip()]) return text VALID_COMMANDS = list_commands() VALID_FILTERS = list_filters() gforcada-haproxy_log_analysis-c5d274d/tests/000077500000000000000000000000001453042166300213005ustar00rootroot00000000000000gforcada-haproxy_log_analysis-c5d274d/tests/__init__.py000066400000000000000000000000001453042166300233770ustar00rootroot00000000000000gforcada-haproxy_log_analysis-c5d274d/tests/conftest.py000066400000000000000000000043211453042166300234770ustar00rootroot00000000000000from copy import deepcopy from haproxy.line import Line import pytest DEFAULT_DATA = { 'syslog_date': 'Dec 9 13:01:26', 'process_name_and_pid': 'localhost haproxy[28029]:', 'client_ip': '127.0.0.1', 'client_port': 2345, 'accept_date': '09/Dec/2013:12:59:46.633', 'frontend_name': 'loadbalancer', 'backend_name': 'default', 'server_name': 'instance8', 'tq': 0, 'tw': 51536, 'tc': 1, 'tr': 48082, 'tt': '99627', 'status': '200', 'bytes': '83285', 'act': '87', 'fe': '89', 'be': '98', 'srv': '1', 'retries': '20', 'queue_server': 2, 'queue_backend': 67, 'headers': ' {77.24.148.74}', 'http_request': 'GET /path/to/image HTTP/1.1', } class LinesGenerator: def __init__(self, line_format): self.data = deepcopy(DEFAULT_DATA) self.line_format = line_format def __call__(self, *args, **kwargs): self.data.update(**kwargs) self.data['client_ip_and_port'] = '{client_ip}:{client_port}'.format( **self.data ) self.data[ 'server_names' ] = '{frontend_name} {backend_name}/{server_name}'.format(**self.data) self.data['timers'] = '{tq}/{tw}/{tc}/{tr}/{tt}'.format(**self.data) self.data['status_and_bytes'] = '{status} {bytes}'.format(**self.data) self.data['connections_and_retries'] = '{act}/{fe}/{be}/{srv}/{retries}'.format( **self.data ) self.data['queues'] = '{queue_server}/{queue_backend}'.format(**self.data) log_line = self.line_format.format(**self.data) return Line(log_line) @pytest.fixture() def default_line_data(): return DEFAULT_DATA @pytest.fixture() def line_factory(): # queues and headers parameters are together because if no headers are # saved the field is completely empty and thus there is no double space # between queue backend and http request. 
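# For reference, the default data above renders to a line close to this (illustrative):
# Dec 9 13:01:26 localhost haproxy[28029]: 127.0.0.1:2345 [09/Dec/2013:12:59:46.633] loadbalancer default/instance8 0/51536/1/48082/99627 200 83285 - - ---- 87/89/98/1/20 2/67 {77.24.148.74} "GET /path/to/image HTTP/1.1"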
raw_line = ( '{syslog_date} {process_name_and_pid} {client_ip_and_port} ' '[{accept_date}] {server_names} {timers} {status_and_bytes} ' '- - ---- {connections_and_retries} {queues}{headers} ' '"{http_request}"' ) generator = LinesGenerator(raw_line) return generator gforcada-haproxy_log_analysis-c5d274d/tests/files/000077500000000000000000000000001453042166300224025ustar00rootroot00000000000000gforcada-haproxy_log_analysis-c5d274d/tests/files/2_ok_1_invalid.log000066400000000000000000000017541453042166300256740ustar00rootroot00000000000000Dec 9 13:01:26 localhost haproxy[28029]: 127.0.0.1:38037 [09/Dec/2013:12:00:03.205] loadbalancer default/instance5 0/133/0/294/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "GET /VirtualHostBase/http/www.example.com:80/website/VirtualHostRoot/autoren/dummy/its-time-for-prostitution%231386586409135007 HTTP/1.1" Dec 9 13:01:26 localhost haproxy[28029]: 127.0.0.1:38401 [] loadbalancer default/instance6 0/0/0/155/156 302 15987 - - ---- 18/18/18/0/0 0/0 {123.123.123.123} "GET /VirtualHostBase/http/www.example.com:80/website/VirtualHostRoot/autoren/dummy/westliche-wertegemeinschft/view HTTP/1.1" Dec 9 13:01:26 localhost haproxy[28029]: 127.0.0.1:38414 [09/Dec/2013:12:00:11.476] loadbalancer default/instance9 0/0/0/200/202 200 19056 - - ---- 18/18/18/1/0 0/0 {123.123.123.123} "GET /VirtualHostBase/http/www.example.com:80/website/VirtualHostRoot/acl_users/credentials_cookie_auth/require_login?came_from=http%3A//www.example.com/autoren/dummy/westliche-wertegemeinschft/view HTTP/1.1" gforcada-haproxy_log_analysis-c5d274d/tests/files/small.log000066400000000000000000000035051453042166300242200ustar00rootroot00000000000000Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [10/Dec/2013:10:01:04.205] loadbalancer default/instance1 0/133/0/201/430 200 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "GET /hello HTTP/1.1" Dec 9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [09/Dec/2013:10:01:04.205] loadbalancer default/instance2 0/133/0/2942/430 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.124.124} "HEAD /world HTTP/1.1" Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [10/Dec/2013:12:03:06.205] loadbalancer default/instance3 0/133/0/94/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.124.123} "POST /hello HTTP/1.1" Dec 9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [11/Dec/2013:10:01:04.205] loadbalancer default/instance2 0/133/0/1293/430 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.124} "GET /free HTTP/1.1" Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [09/Dec/2013:11:02:05.205] loadbalancer default/instance3 0/133/0/20095/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "HEAD /fra HTTP/1.1" Dec 9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [10/Dec/2013:11:02:05.205] loadbalancer default/instance1 0/133/0/2936/430 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.124.124} "GET /world HTTP/1.1" Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [11/Dec/2013:12:03:06.205] loadbalancer default/instance1 0/133/0/4/437 300 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "POST /freitag HTTP/1.1" Dec 9 12:00:03 127.2.3.4 haproxy[12345]: 127.0.0.1:38037 [09/Dec/2013:12:03:06.205] loadbalancer default/instance2 0/133/0/29408/430 200 17610 - - ---- 21/21/21/1/0 0/1 "GET /free HTTP/1.1" Dec 9 12:00:03 localhost haproxy[12345]: 127.0.0.1:38037 [11/Dec/2013:11:02:05.205] loadbalancer default/instance1 0/133/0/409/430 404 17610 - - ---- 21/21/21/1/0 0/1 {123.123.123.123} "HEAD /hello HTTP/1.1" 
gforcada-haproxy_log_analysis-c5d274d/tests/test_argparse.py000066400000000000000000000113301453042166300245130ustar00rootroot00000000000000from haproxy.main import create_parser from haproxy.main import parse_arg_filters from haproxy.main import parse_arguments import pytest def test_parser_arguments_defaults(): """Test that the argument parsing defaults work.""" parser = create_parser() data = parse_arguments(parser.parse_args([])) assert data == { 'start': None, 'delta': None, 'commands': None, 'filters': None, 'negate_filter': None, 'log': None, 'list_commands': None, 'list_filters': None, 'json': False, 'invalid_lines': False, } @pytest.mark.parametrize( ('argument', 'option'), [ ('--list-commands', 'list_commands'), ('--list-filters', 'list_filters'), ('--negate-filter', 'negate_filter'), ('-n', 'negate_filter'), ('--json', 'json'), ], ) def test_parser_boolean_arguments(argument, option): """Test that boolean arguments set their corresponding option to True.""" parser = create_parser() data = parse_arguments(parser.parse_args([argument])) assert data[option] is True @pytest.mark.parametrize( ('start', 'delta'), [('30/Dec/2019', '3d'), ('20/Jun/2015', '2h')] ) def test_arguments_dates(start, delta): """Check that properly formatted start and delta arguments are processed fine. They are then extracted and stored for later use. """ parser = create_parser() data = parse_arguments(parser.parse_args(['-s', start, '-d', delta])) assert data['start'] == start assert data['delta'] == delta @pytest.mark.parametrize('start', ['33/Dec/2019', '5/Hallo/2019']) def test_arguments_date_invalid(start): """Incorrectly formatted start argument raises an exception.""" parser = create_parser() with pytest.raises(ValueError, match='--start argument is not valid'): parse_arguments(parser.parse_args(['-s', start])) @pytest.mark.parametrize('delta', ['3P', '2323MM']) def test_arguments_delta_invalid(delta): """Incorrectly formatted delta argument raises an exception.""" parser = create_parser() with pytest.raises(ValueError, match='--delta argument is not valid'): parse_arguments(parser.parse_args(['-d', delta])) @pytest.mark.parametrize( ('cmds', 'is_valid'), [ ('counter', True), ('counter,ip_counter', True), ('ip_counter,count_data', False), ('count_data', False), ], ) def test_commands_arguments(cmds, is_valid): """Test that valid commands are parsed, and that an exception is raised otherwise.""" parser = create_parser() if not is_valid: with pytest.raises(ValueError, match='is not available. Use --list-commands'): parse_arguments(parser.parse_args(['-c', cmds])) else: data = parse_arguments(parser.parse_args(['-c', cmds])) assert data['commands'] == cmds.split(',') @pytest.mark.parametrize( ('filters_list', 'is_valid'), [ ('ip_range', True), ('slow_requests,backend', True), ('tomatoes', False), ('slow_requests,potatoes', False), ], ) def test_filters_arguments(filters_list, is_valid): """Test that valid filters are parsed, and that an exception is raised otherwise.""" parser = create_parser() if not is_valid: with pytest.raises(ValueError, match='is not available.
Use --list-filters'): parse_arguments(parser.parse_args(['-f', filters_list])) else: data = parse_arguments(parser.parse_args(['-f', filters_list])) assert data['filters'] == [(x, None) for x in filters_list.split(',')] @pytest.mark.parametrize( ('filter_expression', 'expected'), [ ('ip_range', [('ip_range', None)]), ('ip_rangelala]', None), ('ip_range[lala]', [('ip_range', 'lala')]), ], ) def test_filters_with_arguments(filter_expression, expected): """Check that the arguments given to the filters are parsed properly, or that an exception is raised otherwise. """ if expected is None: with pytest.raises(ValueError, match='It is missing an opening square bracket'): parse_arg_filters(filter_expression) else: data = parse_arg_filters(filter_expression) assert data == expected @pytest.mark.parametrize( ('filename', 'is_valid'), [ ('tests/conftest.py', True), ('tests/non-existing-file.py', False), ], ) def test_log_argument(filename, is_valid): """Check that the argument parsing validates that the file exists.""" parser = create_parser() if is_valid: data = parse_arguments(parser.parse_args(['-l', filename])) assert data['log'] == filename else: with pytest.raises(ValueError, match=f'{filename} does not exist'): parse_arguments(parser.parse_args(['-l', filename])) gforcada-haproxy_log_analysis-c5d274d/tests/test_commands.py000066400000000000000000000644131453042166300245220ustar00rootroot00000000000000from datetime import datetime from datetime import timedelta from haproxy import commands import pytest def check_output(cmd, output, expected, capsys): """Validate the output of commands.""" name = cmd.command_line_name().upper() cmd.results(output=output) output_text = capsys.readouterr().out if output == 'json': assert f'{{"{name}": {expected}}}' in output_text else: assert f'{name}\n====' in output_text assert f'====\n{expected}\n' in output_text @pytest.mark.parametrize( ('klass', 'expected'), [ (commands.StatusCodesCounter, 'status_codes_counter'), (commands.AverageResponseTime, 'average_response_time'), (commands.Counter, 'counter'), (commands.IpCounter, 'ip_counter'), ], ) def test_commands_names(klass, expected): """Check that the command line names of command classes are generated correctly.""" assert klass.command_line_name() == expected def test_counter_results(): """Test the Counter command. It simply counts all the lines passed to it. """ cmd = commands.Counter() assert cmd.raw_results() == 0 for x in range(3): cmd(x) assert cmd.raw_results() == 3 @pytest.mark.parametrize('output', [None, 'json']) def test_counter_output(capsys, output): """Test the Counter command. It simply counts all the lines passed to it. """ cmd = commands.Counter() for x in range(3): cmd(x) check_output(cmd, output, 3, capsys) def test_http_methods_results(line_factory): """Test the HTTPMethods command. It creates a breakdown of how many times each HTTP verb has been used. """ cmd = commands.HttpMethods() assert cmd.raw_results() == {} for verb, count in (('POST', 4), ('GET', 3), ('PUT', 2)): line = line_factory(http_request=f'{verb} /path/to/image HTTP/1.1') for _ in range(count): cmd(line) results = cmd.raw_results() assert len(results) == 3 assert results['POST'] == 4 assert results['GET'] == 3 assert results['PUT'] == 2 @pytest.mark.parametrize( ('output', 'expected'), [(None, '- PUT: 2\n- GET: 1'), ('json', '[{"PUT": 2}, {"GET": 1}]')], ) def test_http_methods_output(line_factory, capsys, output, expected): """Test the HTTPMethods command.
It creates a breakdown of how many times each HTTP verb has been used. """ cmd = commands.HttpMethods() for verb, count in (('GET', 1), ('PUT', 2)): line = line_factory(http_request=f'{verb} /path/to/image HTTP/1.1') for _ in range(count): cmd(line) check_output(cmd, output, expected, capsys) def test_ip_counter_results(line_factory): """Test the IpCounter command. It creates a breakdown of how many times each IP has been used. """ cmd = commands.IpCounter() assert cmd.raw_results() == {} for ip, count in (('192.168.0.1', 4), ('172.4.3.2', 3), ('8.7.6.5', 2)): line = line_factory(headers=f' {{{ip}}}') for _ in range(count): cmd(line) results = cmd.raw_results() assert len(results) == 3 assert results['192.168.0.1'] == 4 assert results['172.4.3.2'] == 3 assert results['8.7.6.5'] == 2 @pytest.mark.parametrize( ('output', 'expected'), [ (None, '- 172.4.3.2: 3\n- 8.7.6.5: 2'), ('json', '[{"172.4.3.2": 3}, {"8.7.6.5": 2}]'), ], ) def test_ip_counter_output(line_factory, capsys, output, expected): """Test the IpCounter command. It creates a breakdown of how many times each IP has been used. """ cmd = commands.IpCounter() for ip, count in (('172.4.3.2', 3), ('8.7.6.5', 2)): line = line_factory(headers=f' {{{ip}}}') for _ in range(count): cmd(line) check_output(cmd, output, expected, capsys) def test_top_ips_results(line_factory): """Test the TopIps command. It lists the 10 most used IPs, and how many times they were used. """ cmd = commands.TopIps() assert cmd.raw_results() == [] for ip, count in ((f'192.168.0.{x}', x) for x in range(11)): line = line_factory(headers=f' {{{ip}}}') for _ in range(count): cmd(line) results = cmd.raw_results() assert len(results) == 10 assert results[0] == ('192.168.0.10', 10) assert results[1] == ('192.168.0.9', 9) assert results[2] == ('192.168.0.8', 8) assert results[3] == ('192.168.0.7', 7) assert results[4] == ('192.168.0.6', 6) assert results[5] == ('192.168.0.5', 5) assert results[6] == ('192.168.0.4', 4) assert results[7] == ('192.168.0.3', 3) assert results[8] == ('192.168.0.2', 2) assert results[9] == ('192.168.0.1', 1) def test_top_ips_print_results(line_factory): """Test the TopIps command. Ensure that when they are printed, only 10 results are shown. """ cmd = commands.TopIps() for ip, count in ((f'192.168.0.{x}', x) for x in range(14)): line = line_factory(headers=f' {{{ip}}}') for _ in range(count): cmd(line) results = cmd.print_data() results = [x for x in results.split('\n') if x] assert len(results) == 10 assert results[0] == '- 192.168.0.13: 13' assert results[-1] == '- 192.168.0.4: 4' @pytest.mark.parametrize( ('output', 'expected'), [ (None, '- 192.168.0.2: 2\n- 192.168.0.1: 1'), ('json', '[{"192.168.0.2": 2}, {"192.168.0.1": 1}]'), ], ) def test_top_ips_output(line_factory, capsys, output, expected): """Test the TopIps command. It lists the 10 most used IPs, and how many times they were used. """ cmd = commands.TopIps() assert cmd.raw_results() == [] for ip, count in ((f'192.168.0.{x}', x) for x in range(3)): line = line_factory(headers=f' {{{ip}}}') for _ in range(count): cmd(line) check_output(cmd, output, expected, capsys) def test_status_codes_counter_results(line_factory): """Test the StatusCodesCounter command. It creates a breakdown of which status codes have been used and how many times each.
""" cmd = commands.StatusCodesCounter() assert cmd.raw_results() == {} for status_code, count in (('200', 4), ('301', 3), ('500', 2)): line = line_factory(status=status_code) for _ in range(count): cmd(line) results = cmd.raw_results() assert len(results) == 3 assert results['200'] == 4 assert results['301'] == 3 assert results['500'] == 2 @pytest.mark.parametrize( ('output', 'expected'), [(None, '- 301: 3\n- 500: 2'), ('json', '[{"301": 3}, {"500": 2}]')], ) def test_status_codes_counter_output(line_factory, capsys, output, expected): """Test the StatusCodesCounter command. It creates a breakdown of which status codes have been used and how many each. """ cmd = commands.StatusCodesCounter() for status_code, count in (('301', 3), ('500', 2)): line = line_factory(status=status_code) for _ in range(count): cmd(line) check_output(cmd, output, expected, capsys) def test_request_path_counter_results(line_factory): """Test the RequestPathCounter command. It creates a breakdown of how many times each URL path has been used. """ cmd = commands.RequestPathCounter() assert cmd.raw_results() == {} for path, count in (('/image/one', 4), ('/video/two', 3), ('/article/three', 2)): line = line_factory(http_request=f'GET {path} HTTP/1.1') for _ in range(count): cmd(line) results = cmd.raw_results() assert len(results) == 3 assert results['/image/one'] == 4 assert results['/video/two'] == 3 assert results['/article/three'] == 2 @pytest.mark.parametrize( ('output', 'expected'), [ (None, '- /video/two: 3\n- /article/three: 2'), ('json', '[{"/video/two": 3}, {"/article/three": 2}]'), ], ) def test_request_path_counter_output(line_factory, capsys, output, expected): """Test the RequestPathCounter command. It creates a breakdown of how many times each URL path has been used. """ cmd = commands.RequestPathCounter() for path, count in (('/video/two', 3), ('/article/three', 2)): line = line_factory(http_request=f'GET {path} HTTP/1.1') for _ in range(count): cmd(line) check_output(cmd, output, expected, capsys) def test_slow_requests_results(line_factory): """Test the SlowRequests command. It lists all requests that took more than 1000 milliseconds to respond. """ cmd = commands.SlowRequests() assert cmd.raw_results() == [] for total_time in (1003, 987, 456, 2013, 45000, 1000, 3200, 999): cmd(line_factory(tr=total_time)) results = cmd.raw_results() assert results == [1000, 1003, 2013, 3200, 45000] @pytest.mark.parametrize( ('output', 'expected'), [ (None, [1000, 1003, 2013, 3200, 45000]), ('json', '[1000, 1003, 2013, 3200, 45000]'), ], ) def test_slow_requests_output(line_factory, capsys, output, expected): """Test the SlowRequests command. It lists all requests that took more than 1000 milliseconds to respond. """ cmd = commands.SlowRequests() for total_time in (1003, 987, 456, 2013, 45000, 1000, 3200, 999): cmd(line_factory(tr=total_time)) check_output(cmd, output, expected, capsys) def test_top_request_paths_results(line_factory): """Test the TopRequestPaths command. It lists the 10 most used URL paths, and how much where they used. 
""" cmd = commands.TopRequestPaths() assert cmd.raw_results() == [] for path, count in ((f'/file/{x}', x) for x in range(11)): line = line_factory(http_request=f'GET {path} HTTP/1.1') for _ in range(count): cmd(line) results = cmd.raw_results() assert len(results) == 10 assert results[0] == ('/file/10', 10) assert results[1] == ('/file/9', 9) assert results[2] == ('/file/8', 8) assert results[3] == ('/file/7', 7) assert results[4] == ('/file/6', 6) assert results[5] == ('/file/5', 5) assert results[6] == ('/file/4', 4) assert results[7] == ('/file/3', 3) assert results[8] == ('/file/2', 2) assert results[9] == ('/file/1', 1) def test_top_request_paths_print_results(line_factory): """Test the TopRequestPaths command. Ensure that when they are printed, only 10 results are shown. """ cmd = commands.TopRequestPaths() for path, count in ((f'/file/{x}', x) for x in range(14)): line = line_factory(http_request=f'GET {path} HTTP/1.1') for _ in range(count): cmd(line) results = cmd.print_data() results = [x for x in results.split('\n') if x] assert len(results) == 10 assert results[0] == '- /file/13: 13' assert results[-1] == '- /file/4: 4' @pytest.mark.parametrize( ('output', 'expected'), [ (None, '- /file/2: 2\n- /file/1: 1'), ('json', '[{"/file/2": 2}, {"/file/1": 1}]'), ], ) def test_top_request_paths_output(line_factory, capsys, output, expected): """Test the TopRequestPaths command. It lists the 10 most used URL paths, and how much where they used. """ cmd = commands.TopRequestPaths() for path, count in ((f'/file/{x}', x) for x in range(3)): line = line_factory(http_request=f'GET {path} HTTP/1.1') for _ in range(count): cmd(line) check_output(cmd, output, expected, capsys) def test_slow_requests_counter_results(line_factory): """Test the SlowRequestsCounter command. It counts how many requests took more than 1000 milliseconds to complete. """ cmd = commands.SlowRequestsCounter() assert cmd.raw_results() == 0 for total_time in (1003, 987, 456, 2013, 45000, 1000, 3200, 999): cmd(line_factory(tr=total_time)) results = cmd.raw_results() assert results == 5 @pytest.mark.parametrize('output', [None, 'json']) def test_slow_requests_counter_output(line_factory, capsys, output): """Test the SlowRequestsCounter command. It counts how many requests took more than 1000 milliseconds to complete. """ cmd = commands.SlowRequestsCounter() for total_time in (1003, 987, 456, 2013, 45000, 1000, 3200, 999): cmd(line_factory(tr=total_time)) check_output(cmd, output, 5, capsys) @pytest.mark.parametrize( ('series', 'average'), [ ((1003, 987, 456, 2013, 1000, 3200, 999), 1379.71), ((110, -1, 110), 110), # aborted connections are ignored ((45, 30, 0), 25), # responses that take 0 milliseconds are still counted ], ) def test_average_response_time_results(line_factory, series, average): """Test the AverageResponseTime command. Returns the average response time of all valid requests. """ cmd = commands.AverageResponseTime() assert cmd.raw_results() == 0.0 for total_time in series: cmd(line_factory(tr=total_time)) results = cmd.raw_results() assert results == average @pytest.mark.parametrize('output', [None, 'json']) def test_average_response_time_output(line_factory, capsys, output): """Test the AverageResponseTime command. Returns the average response time of all valid requests. 
""" cmd = commands.AverageResponseTime() for total_time in ( 40, 30, ): cmd(line_factory(tr=total_time)) check_output(cmd, output, 35.0, capsys) @pytest.mark.parametrize( ('series', 'average'), [ ((1003, 987, 456, 2013, 1000, 3200, 999), 1379.71), ((110, -1, 110), 110), # aborted connections are ignored ((45, 30, 0), 25), # requests that do not wait at all are still counted ], ) def test_average_waiting_time_results(line_factory, series, average): """Test the AverageWaitingTime command. Returns the average time requests had to wait to get processed. """ cmd = commands.AverageWaitingTime() assert cmd.raw_results() == 0.0 for wait_time in series: cmd(line_factory(tw=wait_time)) results = cmd.raw_results() assert results == average @pytest.mark.parametrize('output', [None, 'json']) def test_average_waiting_time_output(line_factory, capsys, output): """Test the AverageWaitingTime command. Returns the average time requests had to wait to get processed. """ cmd = commands.AverageWaitingTime() for wait_time in (40, 30): cmd(line_factory(tw=wait_time)) check_output(cmd, output, 35.0, capsys) def test_server_load_results(line_factory): """Test the ServerLoad command. It creates a breakdown of how many requests each server processed. """ cmd = commands.ServerLoad() assert cmd.raw_results() == {} for name, count in (('server4', 4), ('server3', 3), ('server5', 5)): line = line_factory(server_name=name) for _ in range(count): cmd(line) results = cmd.raw_results() assert len(results) == 3 assert results['server5'] == 5 assert results['server4'] == 4 assert results['server3'] == 3 @pytest.mark.parametrize( ('output', 'expected'), [ (None, '- server5: 5\n- server3: 3'), ('json', '[{"server5": 5}, {"server3": 3}]'), ], ) def test_server_load_output(line_factory, capsys, output, expected): """Test the ServerLoad command. It creates a breakdown of how many requests each server processed. """ cmd = commands.ServerLoad() for name, count in (('server3', 3), ('server5', 5)): line = line_factory(server_name=name) for _ in range(count): cmd(line) check_output(cmd, output, expected, capsys) def test_queue_peaks_no_lines_results(line_factory): """Test the QueuePeaks command. If there are no log lines processed, nothing should be returned. """ cmd = commands.QueuePeaks() assert cmd.raw_results() == [] def test_queue_peaks_no_queues(line_factory): """Test the QueuePeaks command. If there are no log lines processed, nothing should be returned. """ cmd = commands.QueuePeaks() now = datetime.now() for second in range(4): accept_date = now.replace(second=second).strftime('%d/%b/%Y:%H:%M:%S.%f') cmd(line_factory(queue_backend=0, accept_date=accept_date)) assert len(cmd.requests) == 4 assert cmd.raw_results() == [] @pytest.mark.parametrize( ('date', 'expected_key'), [ ('10/Dec/2019:15:40:12.12345', 1575988812.12345), ('15/Jan/2017:05:23:05.456', 1484454185.456), ('15/Jan/2017:05:23:05.0', 1484454185.0), ], ) def test_queue_peaks_generated_keys(line_factory, date, expected_key): """Test the QueuePeaks command. Check how the keys for the requests dictionary are generated. """ cmd = commands.QueuePeaks() cmd(line_factory(queue_backend=0, accept_date=date)) keys = list(cmd.requests.keys()) # account for a 1h difference, if UTC is used (as in CI) assert expected_key - 4000 <= keys[0] <= expected_key + 4000 # check that microseconds are exact though assert expected_key - int(expected_key) == keys[0] - int(keys[0]) def test_queue_peaks_details(line_factory): """Test the QueuePeaks command. 
def test_queue_peaks_details(line_factory):
    """Test the QueuePeaks command.

    Check the information returned for each peak.
    """
    cmd = commands.QueuePeaks()
    for microseconds, queue in enumerate([0, 4, 7, 8, 19, 4, 0]):
        line = line_factory(
            queue_backend=queue, accept_date=f'15/Jan/2017:05:23:05.{microseconds}'
        )
        cmd(line)
    day = datetime(year=2017, month=1, day=15, hour=5, minute=23, second=5)
    results = cmd.raw_results()
    assert len(results) == 1
    peak_info = results[0]
    assert peak_info['peak'] == 19
    assert peak_info['span'] == 5
    assert peak_info['started'] == day.replace(microsecond=100000)
    assert peak_info['finished'] == day.replace(microsecond=600000)


def test_queue_peaks_multiple_sorted(line_factory):
    """Test the QueuePeaks command.

    Peak information is returned sorted by date.
    """
    cmd = commands.QueuePeaks()
    for microseconds, queue in enumerate([0, 4, 0, 0, 19, 4, 0]):
        line = line_factory(
            queue_backend=queue, accept_date=f'15/Jan/2017:05:23:05.{microseconds}'
        )
        cmd(line)
    day = datetime(year=2017, month=1, day=15, hour=5, minute=23, second=5)
    results = cmd.raw_results()
    assert len(results) == 2
    assert results[0]['peak'] == 4
    assert results[0]['started'] == day.replace(microsecond=100000)
    assert results[1]['peak'] == 19
    assert results[1]['started'] == day.replace(microsecond=400000)


def test_queue_peaks_already_started(line_factory):
    """Test the QueuePeaks command.

    Check that QueuePeaks handles the corner case of a peak that has already started.
    """
    cmd = commands.QueuePeaks()
    for microseconds, queue in enumerate([4, 19, 0]):
        line = line_factory(
            queue_backend=queue, accept_date=f'15/Jan/2017:05:23:05.{microseconds}'
        )
        cmd(line)
    day = datetime(year=2017, month=1, day=15, hour=5, minute=23, second=5)
    results = cmd.raw_results()
    assert len(results) == 1
    peak_info = results[0]
    assert peak_info['peak'] == 19
    assert peak_info['span'] == 2
    assert peak_info['started'] == day
    assert peak_info['finished'] == day.replace(microsecond=200000)


def test_queue_peaks_did_not_finish(line_factory):
    """Test the QueuePeaks command.

    Check that QueuePeaks handles the corner case of a peak that does not finish.
    """
    cmd = commands.QueuePeaks()
    for microseconds, queue in enumerate([4, 19, 12]):
        line = line_factory(
            queue_backend=queue, accept_date=f'15/Jan/2017:05:23:05.{microseconds}'
        )
        cmd(line)
    day = datetime(year=2017, month=1, day=15, hour=5, minute=23, second=5)
    results = cmd.raw_results()
    assert len(results) == 1
    peak_info = results[0]
    assert peak_info['peak'] == 19
    assert peak_info['span'] == 3
    assert peak_info['started'] == day
    assert peak_info['finished'] == day.replace(microsecond=200000)


@pytest.mark.parametrize(
    ('output', 'expected'),
    [
        (
            None,
            '- peak: 4 - span: 1 - started: 2017-01-15T05:23:05.100000 - finished: 2017-01-15T05:23:05.200000\n'
            '- peak: 19 - span: 2 - started: 2017-01-15T05:23:05.400000 - finished: 2017-01-15T05:23:05.600000',
        ),
        (
            'json',
            '[{"peak": 4, "span": 1, "started": "2017-01-15T05:23:05.100000", "finished": "2017-01-15T05:23:05.200000"}, '
            '{"peak": 19, "span": 2, "started": "2017-01-15T05:23:05.400000", "finished": "2017-01-15T05:23:05.600000"}]',
        ),
    ],
)
def test_queue_peaks_output(line_factory, capsys, output, expected):
    """Test the QueuePeaks command.

    Peak information is returned sorted by date.
    """
    cmd = commands.QueuePeaks()
    for microseconds, queue in enumerate([0, 4, 0, 0, 19, 4, 0]):
        line = line_factory(
            queue_backend=queue, accept_date=f'15/Jan/2017:05:23:05.{microseconds}'
        )
        cmd(line)
    check_output(cmd, output, expected, capsys)
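
# The peak tests above (start, span, already-started, never-finished) are all
# consistent with a single pass over the time-ordered queue sizes. A hedged
# sketch of such a pass; the real QueuePeaks bookkeeping may well differ:
def find_peaks_sketch(samples):
    """samples: non-empty (timestamp, queue_size) pairs, in chronological order."""
    peaks = []
    current = None
    for stamp, size in samples:
        if size > 0 and current is None:
            # a queue just started building up
            current = {'peak': size, 'span': 1, 'started': stamp, 'finished': None}
        elif size > 0:
            current['peak'] = max(current['peak'], size)
            current['span'] += 1
        elif current is not None:
            # the queue drained: close the peak at this sample
            current['finished'] = stamp
            peaks.append(current)
            current = None
    if current is not None:  # a peak that never went back to zero
        current['finished'] = stamp
        peaks.append(current)
    return peaks


# e.g. [0, 4, 7, 8, 19, 4, 0] sampled at t=0..6 yields one peak of 19,
# span 5, started at t=1 and finished at t=6, matching the details test.
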
def test_connection_type_results(line_factory):
    """Test the ConnectionType command.

    It counts how many requests were made over SSL and how many were not.
    """
    cmd = commands.ConnectionType()
    assert cmd.raw_results() == (0, 0)
    for path, count in (('/Virtual:443/something', 4), ('/something', 2)):
        line = line_factory(http_request=f'GET {path} HTTP/1.1')
        for _ in range(count):
            cmd(line)
    assert cmd.raw_results() == (4, 2)


@pytest.mark.parametrize(
    ('output', 'expected'),
    [(None, '- https: 4\n- http: 2'), ('json', '[{"https": 4}, {"http": 2}]')],
)
def test_connection_type_output(line_factory, capsys, output, expected):
    """Test the ConnectionType command.

    It counts how many requests were made over SSL and how many were not.
    """
    cmd = commands.ConnectionType()
    for path, count in (('/Virtual:443/something', 4), ('/something', 2)):
        line = line_factory(http_request=f'GET {path} HTTP/1.1')
        for _ in range(count):
            cmd(line)
    check_output(cmd, output, expected, capsys)


def test_requests_per_minute_results(line_factory):
    """Test the RequestsPerMinute command.

    It counts how many requests have been made per minute.
    """
    cmd = commands.RequestsPerMinute()
    assert cmd.raw_results() == []
    now = datetime.now()
    # to avoid leaping into the next/previous minute with the timedeltas below
    now = now.replace(second=30)
    microseconds = timedelta(microseconds=200)
    seconds = timedelta(seconds=5)
    minutes = timedelta(minutes=5)
    hours = timedelta(hours=2)
    dates = [
        now,
        now + microseconds,
        now - microseconds,
        now + seconds,
        now - seconds,
        now + minutes,
        now - minutes,
        now + hours,
        now - hours,
    ]
    for time in dates:
        cmd(line_factory(accept_date=f'{time:%d/%b/%Y:%H:%M:%S.%f}'))
    results = cmd.raw_results()
    assert len(results) == 5
    assert results[0][1] == 1
    assert results[1][1] == 1
    assert results[2][1] == 5  # now and the +- microseconds and +- seconds
    assert results[3][1] == 1
    assert results[4][1] == 1


@pytest.mark.parametrize('output', [None, 'json'])
def test_requests_per_minute_output(line_factory, capsys, output):
    """Test the RequestsPerMinute command.

    It counts how many requests have been made per minute.
    """
    cmd = commands.RequestsPerMinute()
    now = datetime.now()
    for time in (now, now + timedelta(hours=2)):
        cmd(line_factory(accept_date=f'{time:%d/%b/%Y:%H:%M:%S.%f}'))
    name = cmd.command_line_name().upper()
    cmd.results(output=output)
    output_text = capsys.readouterr().out
    if output == 'json':
        assert f'{{"{name}": ' in output_text
        # this is quite fuzzy to not have to fiddle with the date formatting
        # change it once we hit 2030 :)
        assert ':00": 1}, {"202' in output_text
    else:
        assert f'{name}\n====' in output_text
        # this is quite fuzzy to not have to fiddle with the date formatting
        assert ':00: 1\n- ' in output_text
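
# RequestsPerMinute above groups hits whose accept dates fall within the same
# minute; truncating the datetime is enough for that. A sketch (the real
# command may store the buckets differently):
from collections import Counter as _Counter


def per_minute_sketch(accept_dates):
    """Return (minute, hits) pairs, oldest first, for a list of datetimes."""
    buckets = _Counter(d.replace(second=0, microsecond=0) for d in accept_dates)
    return sorted(buckets.items())
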
""" cmd = commands.RequestsPerHour() assert cmd.raw_results() == [] specific_date = datetime(year=2022, month=12, day=3, hour=14, minute=10, second=30) minutes = timedelta(minutes=5) hours = timedelta(hours=2) dates = [ specific_date, specific_date + minutes, specific_date - minutes, specific_date + hours, specific_date - hours, specific_date + hours * 2, specific_date - hours * 2, ] for time in dates: cmd(line_factory(accept_date=f'{time:%d/%b/%Y:%H:%M:%S.%f}')) results = cmd.raw_results() assert len(results) == 5 assert results[0][1] == 1 assert results[1][1] == 1 assert results[2][1] == 3 # now and the +- minutes assert results[3][1] == 1 assert results[4][1] == 1 @pytest.mark.parametrize('output', [None, 'json']) def test_requests_per_hour_output(line_factory, capsys, output): """Test the RequestsPerHour command. It counts how many requests have been made per hour. """ cmd = commands.RequestsPerHour() now = datetime.now() for time in (now, now + timedelta(hours=2)): cmd(line_factory(accept_date=f'{time:%d/%b/%Y:%H:%M:%S.%f}')) name = cmd.command_line_name().upper() cmd.results(output=output) output_text = capsys.readouterr().out if output == 'json': assert f'{{"{name}": ' in output_text # this is quite fuzzy to not have to fiddle with the date formatting # change it once we hit 2030 :) assert ':00": 1}, {"202' in output_text else: assert f'{name}\n====' in output_text # this is quite fuzzy to not have to fiddle with the date formatting assert ':00: 1\n- ' in output_text def test_print_results_and_output(line_factory, capsys): """Test the Print command. It simply prints the verbatim line. """ cmd = commands.Print() assert cmd.raw_results() is None for path in ('/first-thing-to-do', '/second/thing/to-do'): cmd(line_factory(http_request=f'GET {path} HTTP/1.1')) assert cmd.raw_results() is None output_text = capsys.readouterr().out lines = output_text.split('\n') assert len(lines) == 3 assert '/first-thing-to-do' in lines[0] assert '/second/thing/to-do' in lines[1] assert lines[2] == '' gforcada-haproxy_log_analysis-c5d274d/tests/test_filters.py000066400000000000000000000151701453042166300243650ustar00rootroot00000000000000from haproxy import filters import pytest @pytest.mark.parametrize( ('to_filter', 'to_check', 'result'), [ ('1.2.3.4', '1.2.3.4', True), ('2.3.4.5', '5.3.5.4', False), ('2001:db8::8a2e:370:7334', '2001:db8::8a2e:370:7334', True), ('2001:db8::8a2e:370:7334', '2001:db8::8a2e:456:7321', False), ], ) def test_filter_ip(line_factory, to_filter, to_check, result): """Check that filter_ip filter works as expected.""" current_filter = filters.filter_ip(to_filter) headers = f' {{{to_check}}}' line = line_factory(headers=headers) assert current_filter(line) is result @pytest.mark.parametrize( ('to_filter', 'to_check', 'result'), [ ('1.2.3', '1.2.3.4', True), ('1.2.3', '1.2.3.78', True), ('2.3.4.5', '5.3.5.4', False), ('2001:db8', '2001:db8::8a2e:370:7334', True), ('2001:db8', '2001:db8::8a2e:456:7321', True), ('2134:db8', '2001:db8::8a2e:456:7321', False), ], ) def test_filter_ip_range(line_factory, to_filter, to_check, result): """Check that filter_ip_range filter works as expected.""" current_filter = filters.filter_ip_range(to_filter) headers = f' {{{to_check}}}' line = line_factory(headers=headers) assert current_filter(line) is result @pytest.mark.parametrize( ('path', 'result'), [ ('/path/to/image', True), ('/something/else', False), ('/another/image/here', True), ], ) def test_filter_path(line_factory, path, result): """Check that filter_path filter works as expected.""" 
@pytest.mark.parametrize(
    ('path', 'result'),
    [
        ('/path/to/image', True),
        ('/something/else', False),
        ('/another/image/here', True),
    ],
)
def test_filter_path(line_factory, path, result):
    """Check that the filter_path filter works as expected."""
    current_filter = filters.filter_path('/image')
    http_request = f'GET {path} HTTP/1.1'
    line = line_factory(http_request=http_request)
    assert current_filter(line) is result


@pytest.mark.parametrize(
    ('path', 'result'),
    [
        ('/ssl_path:443/image', True),
        ('/something/else', False),
        ('/another:443/ssl', True),
    ],
)
def test_filter_ssl(line_factory, path, result):
    """Check that the filter_ssl filter works as expected."""
    current_filter = filters.filter_ssl()
    http_request = f'GET {path} HTTP/1.1'
    line = line_factory(http_request=http_request)
    assert current_filter(line) is result


@pytest.mark.parametrize(('tr', 'result'), [(45, False), (13000, True), (4566, False)])
def test_filter_slow_requests(line_factory, tr, result):
    """Check that the filter_slow_requests filter works as expected."""
    current_filter = filters.filter_slow_requests('10000')
    line = line_factory(tr=tr)
    assert current_filter(line) is result


@pytest.mark.parametrize(('tw', 'result'), [(45, False), (13000, True), (4566, True)])
def test_filter_wait_on_queues(line_factory, tw, result):
    """Check that the filter_wait_on_queues filter works as expected."""
    current_filter = filters.filter_wait_on_queues('50')
    line = line_factory(tw=tw)
    assert current_filter(line) is result


@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('200', '200', True),
        ('200', '230', False),
        ('300', '300', True),
        ('300', '400', False),
    ],
)
def test_filter_status_code(line_factory, to_filter, to_check, result):
    """Test that the status_code filter works as expected."""
    current_filter = filters.filter_status_code(to_filter)
    line = line_factory(status=to_check)
    assert current_filter(line) is result


@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('2', '200', True),
        ('2', '230', True),
        ('2', '300', False),
        ('3', '300', True),
        ('3', '330', True),
        ('3', '400', False),
    ],
)
def test_filter_status_code_family(line_factory, to_filter, to_check, result):
    """Test that the status_code_family filter works as expected."""
    current_filter = filters.filter_status_code_family(to_filter)
    line = line_factory(status=to_check)
    assert current_filter(line) is result


@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('GET', 'GET', True),
        ('GET', 'POST', False),
        ('GET', 'PUT', False),
        ('GET', 'PATCH', False),
        ('GET', 'DELETE', False),
        ('PATCH', 'PATCH', True),
        ('DELETE', 'DELETE', True),
    ],
)
def test_filter_http_method(line_factory, to_filter, to_check, result):
    """Test that the http_method filter works as expected."""
    current_filter = filters.filter_http_method(to_filter)
    line = line_factory(http_request=f'{to_check} /path HTTP/1.1')
    assert current_filter(line) is result


@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('default', 'default', True),
        ('default', 'backend', False),
        ('backend', 'backend', True),
        ('backend', 'default', False),
    ],
)
def test_filter_backend(line_factory, to_filter, to_check, result):
    """Test that the backend filter works as expected."""
    current_filter = filters.filter_backend(to_filter)
    line = line_factory(backend_name=to_check)
    assert current_filter(line) is result


@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('varnish', 'varnish', True),
        ('varnish', 'nginx', False),
        ('nginx', 'nginx', True),
        ('nginx', 'varnish', False),
    ],
)
def test_filter_frontend(line_factory, to_filter, to_check, result):
    """Test that the frontend filter works as expected."""
    current_filter = filters.filter_frontend(to_filter)
    line = line_factory(frontend_name=to_check)
    assert current_filter(line) is result
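
# Each filter above is a predicate over a single line, so a whole chain of
# them reduces to all(); negating the chain (the negate_filter behaviour
# exercised later in test_main.py) is then a single ``not``. A hedged sketch
# of how a line could be kept or dropped:
def keep_line_sketch(line, checks, negate=False):
    matches = all(check(line) for check in checks)
    return not matches if negate else matches


# e.g. keep_line_sketch(line, [filters.filter_ssl(), filters.filter_backend('b1')])
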
@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('server1', 'server1', True),
        ('server1', 'backend23', False),
        ('backend23', 'backend23', True),
        ('backend23', 'server1', False),
    ],
)
def test_filter_server(line_factory, to_filter, to_check, result):
    """Test that the server filter works as expected."""
    current_filter = filters.filter_server(to_filter)
    line = line_factory(server_name=to_check)
    assert current_filter(line) is result


@pytest.mark.parametrize(
    ('to_filter', 'to_check', 'result'),
    [
        ('400', '500', True),
        ('400', '+500', True),
        ('+400', '500', True),
        ('+400', '+500', True),
        ('400', '300', False),
        ('400', '+300', False),
        ('+400', '300', False),
        ('+400', '+300', False),
    ],
)
def test_filter_response_size(line_factory, to_filter, to_check, result):
    """Test that the size filter works as expected.

    Note that both filter and value can have a leading plus sign.
    """
    current_filter = filters.filter_response_size(to_filter)
    line = line_factory(bytes=to_check)
    assert current_filter(line) is result

gforcada-haproxy_log_analysis-c5d274d/tests/test_log_file.py

from datetime import datetime
from haproxy.logfile import Log

import pytest


def test_logfile_default_values():
    """Check that the default values are set."""
    log_file = Log('something')
    assert log_file.logfile == 'something'
    assert log_file.show_invalid is False
    assert log_file.invalid_lines == 0
    assert log_file.valid_lines == 0
    assert log_file.total_lines == 0
    assert log_file.start is None
    assert log_file.end is None


@pytest.mark.parametrize(
    ('start_str', 'start_obj', 'delta', 'end_obj'),
    [
        (None, None, None, None),
        (None, None, '3d', None),
        ('12/Dec/2019', datetime(2019, 12, 12), None, None),
        ('12/Dec/2019', datetime(2019, 12, 12), '3d', datetime(2019, 12, 15)),
    ],
)
def test_start_and_end_attributes(start_str, start_obj, delta, end_obj):
    """Check that the start and end attributes of Log objects are set as expected."""
    log_file = Log('something', start=start_str, delta=delta)
    assert log_file.logfile == 'something'
    assert log_file.invalid_lines == 0
    assert log_file.start == start_obj
    assert log_file.end == end_obj


@pytest.mark.parametrize('accept_date', ['09/Dec/2013:12:59:46.633', None])
def test_lines_validity(tmp_path, line_factory, accept_date):
    """Check that lines are either counted as valid or invalid."""
    file_path = tmp_path / 'haproxy.log'
    line = ''
    if accept_date:
        line = line_factory(accept_date=accept_date).raw_line
    with open(file_path, 'w') as file_obj:
        file_obj.write(f'{line}\n')
    log_file = Log(file_path)
    _ = list(log_file)
    assert log_file.total_lines == 1
    if accept_date:
        assert log_file.valid_lines == 1
        assert log_file.invalid_lines == 0
    else:
        assert log_file.valid_lines == 0
        assert log_file.invalid_lines == 1
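
# test_start_and_end_attributes above implies end = start + delta when both
# are given, and None otherwise. A sketch of that computation with the
# date/delta formats used throughout these tests; only the 'Nd' (days)
# flavour is handled here, whereas the real parser also understands the
# s/m/h suffixes (see test_utils.py below):
from datetime import datetime as _dt, timedelta as _td


def time_frame_sketch(start, delta):
    start_obj = _dt.strptime(start, '%d/%b/%Y') if start else None
    end_obj = None
    if start_obj is not None and delta:
        end_obj = start_obj + _td(days=int(delta[:-1]))
    return start_obj, end_obj


assert time_frame_sketch('12/Dec/2019', '3d') == (
    _dt(2019, 12, 12),
    _dt(2019, 12, 15),
)
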
@pytest.mark.parametrize(
    ('accept_date', 'start', 'delta', 'is_valid'),
    [
        # valid line and no time frame, returned
        ('09/Dec/2013:12:59:46.633', None, None, True),
        # invalid line, not returned
        (None, None, None, False),
        # valid line before time frame, not returned
        ('09/Dec/2013:12:59:46.633', '09/Dec/2014', None, False),
        # valid line after time frame, not returned
        ('09/Dec/2013:12:59:46.633', '08/Dec/2012', '3d', False),
        # valid line within time frame, returned
        ('09/Dec/2013:12:59:46.633', '08/Dec/2013', '3d', True),
    ],
)
def test_returned_lines(tmp_path, line_factory, accept_date, start, delta, is_valid):
    """Check that lines are only returned if they are valid AND within the time frame."""
    file_path = tmp_path / 'haproxy.log'
    line = ''
    if accept_date:
        line = line_factory(accept_date=accept_date).raw_line
    with open(file_path, 'w') as file_obj:
        file_obj.write(f'{line}\n')
    log_file = Log(file_path, start=start, delta=delta)
    lines = list(log_file)
    assert bool(len(lines)) is is_valid


def test_total_lines():
    """Check that the total number of lines is always counted."""
    log_file = Log(logfile='tests/files/2_ok_1_invalid.log')
    _ = list(log_file)
    assert log_file.total_lines == 3
    assert log_file.valid_lines == 2
    assert log_file.invalid_lines == 1


@pytest.mark.parametrize('headers', [' {1.2.3.4}', 'random-value-that-breaks'])
def test_print_invalid_lines(tmp_path, line_factory, headers, capsys):
    """Check that invalid lines are printed, if asked to do so."""
    file_path = tmp_path / 'haproxy.log'
    line = line_factory(headers=headers).raw_line
    with open(file_path, 'w') as file_obj:
        file_obj.write(f'{line}\n')
    log_file = Log(file_path, show_invalid=True)
    _ = list(log_file)
    output = capsys.readouterr().out
    if log_file.valid_lines == 1:
        assert headers not in output
    else:
        assert headers in output
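
# The counters asserted above fit a generator that classifies each raw line
# while iterating. A minimal sketch of that shape; the real Log class parses
# far more, this only shows the bookkeeping:
class LogSketch:
    def __init__(self, lines):
        self.lines = lines
        self.total_lines = self.valid_lines = self.invalid_lines = 0

    def __iter__(self):
        for raw in self.lines:
            self.total_lines += 1
            if raw.strip():  # stand-in for the real line-parsing check
                self.valid_lines += 1
                yield raw
            else:
                self.invalid_lines += 1


log_sketch = LogSketch(['a valid-ish line', ''])
assert len(list(log_sketch)) == 1 and log_sketch.invalid_lines == 1
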
gforcada-haproxy_log_analysis-c5d274d/tests/test_log_line.py

from datetime import datetime
from datetime import timedelta

import pytest


NOW = datetime.now()
TWO_DAYS_AGO = NOW - timedelta(days=2)
IN_TWO_DAYS = NOW + timedelta(days=2)


def test_default_values(line_factory, default_line_data):
    line = line_factory()
    assert line.client_ip == default_line_data['client_ip']
    assert line.client_port == default_line_data['client_port']
    assert line.raw_accept_date in default_line_data['accept_date']
    assert line.frontend_name == default_line_data['frontend_name']
    assert line.backend_name == default_line_data['backend_name']
    assert line.server_name == default_line_data['server_name']
    assert line.time_wait_request == default_line_data['tq']
    assert line.time_wait_queues == default_line_data['tw']
    assert line.time_connect_server == default_line_data['tc']
    assert line.time_wait_response == default_line_data['tr']
    assert line.total_time == default_line_data['tt']
    assert line.status_code == default_line_data['status']
    assert line.bytes_read == default_line_data['bytes']
    assert line.connections_active == default_line_data['act']
    assert line.connections_frontend == default_line_data['fe']
    assert line.connections_backend == default_line_data['be']
    assert line.connections_server == default_line_data['srv']
    assert line.retries == default_line_data['retries']
    assert line.queue_server == default_line_data['queue_server']
    assert line.queue_backend == default_line_data['queue_backend']
    assert line.captured_request_headers == default_line_data['headers'].strip()[1:-1]
    assert line.captured_response_headers is None
    assert line.raw_http_request == default_line_data['http_request']
    assert line.is_valid


def test_unused_values(line_factory):
    line = line_factory()
    assert line.captured_request_cookie is None
    assert line.captured_response_cookie is None
    assert line.termination_state is None


def test_datetime_value(line_factory):
    line = line_factory()
    assert isinstance(line.accept_date, datetime)


def test_http_request_values(line_factory):
    method = 'PUT'
    path = '/path/to/my/image'
    protocol = 'HTTP/2.0'
    line = line_factory(http_request=f'{method} {path} {protocol}')
    assert line.http_request_method == method
    assert line.http_request_path == path
    assert line.http_request_protocol == protocol


def test_invalid_line(line_factory):
    line = line_factory(bytes='wroooong')
    assert not line.is_valid


def test_no_captured_headers(line_factory):
    """A log line without captured headers is still valid."""
    line = line_factory(headers='')
    assert line.is_valid


def test_request_and_response_captured_headers(line_factory):
    """Request and response headers captured are parsed correctly."""
    request_headers = '{something}'
    response_headers = '{something_else}'
    line = line_factory(headers=f' {request_headers} {response_headers}')
    assert line.is_valid
    assert f'{{{line.captured_request_headers}}}' == request_headers
    assert f'{{{line.captured_response_headers}}}' == response_headers


def test_request_is_https_valid(line_factory):
    """Check that if a log line contains the SSL port on it, it is reported
    as an https connection.
    """
    line = line_factory(http_request='GET /domain:443/to/image HTTP/1.1')
    assert line.is_https


def test_request_is_https_false(line_factory):
    """Check that if a log line does not contain the SSL port on it, it is
    not reported as an https connection.
    """
    line = line_factory(http_request='GET /domain:80/to/image HTTP/1.1')
    assert not line.is_https


def test_request_is_front_page(line_factory):
    """Check that if a request is for the front page the request path is
    correctly stored.
    """
    line = line_factory(http_request='GET / HTTP/1.1')
    assert line.http_request_path == '/'


@pytest.mark.parametrize(
    'process',
    [
        'ip-192-168-1-1 haproxy[28029]:',
        'dvd-ctrl1 haproxy[403100]:',
        'localhost.localdomain haproxy[2345]:',
    ],
)
def test_process_names(line_factory, process):
    """Check that different styles of process names are handled correctly."""
    line = line_factory(process_name_and_pid=process)
    assert line.is_valid is True


def test_unparseable_http_request(line_factory):
    line = line_factory(http_request='something')
    assert line.http_request_method == 'invalid'
    assert line.http_request_path == 'invalid'
    assert line.http_request_protocol == 'invalid'


def test_truncated_requests(line_factory):
    """Check that truncated requests are still valid.

    That would be requests that do not have the protocol part specified.
    """
    line = line_factory(http_request='GET /')
    assert line.http_request_method == 'GET'
    assert line.http_request_path == '/'
    assert line.http_request_protocol is None
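
# The two tests above pin down a parsing fallback: an unparseable request
# yields the literal 'invalid' for all three parts, and a truncated request
# leaves the protocol as None. A sketch of that logic (assumed, not the real
# haproxy.line code):
def split_http_request_sketch(raw):
    parts = raw.split()
    if len(parts) == 3:
        method, path, protocol = parts
    elif len(parts) == 2:  # truncated request, e.g. 'GET /'
        (method, path), protocol = parts, None
    else:
        method = path = protocol = 'invalid'
    return method, path, protocol


assert split_http_request_sketch('GET /') == ('GET', '/', None)
assert split_http_request_sketch('something') == ('invalid', 'invalid', 'invalid')
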
""" line = line_factory(syslog_date=syslog) assert line.is_valid is True def test_ip_from_headers(line_factory): """Check that the IP from the captured headers takes precedence.""" line = line_factory(headers=' {1.2.3.4}') assert line.ip == '1.2.3.4' @pytest.mark.parametrize( 'ip', ['1.2.3.4', '1.2.3.4, 2.3.4.5', '1.2.3.4,2.3.4.5,5.4.3.2'], ) def test_only_first_ip_from_headers(line_factory, ip): """Check that if there are multiple IPs, only the first one is used.""" line = line_factory(headers=f' {{{ip}}}') assert line.ip == '1.2.3.4' @pytest.mark.parametrize( 'ip', ['127.1.2.7', '1.127.230.47', 'fe80::9379:c29e:6701:cef8', 'fe80::9379:c29e::'], ) def test_ip_from_client_ip(line_factory, ip): """Check that if there is no IP on the captured headers, the client IP is used.""" line = line_factory(headers='', client_ip=ip) assert line.ip == ip @pytest.mark.parametrize( ('start', 'end', 'result'), [ (None, None, True), (TWO_DAYS_AGO, None, True), (IN_TWO_DAYS, None, False), (TWO_DAYS_AGO, IN_TWO_DAYS, True), (TWO_DAYS_AGO, TWO_DAYS_AGO, False), ], ) def test_is_within_timeframe(line_factory, start, end, result): """Check that a line is within a given time frame.""" line = line_factory(accept_date=NOW.strftime('%d/%b/%Y:%H:%M:%S.%f')) assert line.is_within_time_frame(start, end) is result gforcada-haproxy_log_analysis-c5d274d/tests/test_main.py000066400000000000000000000063261453042166300236440ustar00rootroot00000000000000from haproxy.main import create_parser from haproxy.main import main from haproxy.main import parse_arguments from haproxy.utils import VALID_COMMANDS from haproxy.utils import VALID_FILTERS import pytest import sys PY310_OR_HIGHER = sys.version_info[1] > 9 @pytest.fixture() def default_arguments(): """Return all the expected arguments the main function expects.""" return { 'start': None, 'delta': None, 'log': 'tests/files/small.log', 'commands': ['counter'], 'negate_filter': None, 'filters': None, 'list_commands': False, 'list_filters': False, 'json': False, 'invalid_lines': False, } @pytest.mark.parametrize( ('switch', 'listing'), [('list-filters', VALID_FILTERS), ('list-commands', VALID_COMMANDS)], ) def test_list_filters_and_commands(capsys, switch, listing): """Test that one can request the filters/commands to be listed.""" parser = create_parser() data = parse_arguments(parser.parse_args([f'--{switch}'])) argument = switch.replace('-', '_') for key in data: expected = None if key == argument: expected = True assert data[key] is expected main(data) output_text = capsys.readouterr().out for name in listing: assert f'{name}:\n\t' in output_text def test_show_help(capsys): """Check that the help is shown if no arguments are given.""" parser = create_parser() data = parse_arguments(parser.parse_args([])) main(data) output_text = capsys.readouterr().out if PY310_OR_HIGHER: assert 'options:' in output_text else: assert 'optional arguments:' in output_text assert '--list-filters ' in output_text assert '--list-commands ' in output_text def test_main(capsys, default_arguments): """Check that the main function works as expected with default arguments.""" main(default_arguments) output_text = capsys.readouterr().out assert 'COUNTER\n=======\n9' in output_text def test_main_with_filter(capsys, default_arguments): """Check that the filters are applied as expected.""" default_arguments['filters'] = [ ('server', 'instance1'), ] main(default_arguments) output_text = capsys.readouterr().out assert 'COUNTER\n=======\n4' in output_text def test_main_negate_filter(capsys, 
def test_main_negate_filter(capsys, default_arguments):
    """Check that filters can be reversed."""
    default_arguments['filters'] = [
        ('server', 'instance1'),
    ]
    default_arguments['negate_filter'] = True
    main(default_arguments)
    output_text = capsys.readouterr().out
    assert 'COUNTER\n=======\n5' in output_text


def test_print_no_output(capsys, default_arguments):
    """Check that the print header is not shown."""
    default_arguments['commands'] = ['print']
    main(default_arguments)
    output_text = capsys.readouterr().out
    assert 'PRINT\n=====' not in output_text


def test_json_output(capsys, default_arguments):
    """Check that the JSON switch is used and JSON output is printed."""
    default_arguments['json'] = True
    main(default_arguments)
    output_text = capsys.readouterr().out
    assert 'COUNTER\n=======\n9' not in output_text
    assert '{"COUNTER": 9}' in output_text

gforcada-haproxy_log_analysis-c5d274d/tests/test_regex.py

from datetime import datetime
from haproxy.line import HAPROXY_LINE_REGEX
from haproxy.line import HTTP_REQUEST_REGEX

import pytest
import random


def test_default_values(line_factory, default_line_data):
    """Check that the default line with default values is parsed."""
    line = line_factory()
    matches = HAPROXY_LINE_REGEX.match(line.raw_line)
    assert matches.group('http_request') == default_line_data['http_request']


def test_client_ip_and_port(line_factory):
    """Check that the client IP and port are extracted correctly."""
    ip = '192.168.0.250'
    port = '34'
    line = line_factory(client_ip=ip, client_port=port)
    matches = HAPROXY_LINE_REGEX.match(line.raw_line)
    assert matches.group('client_ip') == ip
    assert matches.group('client_port') == port


def test_accept_date(line_factory):
    """Check that the accept date is extracted correctly."""
    accept_date = datetime.now().strftime('%d/%b/%Y:%H:%M:%S.%f')
    line = line_factory(accept_date=accept_date)
    matches = HAPROXY_LINE_REGEX.match(line.raw_line)
    assert matches.group('accept_date') == accept_date


def test_server_names(line_factory):
    """Check that the server names are extracted correctly."""
    frontend_name = 'SomeThing4'
    backend_name = 'Another1'
    server_name = 'Cloud9'
    line = line_factory(
        frontend_name=frontend_name, backend_name=backend_name, server_name=server_name
    )
    matches = HAPROXY_LINE_REGEX.match(line.raw_line)
    assert matches.group('frontend_name') == frontend_name
    assert matches.group('backend_name') == backend_name
    assert matches.group('server_name') == server_name


@pytest.mark.parametrize(
    ('tq', 'tw', 'tc', 'tr', 'tt'),
    [
        ('0', '0', '0', '0', '0'),
        ('23', '55', '3', '4', '5'),
        ('-23', '-33', '-3', '-4', '5'),
        ('23', '33', '3', '4', '+5'),
    ],
)
def test_timers(line_factory, tq, tw, tc, tr, tt):
    """Check that the timers are extracted correctly.

    Note that all timers except `tt` can be negative, and that `tt` is the
    only one that can have a positive sign.
    """
    line = line_factory(tq=tq, tw=tw, tc=tc, tr=tr, tt=tt)
    matches = HAPROXY_LINE_REGEX.match(line.raw_line)
    assert matches.group('tq') == tq
    assert matches.group('tw') == tw
    assert matches.group('tc') == tc
    assert matches.group('tr') == tr
    assert matches.group('tt') == tt
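
# The timer assertions above only need named groups that accept an optional
# sign. An illustrative fragment in the spirit of HAPROXY_LINE_REGEX; the
# real pattern is much larger and lives in haproxy/line.py:
import re as _re

TIMERS_SKETCH = _re.compile(
    r'(?P<tq>-?\d+)/(?P<tw>-?\d+)/(?P<tc>-?\d+)/(?P<tr>-?\d+)/(?P<tt>\+?\d+)'
)

_m = TIMERS_SKETCH.match('-23/-33/-3/-4/+5')
assert _m.group('tq') == '-23' and _m.group('tt') == '+5'
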
""" line = line_factory(status=status, bytes=bytes_read) matches = HAPROXY_LINE_REGEX.match(line.raw_line) assert matches.group('status_code') == status assert matches.group('bytes_read') == bytes_read @pytest.mark.parametrize( ('act', 'fe', 'be', 'srv', 'retries'), [ ('0', '0', '0', '0', '0'), ('40', '10', '11', '12', '14'), ('40', '10', '11', '12', '+14'), ], ) def test_connections_and_retries(line_factory, act, fe, be, srv, retries): """Check that the connections and retries are extracted correctly. Note that `retries` might have a plus sign prefixed. """ line = line_factory(act=act, fe=fe, be=be, srv=srv, retries=retries) matches = HAPROXY_LINE_REGEX.match(line.raw_line) assert matches.group('act') == act assert matches.group('fe') == fe assert matches.group('be') == be assert matches.group('srv') == srv assert matches.group('retries') == retries @pytest.mark.parametrize(('server', 'backend'), [('0', '0'), ('200', '200')]) def test_queues(line_factory, server, backend): """Check that the server and backend queues are extracted correctly.""" line = line_factory(queue_server=server, queue_backend=backend) matches = HAPROXY_LINE_REGEX.match(line.raw_line) assert matches.group('queue_server') == server assert matches.group('queue_backend') == backend @pytest.mark.parametrize( ('request_header', 'response_header'), [ ('', ''), ('something', None), ('something here', 'and there'), ('multiple | request | headers', 'and | multiple | response ones'), ], ) def test_captured_headers(line_factory, request_header, response_header): """Check that captured headers are extracted correctly.""" if response_header: headers = f' {{{request_header}}} {{{response_header}}}' else: headers = f' {{{request_header}}}' line = line_factory(headers=headers) matches = HAPROXY_LINE_REGEX.match(line.raw_line) if response_header: assert matches.group('request_headers') == request_header assert matches.group('response_headers') == response_header else: assert matches.group('headers') == request_header assert matches.group('request_headers') is None assert matches.group('response_headers') is None def test_http_request(line_factory): """Check that the HTTP request is extracted correctly.""" http_request = 'something in the air' line = line_factory(http_request=http_request) matches = HAPROXY_LINE_REGEX.match(line.raw_line) assert matches.group('http_request') == http_request @pytest.mark.parametrize( 'path', [ '/path/to/image', '/path/with/port:80', # with port '/path/with/example.com', # with domain '/path/to/article#section', # with anchor '/article?hello=world&goodbye=lennin', # with parameters '/article-with-dashes_and_underscores', # dashes and underscores '/redirect_to?http://example.com', # double slashes '/@@funny', # at sign '/something%20encoded', # percent sign '/++adding++is+always+fun', # plus sign '/here_or|here', # vertical bar '/here~~~e', # tilde sign '/here_*or', # asterisk sign '/something;or-not', # colon '/something-important!probably', # exclamation mark '/something$important', # dollar sign "/there's-one's-way-or-another's" # single quote sign '/there?la=as,is', # comma '/here_or(here)', # parenthesis '/here_or[here]', # square brackets '/georg}von{grote/\\', # curly brackets '/here_or<', # less than '/here_or>', # more than '/georg-von-grote/\\', # back slash '/georg`von“grote/\\', # diacritics '/georg`von^grote/\\', # caret ], ) def test_http_request_regex(path): """Test that the method/path/protocol are extracted properly from the HTTP request.""" verbs = ('GET', 'POST', 'DELETE', 'PATCH', 
@pytest.mark.parametrize(
    'path',
    [
        '/path/to/image',
        '/path/with/port:80',  # with port
        '/path/with/example.com',  # with domain
        '/path/to/article#section',  # with anchor
        '/article?hello=world&goodbye=lennin',  # with parameters
        '/article-with-dashes_and_underscores',  # dashes and underscores
        '/redirect_to?http://example.com',  # double slashes
        '/@@funny',  # at sign
        '/something%20encoded',  # percent sign
        '/++adding++is+always+fun',  # plus sign
        '/here_or|here',  # vertical bar
        '/here~~~e',  # tilde sign
        '/here_*or',  # asterisk sign
        '/something;or-not',  # colon
        '/something-important!probably',  # exclamation mark
        '/something$important',  # dollar sign
        "/there's-one's-way-or-another's",  # single quote sign
        '/there?la=as,is',  # comma
        '/here_or(here)',  # parenthesis
        '/here_or[here]',  # square brackets
        '/georg}von{grote/\\',  # curly brackets
        '/here_or<',  # less than
        '/here_or>',  # more than
        '/georg-von-grote/\\',  # back slash
        '/georg`von“grote/\\',  # diacritics
        '/georg`von^grote/\\',  # caret
    ],
)
def test_http_request_regex(path):
    """Test that the method/path/protocol are extracted properly from the HTTP request."""
    verbs = ('GET', 'POST', 'DELETE', 'PATCH', 'PUT')
    protocols = (
        'HTTP/1.0',
        'HTTP/1.1',
        'HTTP/2.0',
    )
    method = random.choice(verbs)
    protocol = random.choice(protocols)
    matches = HTTP_REQUEST_REGEX.match(f'{method} {path} {protocol}')
    assert matches.group('method') == method
    assert matches.group('path') == path
    assert matches.group('protocol') == protocol

gforcada-haproxy_log_analysis-c5d274d/tests/test_utils.py

from datetime import datetime
from datetime import timedelta
from haproxy.utils import date_str_to_datetime
from haproxy.utils import delta_str_to_timedelta
from haproxy.utils import VALID_COMMANDS
from haproxy.utils import VALID_FILTERS
from haproxy.utils import validate_arg_date
from haproxy.utils import validate_arg_delta

import pytest


@pytest.mark.parametrize(
    ('text', 'expected'),
    [
        ('45s', timedelta(seconds=45)),
        ('2m', timedelta(minutes=2)),
        ('13h', timedelta(hours=13)),
        ('2d', timedelta(days=2)),
    ],
)
def test_str_to_timedelta(text, expected):
    """Check that deltas are converted to timedelta objects."""
    assert delta_str_to_timedelta(text) == expected


@pytest.mark.parametrize(
    ('text', 'expected'),
    [
        ('04/Jan/2013', datetime(2013, 1, 4)),
        ('13/May/2015:13', datetime(2015, 5, 13, 13)),
        ('22/Jun/2017:12:11', datetime(2017, 6, 22, 12, 11)),
        ('29/Aug/2019:10:09:08', datetime(2019, 8, 29, 10, 9, 8)),
    ],
)
def test_str_to_datetime(text, expected):
    """Check that start dates are converted to datetime objects."""
    assert date_str_to_datetime(text) == expected


@pytest.mark.parametrize('cmd_key', [*VALID_COMMANDS])
def test_valid_commands(cmd_key):
    """Check that the commands' information is complete."""
    cmd_data = VALID_COMMANDS[cmd_key]
    assert cmd_data['klass']
    assert cmd_data['klass'].command_line_name() == cmd_key
    assert cmd_data['description']
    assert cmd_data['description'].startswith(f'{cmd_key}:\n\t')


@pytest.mark.parametrize('filter_key', [*VALID_FILTERS])
def test_valid_filters(filter_key):
    """Check that the filters' information is complete."""
    filter_data = VALID_FILTERS[filter_key]
    assert filter_data['obj']
    assert filter_data['obj'].__name__ == f'filter_{filter_key}'
    assert filter_data['description']
    assert filter_data['description'].startswith(f'{filter_key}:\n\t')


@pytest.mark.parametrize(('value', 'expected'), [('', None), ('30/Dec/2019', True)])
def test_validate_date(value, expected):
    """Check that the date is validated or an exception raised."""
    if expected is None:
        with pytest.raises(ValueError, match='--start argument is not valid'):
            validate_arg_date(value)
    else:
        assert validate_arg_date(value) is None


@pytest.mark.parametrize(('value', 'expected'), [('', None), ('3d', True)])
def test_validate_delta(value, expected):
    """Check that the delta is validated or an exception raised."""
    if expected is None:
        with pytest.raises(ValueError, match='--delta argument is not valid'):
            validate_arg_delta(value)
    else:
        assert validate_arg_delta(value) is None
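
# delta_str_to_timedelta above maps a trailing unit letter onto a timedelta
# keyword; a compact sketch of that mapping (assumed, not the real code):
from datetime import timedelta as _delta

_UNITS = {'s': 'seconds', 'm': 'minutes', 'h': 'hours', 'd': 'days'}


def parse_delta_sketch(text):
    return _delta(**{_UNITS[text[-1]]: int(text[:-1])})


assert parse_delta_sketch('45s') == _delta(seconds=45)
assert parse_delta_sketch('2d') == _delta(days=2)
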
gforcada-haproxy_log_analysis-c5d274d/tox.ini

[tox]
min_version = 4.4.0
envlist =
    format
    lint
    coverage
    py38
    py39
    py310
    py311
    py312
    pypy3

[gh-actions]
python =
    3.8: py38
    3.9: py39
    3.10: py310
    3.11: py311
    3.12: py312

[testenv]
description = run the distribution tests
use_develop = true
skip_install = false
constrain_package_deps = true
deps =
    -r requirements.txt
commands =
    pytest

[testenv:format]
description = automatically reformat code
skip_install = true
deps =
    pre-commit
commands =
    pre-commit run -a pyupgrade
    pre-commit run -a isort
    pre-commit run -a black

[testenv:lint]
description = run linters that will help improve the code style
skip_install = true
deps =
    pre-commit
commands =
    pre-commit run -a

[testenv:coverage]
description = get a test coverage report
use_develop = true
skip_install = false
deps =
    -r requirements.txt
commands =
    pytest --cov --cov-report term-missing

[testenv:generate-constrains]
description = update the constrains.txt file
basepython = python3.8
skip_install = true
deps =
    pip-tools
commands =
    pip-compile requirements.in