pax_global_header00006660000000000000000000000064151314201030014500gustar00rootroot0000000000000052 comment=88766afb74de34b064141d7bd576fbb619681294 rndusr-torf-547b989/000077500000000000000000000000001513142010300142625ustar00rootroot00000000000000rndusr-torf-547b989/.coveragerc000066400000000000000000000002551513142010300164050ustar00rootroot00000000000000[run] source = torf command_line = -m pytest branch = True concurrency = multiprocessing [report] show_missing = True skip_covered = False [html] directory = /tmp/htmlcov rndusr-torf-547b989/.flake8000066400000000000000000000016061513142010300154400ustar00rootroot00000000000000[flake8] ignore = # visually indented line with same indent as next logical line E129, # missing whitespace before ':' E203, # multiple spaces before operator E221, # missing whitespace after ',' E231, # too many leading '#' for block comment E266, # multiple spaces after keyword E271, # multiple spaces before keyword E272, # line too long E501, # expected 2 blank lines E302, # too many blank lines E303, # expected 2 blank lines after class or function definition E305, # multiple spaces after ',' E241, # multiple statements on one line (colon) E701, # multiple statements on one line (def) E704, # ambiguous variable name 'l' E741, # line break before binary operator W503, # line break after binary operator W504, # invalid escape sequence '\ ' W605, rndusr-torf-547b989/.gitignore000066400000000000000000000002501513142010300162470ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *.egg-info/ pypirc # Pytest cache .cache .pytest_cache .tox # Virtual environment venv .python-version rndusr-torf-547b989/.isort.cfg000066400000000000000000000001421513142010300161560ustar00rootroot00000000000000[settings] line_length = 120 wrap_length = 10 multi_line_output = 3 include_trailing_comma = true rndusr-torf-547b989/CHANGELOG000066400000000000000000000224011513142010300154730ustar00rootroot000000000000002026-01-13 
4.3.1 - Officially support Python 3.14. - Include dead flatbencode dependency for easier distro packaging. 2025-02-24 4.3.0 - Torf.read_stream() can now read `bytes` and `bytearray` objects in addition to file-like objects. - Provide type annotations for the public API. 2024-06-13 4.2.7 - Exclude tests from the package. 2024-03-25 4.2.6 - Validate creation date if it exists. 2024-03-09 4.2.5 - Bugfix: Include symbolic links in the torrent's files. 2023-12-04 4.2.4 - Bugfix: Remove hardcoded minimum and maximum piece sizes from Torrent.calculate_piece_size(). 2023-12-01 4.2.3 - Torrent.calculate_piece_size() now returns 16 MiB for content sizes bigger than 16 GiB. (Thanks @cannibalChipper) 2023-09-11 4.2.2 - Fix a bug that was introduced in 4.2.0. Instead of forcibly decoding all encodings as UTF-8, go back to defaulting to undecoded bytes (like before) and only perform a decode-and-replace-invalid-characters routine on known strings like ["info"]["name"]. 2023-07-02 4.2.1 - Setting Torrent.piece_size_min/max now also sets Torrent.piece_size if it is too small/big. 2023-04-16 4.2.0 - When reading torrent files, strings are now always decoded as UTF-8 (except for ["info"]["pieces"]). This makes it harder to store binary data in non-standard fields, but it also means ["info"]["name"] is always a unicode string. - Allow any "piece length" that is divisible by 16 KiB. Previously, power of 2 was required. 2023-01-26 4.1.4 - Fix "Too many open files" error when creating a torrent with loads of small files. 2023-01-09 4.1.3 - Try to reduce memory usage before being out of memory. - Terminate piece hashing threads if they are idle for more than 500 milliseconds. 2022-11-24 4.1.2 - Bugfix: Torrent's piece_size_min and piece_size_max arguments are no longer ignored. - Torrent.piece_size_min and Torrent.piece_size_max are now properties. They default to the new class attributes piece_size_min_default and piece_size_max_default. 
- Torrent.calculate_piece_size(): New arguments: min_size, max_size specify the minimum and maximum piece size. 2022-11-15 4.1.1 - Bugfix: Don't crash if "creation date" is something weird like an empty string. 2022-11-09 4.1.0 - Torrent now accepts piece_size_min and piece_size_max arguments. - Bugfix: Don't reuse a torrent that has a smaller piece_size than piece_size_min. 2022-09-22 4.0.4 - Bugfix: Expect non-standard fields (e.g. "md5sum") in file list when reusing torrent. 2022-07-02 4.0.3 - Bugfix: Copy file order from reused torrent. 2022-06-19 4.0.2 - Bugfix: Don't reuse a torrent that has a bigger piece_size than piece_size_max. 2022-05-17 4.0.1 - Bugfix: RecursionError when pickling File objects 2022-05-05 4.0.0 - Torrent.verify(): The "skip_on_error" argument was removed. - The new TorrentFileStream class lets you operate on a torrent's stream of concatenated files. - The new attribute Torrent.location provides the file system path without the torrent's name. - The new Torrent.reuse() method copies piece hashes from an existing torrent file with the same name and file tree. It can also find a matching torrent in a bunch of directories. 2020-10-29 3.1.3 - Bugfix: Exclude tests from package. 2020-10-25 3.1.2 - Bugfix: Preserve binary values of fields that aren't part of the BitTorrent specification. (Thanks, @ayasechan) 2020-09-26 3.1.1 - Bugfix: Fix poor performance when torrent contains huge number of files (Thanks, @mon) 2020-08-11 3.1.0 - Bugfix: Gracefully handle empty string for "url-list" and other URL lists in metainfo - The new properties Torrent.include_globs and Torrent.include_regexs can be used to keep files even if they match an exclude pattern. 2020-06-20 3.0.2 - Bugfix: Torrent.validate() and Torrent.read_stream() now raise MetainfoError if the "info" field is not a dictionary. - Bugfix: Magnet.from_string() raised ValueError instead of MagnetError if the "xl" parameter wasn't a number. 
- Bugfix: Magnet.from_string() strips whitespace from the beginning and end before parsing the URI. - Bugfix: Magnet.kt used commas to separate keywords. (I don't know why.) - Bugfix: Gracefully handle empty path components in torrent file (e.g. "foo//bar") - Torrent.private is now None instead of False if there is no "private" field in the "info" section of the metainfo. 2020-04-07 3.0.1 - Make things work with Python 3.6. - Magnet.as_torrent is now a method called Magnet.torrent(). 2020-04-02 3.0.0 - Depend on flatbencode instead of bencoder.pyx - Bug fixed: Setting the "private" property to False removed the flag from the metainfo which could potentially change the info hash if a torrent file had the flag explicitly disabled. - Bug fixed: Torrent.read() validated if the "validate" argument was False - Hashing pieces uses multiple threads for better performance. - Support for the "md5sum" field was dropped. Calculating MD5 hashes besides the SHA1 hashes is no longer easily possible due to multithreading and it's unclear to me if/how this field is even useful. - The new methods Torrent.verify_content() and Torrent.verify_filesize() check if on-disk data matches a given torrent file. - The property Torrent.exclude was replaced by Torrent.exclude_globs and Torrent.exclude_regexs. These return special lists that filter files when changed. - Torrent.path is a path-like object. - Torrent.files, Torrent.filepaths, Torrent.trackers, Torrent.webseeds and Torrent.httpseeds are mutable lists of path-like objects or URLs that automatically synchronize with Torrent.metainfo when changed. - Torrent.filetree uses File objects as leaf nodes. File is a path-like object that also stores the file size. - The new class attributes Torrent.piece_size_min and Torrent.piece_size_max can be used to quickly specify piece size limits. Setting the piece_size property to an out-of-bounds piece size or returning one with Torrent.calculate_piece_size() raises PieceSizeError. 
- Torrent.validate() is better at finding invalid stuff in the metainfo. - Exceptions were added and removed. If you don't catch TorfError, make sure you're expecting the correct exceptions. - Except for ReadError and WriteError, exceptions no longer have an "errno" property. - When setting the Torrent.path property to None, only "pieces" is removed from the metainfo. "piece length", "pieces", "length" and "files" are kept. "name" is only changed when a new path is set. - The license was changed to GPLv3. 2019-07-01 2.1.0 - Keep piece size smaller for large torrents and use more pieces to compensate. - Implement your own piece size calculation of arbitrary complexity by simply overloading Torrent.calculate_piece_size(). 2019-04-04 2.0.0 - Use proper version number scheme - Raise PieceSizeError if 'piece_size' is set to a number that isn't a power of two 2018-06-25 1.5 - New methods read_stream() and write_stream() to import/export a torrent from any file-like object 2018-06-15 1.4 - New method: calculate_piece_size() - Piece size is now automatically calculated when path is set instead of calculating it on demand when requested - Setting piece size to a non-number now raises ValueError instead of RuntimeError - Exclude patterns are now matched against every part of a file's path, not just the last part (i.e. the file name) - Setting torrent.path to '.' or '..' 
now sets the correct name - Torrent instances are equal (==) if their metainfo is equal - Torrent instances are hashable - Torrent instances can be copied with the copy() method or the copy module from the standard library 2018-04-07 1.3 - Fix 'filepaths' attribute when setting a custom name 2018-02-19 1.2 - Don't leave an empty file when calling write() on an invalid torrent 2018-02-18 1.1 - 'announce' in metainfo is now a single URL instead of the first tier (Marcin Kurczewski) 2018-02-01 1.0 - Nothing changed except that this is now the final 1.0 version 2018-01-13 1.0rc5 - Fixed a bug where overwriting a torrent file resulted in corrupt torrent - Added property 'pieces' that returns the number of pieces - Added property 'filetree' that can be used to create a beautiful tree of a torrent's content - Added property 'is_ready' that is True when torrent can be exported - When reading torrent files, don't read large files all the way to EOF before failing - All exceptions now have an 'errno' attribute (see 'errno' module) 2018-01-03 1.0rc4 - Torrent.write() and Torrent.read() take a file path instead of an opened file object - Some exception names have changed - Allow reading arbitrary bencoded data with validation turned off - Default 'created_by' value is now 'torf/' 2017-12-27 1.0rc3 - Reduce entropy with 'randomize_infohash' enabled (some parsers seem to have issues with large integers) 2017-12-25 1.0rc2 - Add 'randomize_infohash' as a Torrent argument - Add 'name' as a Torrent argument - Call generate() callback again when all pieces are hashed - Validate 'metainfo' when accessing 'infohash' property 2017-12-21 1.0rc1 Initial release rndusr-torf-547b989/LICENSE000066400000000000000000001045151513142010300152750ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 3, 29 June 2007 Copyright (C) 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. 
Preamble The GNU General Public License is a free, copyleft license for software and other kinds of works. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others. For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it. For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. 
For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions. Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users. Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS 0. Definitions. "This License" refers to version 3 of the GNU General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. 
The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code. The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. 
The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions. All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. 
The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law. No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. Conveying Verbatim Copies. 
You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions. You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: a) The work must carry prominent notices stating that you modified it, and giving a relevant date. b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. 
A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms. You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge. c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. 
d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. 
A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. 
Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or d) Limiting the use for publicity purposes of names of licensors or authors of the material; or e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination. You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies. You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients. Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. Patents. A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. 
The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. 
"Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom. 
If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. Use with the GNU Affero General Public License. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such. 14. Revised Versions of this License. The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation. 
If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty. THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. Interpretation of Sections 15 and 16. 
If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. <one line to give the program's name and a brief idea of what it does.> Copyright (C) <year> <name of author> This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode: <program> Copyright (C) <year> <name of author> This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box". You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>. The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <https://www.gnu.org/licenses/why-not-lgpl.html>. rndusr-torf-547b989/Makefile000066400000000000000000000004771513142010300157320ustar00rootroot00000000000000VENV_PATH?=venv PYTHON?=python3 clean: find . -name "*.pyc" -delete find . -name "__pycache__" -delete rm -rf dist build rm -rf .pytest_cache rm -rf .tox rm -rf .coverage .coverage.* rm -rf "$(VENV_PATH)" *.egg-info venv: "$(PYTHON)" -m venv "$(VENV_PATH)" "$(VENV_PATH)"/bin/pip install --editable '.[dev]' rndusr-torf-547b989/README.rst000066400000000000000000000042031513142010300157500ustar00rootroot00000000000000torf ==== torf provides a ``Torrent`` and a ``Magnet`` class. `torf-cli <https://github.com/rndusr/torf-cli>`_ and `torf-gui <https://github.com/SavageCore/torf-gui>`_ provide user interfaces for torf. This project started as a fork of `dottorrent <https://github.com/kz26/dottorrent>`_ but turned into a rewrite. 
Features -------- - Create a ``Torrent`` instance from a path to the torrent's content or by reading an existing ``.torrent`` file - High-level access to standard metainfo fields via properties - Low-level access to arbitrary metainfo fields via ``metainfo`` property - Optional metainfo validation with helpful error messages - Generate a `BTIH magnet URI <https://en.wikipedia.org/wiki/Magnet_URI_scheme>`_ from a ``.torrent`` file (the reverse is also possible but the resulting torrent is incomplete due to the lack of information in magnet URIs) - Use multiple CPU cores to compute piece hashes - Randomize the info hash to help with cross-seeding - Conveniently re-use piece hashes from an existing torrent file Example ------- .. code:: python from torf import Torrent t = Torrent(path='path/to/content', trackers=['https://tracker1.example.org:1234/announce', 'https://tracker2.example.org:5678/announce'], comment='This is a comment') t.private = True t.generate() t.write('my.torrent') Documentation ------------- Everything should be explained in the docstrings. Read it with ``pydoc3 torf.Torrent`` or ``pydoc3 torf.Magnet``. Documentation is also available at `torf.readthedocs.io <https://torf.readthedocs.io/>`_ or `torf.readthedocs.io/en/latest <https://torf.readthedocs.io/en/latest/>`_ for the development version. Installation ------------ torf is available on `PyPI <https://pypi.org/project/torf/>`_. The latest development version is in the master branch on `GitHub <https://github.com/rndusr/torf>`_. Contributing ------------ I consider this project feature complete, but feel free to request new features or improvements. Bug reports are always welcome, of course. 
License ------- `GPLv3+ <https://www.gnu.org/licenses/gpl-3.0.en.html>`_ rndusr-torf-547b989/docs/000077500000000000000000000000001513142010300152125ustar00rootroot00000000000000rndusr-torf-547b989/docs/conf.py000066400000000000000000000001431513142010300165070ustar00rootroot00000000000000master_doc = 'index' extensions = [ 'sphinx.ext.autodoc', ] autodoc_member_order = 'bysource' rndusr-torf-547b989/docs/index.rst000066400000000000000000000022501513142010300170520ustar00rootroot00000000000000`torf <https://github.com/rndusr/torf>`_ Reference ================================================== .. autoclass:: torf.Torrent :members: :member-order: bysource .. autoclass:: torf.Magnet :members: :member-order: bysource .. autoclass:: torf.File :members: :member-order: bysource .. autoclass:: torf.Filepath :members: :member-order: bysource .. autoclass:: torf.TorrentFileStream :members: :member-order: bysource .. autoexception:: torf.TorfError :members: .. autoexception:: torf.URLError :members: .. autoexception:: torf.PieceSizeError :members: .. autoexception:: torf.MetainfoError :members: .. autoexception:: torf.BdecodeError :members: .. autoexception:: torf.MagnetError :members: .. autoexception:: torf.PathError :members: .. autoexception:: torf.CommonPathError :members: .. autoexception:: torf.VerifyNotDirectoryError :members: .. autoexception:: torf.VerifyIsDirectoryError :members: .. autoexception:: torf.VerifyFileSizeError :members: .. autoexception:: torf.VerifyContentError :members: .. autoexception:: torf.ReadError :members: .. 
autoexception:: torf.WriteError :members: rndusr-torf-547b989/docs/requirements.txt000066400000000000000000000000211513142010300204670ustar00rootroot00000000000000sphinx_rtd_theme rndusr-torf-547b989/pyproject.toml000066400000000000000000000034631513142010300172040ustar00rootroot00000000000000[project] name = "torf" description = "Python 3 module for creating and parsing torrent files and magnet URIs" readme = "README.rst" license = {text = "GPL-3.0-or-later"} authors = [ { name="Random User", email="rndusr@posteo.de" }, ] keywords = ["bittorrent", "torrent", "magnet"] dynamic = ["version"] # Get version from PROJECT/__version__ classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)", "Operating System :: OS Independent", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: 3.14", "Topic :: Software Development :: Libraries", ] requires-python = ">=3.8" dependencies = [ ] [project.optional-dependencies] dev = [ "pytest", "pytest-xdist", "pytest-httpserver", "pytest-mock", "tox", "coverage", "pytest-cov", "ruff", "flake8", "isort", "mypy", ] [project.urls] Repository = "https://github.com/rndusr/torf" Documentation = "https://torf.readthedocs.io/" "Bug Tracker" = "https://github.com/rndusr/torf/issues" Changelog = "https://raw.githubusercontent.com/rndusr/torf/master/CHANGELOG" [build-system] requires = ["setuptools"] build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] include = ["torf*"] [tool.setuptools.dynamic] version = {attr = "torf.__version__"} [tool.mypy] strict = true pretty = true exclude = [ "torf/_reuse.py", # Not part of the public API "torf/_generate.py", # Not part of the public API 
"tests/", "docs/", ] rndusr-torf-547b989/pytest.ini000066400000000000000000000001171513142010300163120ustar00rootroot00000000000000[pytest] addopts = --log-level=DEBUG --numprocesses=9 log_format = %(message)s rndusr-torf-547b989/readthedocs.yml000066400000000000000000000004531513142010300172740ustar00rootroot00000000000000# https://docs.readthedocs.io/en/stable/config-file/v2.html version: 2 build: os: ubuntu-22.04 tools: python: "3.12" sphinx: builder: html configuration: docs/conf.py fail_on_warning: true python: install: - requirements: docs/requirements.txt - method: pip path: . rndusr-torf-547b989/ruff.toml000066400000000000000000000004541513142010300161240ustar00rootroot00000000000000line-length = 120 lint.select = [ "E", # pycodestyle "F", # pyflakes "I", # isort ] lint.ignore = [ "E741", # Ambiguous variable name: `l` ] [lint.per-file-ignores] "__init__.py" = [ # imported but unused "F401", ] "tests/*" = [ # Line too long "E501", ] rndusr-torf-547b989/tests/000077500000000000000000000000001513142010300154245ustar00rootroot00000000000000rndusr-torf-547b989/tests/__init__.py000066400000000000000000000470041513142010300175420ustar00rootroot00000000000000import collections import itertools import logging import os import random import torf debug = logging.getLogger('test').debug def display_filespecs(filespecs, piece_size): filecount = len(filespecs) header = ['.' 
+ ' ' * (((4 * filecount) + (2 * filecount - 1)) + 2 - 1)] for i in range(8): header.append(str(i) + ' ' * (piece_size - 1)) line = (', '.join(f'{fn}:{fs:2d}' for fn,fs in filespecs), ' - ', ''.join(fn * fs for fn,fs in filespecs)) debug(f'\n{"".join(header)}\n{"".join(line)}') class fuzzylist(list): """ List that is fuzzily equal to other lists >>> x = fuzzylist('a', 'b', 'c', maybe=('x', 'y', 'z')) >>> x ['a', 'b', 'c'] >>> x == ['z', 'b', 'a', 'c', 'y'] True Limit the number of optional items: >>> x = fuzzylist('a', 'b', 'c', maybe=('x', 'x')) >>> x == ['a', 'x', 'b', 'x', 'c'] True >>> x == ['a', 'x', 'b', 'x', 'c', 'x'] False `max_maybe_items` also allows you to limit the number of optional items: >>> x = fuzzylist('a', 'b', 'c', maybe=('x', 'y', 'z'), max_maybe_items={'x':1}) >>> x == ['a', 'x', 'b', 'z', 'c'] True >>> x == ['a', 'x', 'b', 'x', 'c'] False Unlike `set(...) == set(...)`, this doesn't remove duplicate items and allows unhashable items. """ def __init__(self, *args, maybe=(), max_maybe_items={}): self.maybe = list(maybe) self.max_maybe_items = dict(max_maybe_items) super().__init__(args) def __eq__(self, other): if tuple(self) != tuple(other): # Check if either list contains any disallowed items, accepting # items from `maybe`. other_maybe = getattr(other, 'maybe', []) for item in self: if item not in other and item not in other_maybe: return False self_maybe = self.maybe for item in other: if item not in self and item not in self_maybe: return False # Check if either list contains an excess of items. 
other_max = getattr(other, 'max_maybe_items', {}) for item in itertools.chain(self, self.maybe): maxcount = max(other_max.get(item, 1), (other + other_maybe).count(item)) if self.count(item) > maxcount: return False self_max = self.max_maybe_items for item in itertools.chain(other, other_maybe): maxcount = max(self_max.get(item, 1), (self + self_maybe).count(item)) if other.count(item) > maxcount: return False return True def __ne__(self, other): return not self.__eq__(other) def __bool__(self): return len(self) > 0 or len(self.maybe) > 0 def __add__(self, other): items = super().__add__(other) maybe = self.maybe + getattr(other, 'maybe', []) max_maybe_items = {**self.max_maybe_items, **getattr(other, 'max_maybe_items', {})} return type(self)(*items, maybe=maybe, max_maybe_items=max_maybe_items) def __repr__(self): s = f'{type(self).__name__}(' s += '[' + ', '.join(repr(item) for item in super().__iter__()) + ']' if self.maybe: s += f', maybe={repr(self.maybe)}' if self.max_maybe_items: s += f', max_maybe_items={repr(self.max_maybe_items)}' return s + ')' class fuzzydict(dict): """ Dictionary that ignores empty `fuzzylist` values when determining equality, e.g. fuzzydict(x=fuzzylist()) == {} """ def __eq__(self, other): if super().__eq__(other): return True elif not isinstance(other, dict): return NotImplemented keys_same = set(self).intersection(other) for k in keys_same: if self[k] != other[k]: return False keys_diff = set(self).difference(other) for k in keys_diff: sv = self.get(k, fuzzylist()) ov = other.get(k, fuzzylist()) if sv != ov: return False return True def __repr__(self): return f'{type(self).__name__}({super().__repr__()})' def ComparableException(exc): """ Horrible hack that allows us to compare exceptions comfortably `exc1 == exc2` is True if both exceptions have the same type and the same message. Type checking with issubclass() and isinstance() also works as expected. 
""" # Make the returned class object an instance of the type of `exc` and the # returned Comparable* class. class ComparableExceptionMeta(type): _cls = type(exc) @classmethod def __subclasscheck__(mcls, cls): return issubclass(cls, mcls._cls) or issubclass(cls, mcls) @classmethod def __instancecheck__(mcls, inst): return isinstance(cls, mcls._cls) or isinstance(cls, mcls) # Make subclass of the same name with "Comparable" prepended clsname = 'Comparable' + type(exc).__name__ bases = (type(exc),) def __eq__(self, other, _real_cls=type(exc)): return isinstance(other, (type(self), _real_cls)) and str(self) == str(other) def __hash__(self): return hash(str(self)) attrs = {} attrs['__eq__'] = __eq__ attrs['__hash__'] = __hash__ cls = ComparableExceptionMeta(clsname, bases, attrs) if isinstance(exc, torf.TorfError): return cls(*exc.posargs, **exc.kwargs) else: raise exc def random_positions(stream): """Return list of 1 to 5 random indexes in `stream`""" positions = random.sample(range(len(stream)), k=min(len(stream), 5)) return sorted(positions[:random.randint(1, len(positions))]) def change_file_size(filepath, original_size): """Randomly change size of `filepath` on disk and return new contents""" diff_range = list(range(-original_size, original_size + 1)) diff_range.remove(0) diff = random.choice(diff_range) data = bytearray(open(filepath, 'rb').read()) debug(f' Original data ({len(data)} bytes): {data}') if diff > 0: # Make add `diff` bytes at `pos` pos = random.choice(range(original_size + 1)) data[pos:pos] = b'\xA0' * diff elif diff < 0: # Remove `abs(diff)` bytes at `pos` pos = random.choice(range(original_size - abs(diff) + 1)) data[pos : pos + abs(diff)] = () with open(filepath, 'wb') as f: f.write(data) f.truncate() assert os.path.getsize(filepath) == original_size + diff debug(f' Changed data ({len(data)} bytes): {data}') with open(filepath, 'rb') as f: return f.read() def round_up_to_multiple(n, x): """Round `n` up to the next multiple of `x`""" return n - n % 
-x def round_down_to_multiple(n, x): """Round `n` down to the previous multiple of `x`""" if n % x != 0: return round_up_to_multiple(n, x) - x else: return n def file_range(filename, filespecs): """Return `filename`'s first and last byte index in stream""" pos = 0 for fn,size in filespecs: if fn == filename: return pos, pos + size - 1 pos += size raise RuntimeError(f'Could not find {filename} in {filespecs}') def file_piece_indexes(filename, filespecs, piece_size, exclusive=False): """ Return list of indexes of pieces that contain bytes from `filename` If `exclusive` is True, don't include pieces that contain bytes from multiple files. """ file_beg,file_end = file_range(filename, filespecs) first_piece_index_pos = round_down_to_multiple(file_beg, piece_size) piece_indexes = [] for pos in range(first_piece_index_pos, file_end + 1, piece_size): if not exclusive or len(pos2files(pos, filespecs, piece_size)) == 1: piece_indexes.append(pos // piece_size) return piece_indexes def pos2files(pos, filespecs, piece_size, include_file_at_pos=True): """ Calculate which piece the byte at `pos` belongs to and return a list of file names of those files that are covered by that piece. 
""" p = 0 filenames = [] for filename,filesize in filespecs: filepos_beg = p filepos_end = filepos_beg + filesize - 1 first_piece_index = filepos_beg // piece_size last_piece_index = filepos_end // piece_size first_piece_index_pos_beg = first_piece_index * piece_size last_piece_index_pos_end = (last_piece_index + 1) * piece_size - 1 if first_piece_index_pos_beg <= pos <= last_piece_index_pos_end: filenames.append(filename) p += filesize if not include_file_at_pos: file_at_pos,_ = pos2file(pos, filespecs, piece_size) return [f for f in filenames if f != file_at_pos] else: return filenames def pos2file(pos, filespecs, piece_size): """Return file name and relative position of `pos` in file""" p = 0 for filename,filesize in filespecs: if p <= pos < p + filesize: return (filename, pos - p) p += filesize raise RuntimeError(f'Could not find file at position {pos} in {filespecs}') def calc_piece_indexes(filespecs, piece_size, files_missing=(), files_missized=()): """ Turn a list of (filename, filesize) tuples into a dictionary that maps file names to the piece indexes they cover. Pieces that overlap multiple files belong to the last file they cover. """ piece_indexes = collections.defaultdict(lambda: fuzzylist()) pos = 0 for i, (filename, filesize) in enumerate(filespecs): # Piece indexes that cover only one file must be reported for that file. exclusive_file_pis = file_piece_indexes(filename, filespecs, piece_size, exclusive=True) # Piece indexes that cover multiple files may be reported for any of # those files. 
multiple_file_pis = [ pi for pi in file_piece_indexes(filename, filespecs, piece_size, exclusive=False) if pi not in exclusive_file_pis ] piece_indexes[filename].extend(exclusive_file_pis) piece_indexes[filename].maybe.extend(multiple_file_pis) pos += filesize # Remove empty lists for k in tuple(piece_indexes): if not piece_indexes[k]: del piece_indexes[k] # For each missing/missized file, the first piece of the file may get two # calls, one for the "no such file"/"wrong file size" error and one for the # "corrupt piece" error. for filepath in itertools.chain(files_missing, files_missized): filename = os.path.basename(filepath) file_beg,file_end = file_range(filename, filespecs) piece_index = file_beg // piece_size if piece_index not in piece_indexes[filename].maybe: piece_indexes[filename].maybe.append(piece_index) return fuzzydict(piece_indexes) def calc_good_pieces(filespecs, piece_size, files_missing, corruption_positions, files_missized): """ Same as `calc_piece_indexes`, but exclude corrupt pieces and pieces of missing or missized files """ debug('* Calculating good pieces') all_piece_indexes = calc_piece_indexes(filespecs, piece_size, files_missing, files_missized) bad_pis = {corrpos // piece_size for corrpos in corruption_positions} debug(f' missing files: {files_missing}') debug(f' missized files: {files_missized}') debug(f' all piece_indexes: {all_piece_indexes}') debug(f' corrupt piece_indexes: {bad_pis}') # Find pieces that exclusively belong to missing or missized files for filepath in itertools.chain(files_missing, files_missized): file_beg,file_end = file_range(os.path.basename(filepath), filespecs) first_bad_pi = file_beg // piece_size last_bad_pi = file_end // piece_size bad_pis.update(range(first_bad_pi, last_bad_pi + 1)) debug(f' combined bad piece_indexes: {bad_pis}') # Remove pieces that are in bad_pis good_pieces = collections.defaultdict(lambda: fuzzylist()) for fname,all_pis in all_piece_indexes.items(): # Maintain mandatory and optional 
piece_indexes from all_piece_indexes for pi in itertools.chain(all_pis, all_pis.maybe): if pi not in bad_pis: debug(f' filename={fname}: piece_index={pi}: good') if pi in all_pis.maybe: good_pieces[fname].maybe.append(pi) else: good_pieces[fname].append(pi) else: debug(f' filename={fname}: piece_index={pi}: bad') good_pieces = fuzzydict(good_pieces) debug(f' corruptions and missing/missized files removed: {good_pieces}') return good_pieces def skip_good_pieces(good_pieces, filespecs, piece_size, corruption_positions): """ For each file in `good_pieces`, remove piece_indexes between the first corruption and the end of the file """ debug('* Skipping good pieces after corruptions') # Find out which piece_indexes should be skipped skipped_pis = set() for corrpos in sorted(corruption_positions): corr_pi = corrpos // piece_size affected_files = pos2files(corrpos, filespecs, piece_size) debug(f' corruption at position {corrpos}, piece_index {corr_pi}: {affected_files}') for file in affected_files: file_pis_exclusive = file_piece_indexes(file, filespecs, piece_size, exclusive=True) debug(f' {file}: piece_indexes exclusive: {file_pis_exclusive}') file_pis = file_piece_indexes(file, filespecs, piece_size, exclusive=False) debug(f' piece_indexes non-exclusive: {file_pis}') try: first_corr_index_in_file = file_pis.index(corr_pi) except ValueError: # Skip all pieces in `file` that don't contain bytes from other files debug(f' piece_index {corr_pi} is not part of {file}: {file_pis_exclusive}') skipped_pis.update(file_pis_exclusive) else: # Skip all pieces after the first corrupted piece in `file` skip_pis = file_pis[first_corr_index_in_file + 1:] debug(f' skipping piece_indexes after corruption: {skip_pis}') skipped_pis.update(skip_pis) # Make skipped piece_indexes optional while unskipped piece_indexes stay # mandatory. 
debug(f' skipping piece_indexes: {skipped_pis}') good_pieces_skipped = collections.defaultdict(lambda: fuzzylist()) for fname,pis in good_pieces.items(): for pi in pis: if pi in skipped_pis: good_pieces_skipped[fname].maybe.append(pi) else: good_pieces_skipped[fname].append(pi) return fuzzydict(good_pieces_skipped) def calc_corruptions(filespecs, piece_size, corruption_positions): """Map file names to (piece_index, exception) tuples""" exceptions = [] reported = set() for corrpos in sorted(corruption_positions): corr_pi = corrpos // piece_size if corr_pi not in reported: filepath, _ = pos2file(corrpos, filespecs, piece_size) exc = ComparableException(torf.VerifyContentError(filepath, corr_pi, piece_size, filespecs)) exceptions.append(exc) reported.add(corr_pi) return fuzzylist(*exceptions) def skip_corruptions(all_corruptions, filespecs, piece_size, corruption_positions, files_missing, files_missized): """Make every non-first corruption optional""" debug(f'Skipping corruptions: {all_corruptions}') pis_seen = set() files_seen = set() corruptions = fuzzylist() files_autoskipped = set(str(f) for f in itertools.chain(files_missing, files_missized)) debug(f' missing or missized: {files_autoskipped}') for exc in all_corruptions: # Corruptions for files we haven't seen yet must be reported if any(f not in files_seen and f not in files_autoskipped for f in exc.files): debug(f' mandatory: {exc}') files_seen.update(exc.files) pis_seen.add(exc.piece_index) corruptions.append(exc) # Corruptions for files we already have seen may still be reported # because skipping is racy and it's impossible to predict how many # pieces are processed before the skip manifests. else: debug(f' optional: {exc}') corruptions.maybe.append(exc) pis_seen.add(exc.piece_index) # Because we fake skipped files, their last piece is reported as corrupt if # it contains bytes from the next file even if there is no corruption in the # skipped file's last piece. 
But this is not guaranteed because it's # possible the corrupt file is fully processed before its corruption is # noticed. for corrpos in corruption_positions: # Find all files that are affected by the corruption affected_files = pos2files(corrpos, filespecs, piece_size) debug(f' affected_files: {affected_files}') # Find piece_index of the end of the last affected file _,file_end = file_range(affected_files[-1], filespecs) piece_index = file_end // piece_size debug(f' {affected_files[-1]} ends at piece_index {piece_index}') # Add optional exception for that piece exc = ComparableException(torf.VerifyContentError(piece_index, piece_size, filespecs)) if exc not in itertools.chain(corruptions, corruptions.maybe): debug(f'Adding possible exception for last affected file {affected_files[-1]}: {exc}') corruptions.maybe.append(exc) return corruptions def calc_pieces_done(filespecs_abspath, piece_size, files_missing=(), files_missized=()): debug('* Calculating pieces_done') # The callback gets the number of verified pieces (pieces_done). This # function calculates the expected values for that argument. # # It's not as simple as range(1, +1). For example, if a # file is missing, we get the same pieces_done value two times, once for "No # such file" and maybe again for "Corrupt piece" if the piece contains parts # of another file. 
# Every pieces_done value is reported at least once total_size = sum(filesize for _,filesize in filespecs_abspath) pieces_done_list = list((pi // piece_size) + 1 for pi in range(0, total_size, piece_size)) debug(f' progress reports: {pieces_done_list}') # List of pieces_done values that may appear multiple times maybes = set() # Map pieces_done values to the number of times they may appear max_maybe_items = collections.defaultdict(lambda: 1) # Missing or missized files are reported in addition to progress reports files_missing = {str(filepath) for filepath in files_missing} debug(f' files_missing: {files_missing}') files_missized = {str(filepath) for filepath in files_missized} debug(f' files_missized: {files_missized}') for filepath in files_missing.union(files_missized): # Because we're multithreaded, we can't expect the missing/missized file # to be reported at its first piece. We can't predict at all when the # error is reported. The only thing we can savely say that for each # missing/missized file, every pieces_done_value *may* increase by 1. 
for pieces_done_value in pieces_done_list: maybes.add(pieces_done_value) max_maybe_items[pieces_done_value] += 1 fuzzy_pieces_done_list = fuzzylist(*pieces_done_list, maybe=sorted(maybes), max_maybe_items=max_maybe_items) return fuzzy_pieces_done_list rndusr-torf-547b989/tests/conftest.py000066400000000000000000000565231513142010300176360ustar00rootroot00000000000000import argparse import contextlib import functools import itertools import math import os import random import string import time from collections import OrderedDict from types import SimpleNamespace from unittest import mock import pytest import torf # Make piece size and the number of pieces to use for testing torrents # configurable def pytest_addoption(parser): class IntList(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, tuple(int(value) for value in values.split(','))) parser.addoption('--piece-sizes', default=(8,), action=IntList, help='Comma-separated list of piece sizes to use for test torrents') parser.addoption('--piece-counts', default=(1, 2, 3, 4, 23, 24), action=IntList, help='Comma-separated list of number of pieces to use for test torrents') parser.addoption('--file-counts', default=(1, 2, 3, 4), action=IntList, help='Comma-separated list of number of files to use for test torrents') parser.addoption('--fuzzy', action='store_true', help='Whether to randomize file sizes for --file-counts >= 4') alphabet = 'abcdefghijklmnopqrstuvwxyz' def pytest_generate_tests(metafunc): piece_sizes = metafunc.config.getoption('piece_sizes') piece_counts = metafunc.config.getoption('piece_counts') file_counts = metafunc.config.getoption('file_counts') fixturenames = metafunc.fixturenames if 'filespecs' in fixturenames: argnames, argvalues, ids = _parametrize_filespecs(file_counts, piece_sizes, piece_counts, filespec_indexes='filespec_indexes' in fixturenames, fuzzy=metafunc.config.getoption('fuzzy')) metafunc.parametrize(argnames, 
argvalues, ids=ids) else: if 'piece_size' in fixturenames: metafunc.parametrize('piece_size', piece_sizes) if 'callback' in fixturenames: argvalues = [{'enabled': True}, {'enabled': False}] metafunc.parametrize('callback', argvalues, ids=['callback' if c['enabled'] else '' for c in argvalues]) def _parametrize_filespecs(file_counts, piece_sizes, piece_counts, filespec_indexes=False, fuzzy=False): argnames = ['filespecs', 'piece_size'] if filespec_indexes: argnames.append('filespec_indexes') argvalues = [] ids = [] for file_count in file_counts: for piece_size in piece_sizes: for piece_count in piece_counts: filespecs = _generate_filespecs(file_count, piece_size, piece_count, fuzzy=fuzzy) _display_filespecs(filespecs, file_count, piece_size) # piece_size is connected to file sizes (i.e. filespecs) for filespec in filespecs: values = (filespec, piece_size) # Generate combinations of file indexes if filespec_indexes: for number_of_indexes in range(1, file_count + 1): for indexes in itertools.combinations(range(0, file_count), number_of_indexes): argvalues.append(values + (indexes,)) ids.append(','.join(f'{fname}={fsize}' for fname,fsize in filespec) + f'-pc={piece_count}' + f'-ps={piece_size}' + f'-fsi={",".join(str(i) for i in indexes)}') else: argvalues.append(values) ids.append(','.join(f'{fname}={fsize}' for fname,fsize in filespec) + f'-pc={piece_count}' + f'-ps={piece_size}') return argnames, argvalues, ids def _generate_filespecs(file_count, piece_size, piece_count, fuzzy=False): filesizes = (max(1, (piece_size * piece_count // file_count) - 1), (piece_size * piece_count // file_count), (piece_size * piece_count // file_count) + 1) if file_count == 1: return ( ((alphabet[0], filesizes[0]),), ((alphabet[0], filesizes[1]),), ((alphabet[0], filesizes[2]),), ) elif file_count == 2: return ( ((alphabet[0], filesizes[0]), (alphabet[1], filesizes[0])), ((alphabet[0], filesizes[0]), (alphabet[1], filesizes[1])), ((alphabet[0], filesizes[0]), (alphabet[1], 
filesizes[2])), ((alphabet[0], filesizes[1]), (alphabet[1], filesizes[0])), ((alphabet[0], filesizes[1]), (alphabet[1], filesizes[1])), ((alphabet[0], filesizes[1]), (alphabet[1], filesizes[2])), ((alphabet[0], filesizes[2]), (alphabet[1], filesizes[0])), ((alphabet[0], filesizes[2]), (alphabet[1], filesizes[1])), ((alphabet[0], filesizes[2]), (alphabet[1], filesizes[2])), ) elif file_count == 3: return ( ((alphabet[0], filesizes[0]), (alphabet[1], filesizes[0]), (alphabet[2], filesizes[0])), ((alphabet[0], filesizes[0]), (alphabet[1], filesizes[1]), (alphabet[2], filesizes[2])), ((alphabet[0], filesizes[1]), (alphabet[1], filesizes[2]), (alphabet[2], filesizes[0])), ((alphabet[0], filesizes[1]), (alphabet[1], filesizes[1]), (alphabet[2], filesizes[2])), ((alphabet[0], filesizes[2]), (alphabet[1], filesizes[1]), (alphabet[2], filesizes[1])), ((alphabet[0], filesizes[2]), (alphabet[1], filesizes[2]), (alphabet[2], filesizes[1])), ((alphabet[0], filesizes[2]), (alphabet[1], filesizes[1]), (alphabet[2], filesizes[2])), ((alphabet[0], filesizes[2]), (alphabet[1], filesizes[2]), (alphabet[2], filesizes[2])), ) else: filesizes = set(filesizes) # For itertools.combinations() to produce more than one item, we need at # least one more file size than files. 
i = 2 while len(filesizes) < file_count + 1: filesizes.add(max(1, piece_size * piece_count // file_count - i)) filesizes.add(piece_size * piece_count // file_count + i) i += 1 filesizes.update((max(1, piece_size // 2), max(1, piece_size // 3))) # Limit filesizes to reduce number of test while len(filesizes) > file_count + 2: middle_item = sorted(filesizes)[int(len(filesizes) / 2)] filesizes.discard(middle_item) filesizes = list(sorted(filesizes)) filesizeorders = ['small_first', 'small_middle', 'small_last'] if fuzzy: random.shuffle(filesizes) random.shuffle(filesizeorders) filesizeorders_iter = itertools.cycle(filesizeorders) filespecs = set() for fsizes in itertools.combinations(filesizes, file_count): order = next(filesizeorders_iter) if order == 'small_first': fsizes = sorted(fsizes, reverse=False) elif order == 'small_last': fsizes = sorted(fsizes, reverse=True) elif order == 'small_middle': groupsize = int(len(fsizes) / 3) + 1 fsizes = ( sorted(fsizes[-groupsize:], reverse=True) + sorted(fsizes[groupsize:-groupsize], reverse=False) + sorted(fsizes[:groupsize], reverse=False) ) filespecs.add(tuple((alphabet[i], fsize) for i,fsize in enumerate(fsizes))) # Ensure identical order or xdist will complain with --numprocesses > 1 return sorted(sorted(filespecs), key=lambda f: sum(s[1] for s in f)) def _display_filespecs(filespecs, file_count, piece_size): lines = [] for filespec in filespecs: line = (', '.join(f'{fn}:{fs:2d}' for fn,fs in filespec), ' - ', ''.join(fn * fs for fn,fs in filespec)) lines.append(''.join(line)) print(f'{len(filespecs)} filespecs:') for i,line in enumerate(lines): if i % 10 == 0: header = [' ' * (((4 * file_count) + (2 * file_count - 1)) + 1)] for i in range(6): header.append(str(i) + ' ' * (piece_size - 2)) print(' '.join(header)) print(line) @pytest.fixture def valid_singlefile_metainfo(): return OrderedDict([ (b'announce', b'http://localhost'), (b'comment', b'This is a test comment'), (b'created by', b'The creator'), (b'creation date', 
1513440897), (b'info', OrderedDict([ (b'length', 500000), (b'name', b'Torrent for testing'), (b'piece length', 32768), (b'pieces', b'\x00' * 20 * 16), (b'private', 1) ])) ]) @pytest.fixture def valid_multifile_metainfo(): return OrderedDict([ (b'announce', b'http://localhost'), (b'comment', b'This is a test comment'), (b'created by', b'The creator'), (b'creation date', 1513440897), (b'info', OrderedDict([ (b'files', [{b'length': 123, b'path': [b'A file']}, {b'length': 456, b'path': [b'Another file']}, {b'length': 789, b'path': [b'A', b'third', b'file in a subdir']}]), (b'name', b'Torrent for testing'), (b'piece length', 32768), (b'pieces', b'\x00' * 20), (b'private', 1) ])) ]) @pytest.fixture def random_seed(): @contextlib.contextmanager def _random_seed(seed): random.seed(seed) yield random.seed() return _random_seed testdir_base = 'test_files' letters = string.ascii_letters + string.digits + ' ²öäåóíéëúæøœœï©®¹³¤óíøï' def _randstr(): length = random.randint(10, 20) return ''.join(random.choice(letters) for _ in range(length)) def _mktempdir(tmp_path_factory, subdir=None): path = tmp_path_factory.mktemp(testdir_base, numbered=True) if subdir is None: subdir = '' subdir += ':' + _randstr() (path / subdir).mkdir() return path / subdir def _generate_random_file(dirpath, filename=None, hidden=False): filesize = random.randint(int(1e3), int(1e6)) filecontent = bytearray(random.getrandbits(8) for _ in range(filesize)) if filename is None: filename = '' filename += ':' + _randstr() if hidden: filename = '.' 
+ filename filepath = os.path.join(testdir_base, dirpath, filename) with open(filepath, 'wb') as f: f.write(filecontent) assert os.path.getsize(filepath) == filesize return filepath @pytest.fixture(scope='session') def singlefile_content(tmp_path_factory): random.seed(0) # Make sure random file names and content are identical every time content_path = _mktempdir(tmp_path_factory) filepath = _generate_random_file(content_path, filename='sinģle fíle') random.seed() # Re-enable randomness exp_pieces = b'\xc8\xfa\x0fV\x95\xecl\x97t\xb2v\x84S\x98{\x92[ \x13\xe5\x04\xef-\xb0;sF\xc2\x93W\xcf\xc6X\x14\x9b]_r\xfb\x80\'}\xe5\xc4\x05\xdct\xb5^\xe9\x7f0b|\xc9\xf1\x9d\xd7G\x06 ,l8m\x01\xbf2\xf6:\x03r-\x8d\x1f,\x8bk:\xad\xdbN\xa2V\x96/\xf2@w\xa5\x98\xf8\t3fU\x13;\x90\xc0F\xe3[\x15\xea\x8f\x92\xcdN:\xc1\x0fG\x9b\xeb\xd9\x93A\xca\xa7L\xd2\x9ef|\xddd\xd4\x94.f\xee\xea3\xa8\x04|\xe9h\xa7\xa1t\xa2\xb5\xb3*\x89\xf7\x14\xdf\x16M/\xc6\xa5\x85\xdaF\xca\xa7?\x9d\xe1zd\xc8\xe1\x1d\x1epC\x06+\xe1Q\x0fi\x9fv\x19\xa2(\xd0\x90\xb3\xb0\xcf\xa9\x1cy\xf0\x96\x17\n\x05\xa5*IZJ\x8c\xbb\x87\xdd\xed|d.\xf0\xb9\xfe\x00\xa6\nufY\x18\xe35\xee\xdf\xa6D\xed<\xc5W\x0fa\x80\xc6}\xdd\xf4\xbd\xc1:\xe3\xda\nj\xbag\x93\xd0\xdc\xbd\xb8\xfb\xc2\x99\x9a/&\x1d\xf3\xe9\xa3,\x9b>\'\xa5\xaa~\xabb\x81\x88\x80^\xddd\xc7\xea\x83n\x05%\x8f4\x8a\x82\xe2\xff[\xab\xa8\x92\x1f\xaarG\xc5\x00\xcae\x9e\x93\xc4\x9015\x02\xe7\x8a\xb1I\xa6\x16DF\x8a\x0b\xeb\xca@\th?WL\xe0Vf\xc9X>##?t\x08\xdf[\xac\x16\x7f\xe9\x1a\xc4\x11\x0c\xc9\xac?\xded\xed\xf5\x1b\xd0Qq\x90_\x88]\xbf\xb7\xbc\xf5\x8et4f<\x14\xb6\x98\xbb\xdd0H\x14\xfaZ\xc1\x07l3\xd6""l\x99X@\xb7\x9c\xbc/h\xe9\xc0\x83\x0e\xfb\x91\x83\xdf\x1d+\xf6\xd1_\xb8\x04\xdd\xb8\x05\'\x1c\x1b\x94\x1cl\x9a_[An:\xcdw\xe8\xfb\xbf\xb9\x82olQ6\xa4\x15\x9f\xd2c#\x1b\x9a\xb6\x84\x88@\x89\xdd\x01\x18H\xbce\rK1aS\xd8\xb8\xffD\x9f\x89\xd4\xb59y7\xff\x8b\xc1\xc3\t\xdd\x9e\xa6\xa7\x02:\xa5\xea<\xb9\x95\xd7ePU~\xbc\x16\x9a\x0f\xb1h\xad4\xfa\x18yv\x95\x96\x0cRo\x88\xe6L\x08\xfd\x94gh\x92\'w\xb3\xd1BCqC\x12_\x
1f\x92\xfc\xc6\xfd\t\xcd\xab\xe0\xbd\xcc\x06\xf9\xa7\xb1e\xa9\xbe\x0c\x8d\xfcI\x00\x0e\xd7\xbe\x0c&\xf2\xc5\xa5Yl\xf8\xc0\x8e.\x97\x0c\xd5zp~8\xc0g\xa6C\x16\xd0v\x1e\xa1\xa37\xceM[\xd6\x18\xc6\xa5\xc9\xbc\x11\x99\xc4\xe9\x0f9\xab\x98\x01\xaf\xe22\n_\x83\x9b\xdegG\xb4#\xd6\x17\xf1z4\x11v\xef\xcf!\x03\xdc\x14_\x9e;\'\xdckvHh\xd5x\xf48\xdaFa\xae\x02\xf0\x16| \xa3\x97\xe5\xed\xf6\x11$\xe4\xacb\xaf\x8a\x07fH\x96\x00\x00\x98\x87(\x97\xcd!\xff\xf8a\x02\xc4\xca\xff\xef\xe1P\x01_\x9b\x9b\x83:\x7f\xdd\x92\xfb\xe9\x94\xc3SEWh\x18\xb3\x9c\xd8\xf9M\x1d!\xd25\xcb' exp_metainfo = {'announce' : 'http://localhost:123', 'created by' : 'mktorrent 1.0', 'creation date' : 1513522263, 'info': {'name' : os.path.basename(filepath), 'piece length' : 2**14, 'pieces' : exp_pieces, 'length' : os.path.getsize(filepath)}} exp_attrs = SimpleNamespace(path=str(filepath), infohash='7febf5a5a6e6bac79df2eb4340a63009109fecd5', infohash_base32=b'P7V7LJNG425MPHPS5NBUBJRQBEIJ73GV', size=os.path.getsize(filepath), pieces=math.ceil(os.path.getsize(filepath) / exp_metainfo['info']['piece length'])) return SimpleNamespace(path=exp_attrs.path, exp_metainfo=exp_metainfo, exp_attrs=exp_attrs) @pytest.fixture(scope='session') def multifile_content(tmp_path_factory): random.seed(0) # Make sure random file names and content are identical every time content_path = _mktempdir(tmp_path_factory, subdir='Multifile torrent') for n in range(2): _generate_random_file(content_path, filename=f'File {n}') (content_path / 'subdir').mkdir() _generate_random_file(content_path / 'subdir', filename='File in subdir') random.seed() # Re-enable randomness exp_files = [{'length': 649406, 'path': ['File 0:JïYR WN93kœ']}, {'length': 199019, 'path': ['File 1:aä¤ELYœPTófsdtœe©í']}, {'length': 333198, 'path': ['subdir', 'File in subdir:F³bæ¹inRf ¤RTggTSóz']}] exp_pieces = 
b'BHG\xb7[\xdf\xaa\xf1\xf3<\xd3C\xeb\xab\xecjZ3\x06\x97\x0c*\xb7G3\xc5G\xe3\x0e\xdb\x96\xf1V-D@\xdd\t\xcf\x88GB\xa3\xdf\xdd\x1fxCQd=8\xc7\x81\x96\x0f\xaf(-\xe6FB\x10\xd1\xbf\xad\x88\x1d\x1d\xc3\x03\xb3\x08\xc0\xe0\x0b\x8a\\\x19\xdf\xed\x03\xdb\x7f\x17o3uI\xef(\n\x80\xdbbF\x91\xd90%\xe6\xfay\x16O\x06n-\xad\x1b\x06\x98SJ:\xf3d64=\xf2\xc8\t~\xbf\x08\xdd\x1am\xae\xbe\xed\xf1\x94\x8f\x08X5\x85\x0e\xa2wM\xa3\x14K,\x9dO\xd2n\xb6\x98\x16\xe6s\xa2\t\t0\xa4\x05\xd1\x95*\x02S\xf1y\x14\xf3G\xf8]eUD\x81`_m\xeaW\x0e\xb5\xc1r\n2\xf0Qo\r\xba\x07\xb3!Vr\xacn\x06\xeb\x1a\xce9\x0e\xa1j\xb1\xf9\xc9\xe0J\xda\xa2v\xe4d\'\x8cf5!Z\xd4g[\x9b\xf4fr\xc2\xee\xb3;\xe7\xe3\x9e\xe0\x06}\xe3\xe6\xc9\xa2\xf9t\x0c\xe1\xf5h\xfe\x13\xf5\xe4\xaa\xd6\x01\x91\xe3\xb7\xb2x\xe1\xd7\xb1o\x10\xe7\xd6\xd2b%d\xae\xe4\x8a\x910\x1b\xb6\x1b\xda\x944\xce\t\xd6\xdf%*n\x05\x16\xd9\x8ft\xed\xb7\xeb"\xfd\xb0Q+t\xbdy|\xed\x01<\xb9\xd2"@\xa2\x85\xa6\x8a\x1d|\x89Z\x13w\xdb\xe7\xdd\xe2\xcey\x00R\xa3[k\x8e\xde\x98""\xfd\xc0]{\xc2H\n%8 \xd3\x01\xd2i\x9f\xf0n\x05^\x90\xbc\xcb\xb5\x8a\xde$\xef\xbd\x02\x83\xe2m\x93:K\x10\xfc\xc7\xb6\xf5\xcf\x9a!\xe06as\x8b`\xda\x12\xf3\x13\xc73\xbf\xad\xcc\x86V\x14Tm5\xb4&C\x8c\x89\x17*\x83A\xc9o\x04\x9e\xe8p\x0e\x1fIx\xf2\\\xc9\xca\x8c\xd1\xfb#\x08\xeb\x0eq\xf3\r].\xacfH\xea\xc1q\xcc\x1bw\xe3\xe6-o\xf6Hb\x85\xc7\xefk\xa5\xc7\xea\xd1\xa0\xb4h\xb7\xdd\x9fe/\x98g\xef\xea6\x02f\x1a\xc1\xe5N\xf3\x10\x04\xe0\x004!\xca\x81\xa4\xfc\x12\xceS\x9c\x8e,L82\xbb\x83\x8f\x95#\x93\xe2\x83\xaf\xfd\xe9T|@oy\x07x[rp;\x89\xe0a\xdc\xee\xcekW\xaf/\xe8g\x19 \x1b\xd8\x8e.\xc2B\xaf\x94\xd9\xa5X\x94\x85\xc0\xa8\x047\xa6\xcc\xa0i( \x04\x98\xce>A\x87\x92\x8d_\xe8\x8d\xa4\xf2(\xa6\x88\xc7\xfe \xee\xdbe\xc9\r\x19{\xc8T\xc9JU[\x1d\xd3\xb0\xc6-\xdc\xc0YS\xae\x01\x12t(\xc7`m\xc6\x8c\xa8Xr\xb27\xf2\xec\xa3\x0b\r\xfe\xc4\xc0\xf0At\x00Y\xb5\x1b\xebE\x8c:p\xd4\xc1\x80k\x13\xc8I\xfe$\xday\xd2\xcc/\x00\n\t\x02B\xfa\r\x13o\x0f\x8d\xd9<7\xb5\xd0\xa3/\xee\xac\xae&"\x83\xa4)\x10L\xd0-q\xab 
\x9c\\\xc0\x92\x07MC\x85D\x17Z\xa49\xe3U\xa9\xc4\xc8z|\x1c\xe2\x03\t\x1d\x03\xe2J\x0fM\xfa5!\x98>5\x19h\xbc;{H\xa1\x14\xe7\xcb.X\x93\x7f\x0c\x15\xad' exp_metainfo = {'announce' : 'http://localhost:123', 'created by' : 'mktorrent 1.0', 'creation date' : 1513521463, 'info': {'name' : os.path.basename(content_path), 'piece length' : 2**15, 'pieces' : exp_pieces, 'files' : exp_files}} exp_attrs = SimpleNamespace(path=str(content_path), infohash='0e2e012468101efec5b1ac81ded6b8d95591c1fb', infohash_base32=b'BYXACJDICAPP5RNRVSA55VVY3FKZDQP3', size=sum(fileinfo['length'] for fileinfo in exp_files)) return SimpleNamespace(path=exp_attrs.path, exp_metainfo=exp_metainfo, exp_attrs=exp_attrs) def _write_content_file(filepath, spec): if isinstance(spec, (int, float)): filepath.write_bytes(_random_bytes(int(spec))) elif isinstance(spec, str): filepath.write_text(spec) elif isinstance(spec, (bytes, bytearray)): filepath.write_bytes(spec) else: raise RuntimeError(f'Invalid spec for {filepath}: {spec!r}') def _random_bytes(length): if random.choice((0, 1)): b = bytes(random.getrandbits(8) for _ in range(int(length))) else: # We use b'\x00' as a placeholder for padding when faking missing files # during verification, so we increase the probability of b'\x00' at the # beginning and/or end if random.choice((0, 1)): beg = b'\x00' * random.randint(0, int(length / 2)) else: beg = b'' if random.choice((0, 1)): end = b'\x00' * random.randint(0, int(length / 2)) else: end = b'' b = beg + bytes(random.getrandbits(8) for _ in range(int(length - len(beg) - len(end)))) + end assert len(b) == length return b @pytest.fixture def create_file(tmp_path): def _create_file(tmp_path, filename, spec): filepath = tmp_path / filename _write_content_file(filepath, spec) return filepath return functools.partial(_create_file, tmp_path) @pytest.fixture def create_dir(tmp_path): def _create_dir(tmp_path, dirname, *files): content_path = tmp_path / dirname if not os.path.exists(content_path): 
content_path.mkdir() for filepath, spec in files: parts = [part for part in filepath.split(os.sep) if part] dirpath = content_path for part in parts[:-1]: dirpath = dirpath / part if not os.path.exists(dirpath): dirpath.mkdir() filepath = dirpath / parts[-1] _write_content_file(filepath, spec) return content_path return functools.partial(_create_dir, tmp_path) @pytest.fixture def generated_singlefile_torrent(create_torrent, singlefile_content): torrent = create_torrent(path=singlefile_content.path) torrent.generate() return torrent @pytest.fixture def generated_multifile_torrent(create_torrent, multifile_content): torrent = create_torrent(path=multifile_content.path) torrent.generate() return torrent @pytest.fixture def create_torrent(): def _create_torrent(**kwargs): rand_kwargs = { 'trackers' : random.choice(([], ['http://localhost:123/announce'], ['http://localhost:123/announce', 'http://localhost:456/announce'], [['http://localhost:123/announce', 'http://localhost:456/announce'], ['http://localhost:789/announce', 'http://localhost:111/announce']])), 'webseeds' : random.choice(([], ['http://localhost:123/webseed'], ['http://localhost:123/webseed', 'http://localhost:456/webseed'])), 'httpseeds' : random.choice(([], ['http://localhost:123/httpseed'], ['http://localhost:123/httpseed', 'http://localhost:456/httpseed'])), 'comment' : _randstr(), 'creation_date' : random.randint(0, int(time.time())), 'created_by' : _randstr(), 'source' : _randstr(), 'piece_size' : random.choice((None, 2**14, 2**15, 2**16, 2**17, 2**18, 2**19, 2**20)), } # Remove random items from args rand_kwargs = dict(random.sample(tuple(rand_kwargs.items()), random.randint(0, len(rand_kwargs)))) # Overload given random kwargs with kwargs return torf.Torrent(**{**rand_kwargs, **kwargs}) return _create_torrent @pytest.fixture def create_torrent_file(tmp_path): @contextlib.contextmanager def _create_torrent_file(tmp_path, **kwargs): torrent_file = tmp_path / 'test.torrent' try: t = 
torf.Torrent(**kwargs) t.generate() t.write(torrent_file) yield torrent_file finally: if os.path.exists(torrent_file): os.remove(torrent_file) return functools.partial(_create_torrent_file, tmp_path) @pytest.fixture def forced_piece_size(pytestconfig): @contextlib.contextmanager def _forced_piece_size(piece_size): orig_piece_size_min = torf.Torrent.piece_size_min_default torf.Torrent.piece_size_min_default = piece_size with mock.patch('torf.Torrent.piece_size', new_callable=mock.PropertyMock) as mock_piece_size: def piece_size_setter(prop, torrent, value): torrent.metainfo['info']['piece length'] = piece_size mock_piece_size.return_value = piece_size mock_piece_size.__set__ = piece_size_setter yield piece_size torf.Torrent.piece_size_min_default = orig_piece_size_min return _forced_piece_size # https://stackoverflow.com/a/45690594 @pytest.fixture def free_port(): import socket with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: s.bind(('', 0)) s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) return s.getsockname()[1] rndusr-torf-547b989/tests/test_attributes.py000066400000000000000000002117121513142010300212270ustar00rootroot00000000000000import copy import glob import math import os import pickle import re from datetime import datetime from pathlib import Path from unittest.mock import patch import pytest import torf from torf import _errors as errors from torf import _utils as utils def test_path_doesnt_exist(create_torrent): torrent = create_torrent() with pytest.raises(torf.ReadError) as excinfo: torrent.path = '/this/path/does/not/exist' assert excinfo.match('^/this/path/does/not/exist: No such file or directory$') assert torrent.path is None for key in ('name', 'files', 'length', 'pieces'): assert key not in torrent.metainfo['info'] def test_path_is_empty_directory(create_torrent, tmp_path): (tmp_path / 'empty').mkdir() torrent = create_torrent(path=tmp_path / 'empty') assert torrent.path == tmp_path / 'empty' for key in 
('name', 'files', 'length', 'pieces'): assert key not in torrent.metainfo['info'] def test_path_is_empty_file(create_torrent, tmp_path): (tmp_path / 'empty').write_text('') torrent = create_torrent(path=tmp_path / 'empty') assert torrent.path == tmp_path / 'empty' for key in ('name', 'files', 'length', 'pieces'): assert key not in torrent.metainfo['info'] def test_path_is_directory_with_empty_file(create_torrent, tmp_path): (tmp_path / 'content').mkdir() (tmp_path / 'content' / 'empty_file').write_text('') torrent = create_torrent(path=tmp_path / 'content') assert torrent.path == tmp_path / 'content' for key in ('name', 'files', 'length', 'pieces'): assert key not in torrent.metainfo['info'] def test_path_reset(create_torrent, singlefile_content, multifile_content): torrent = create_torrent() torrent.path = singlefile_content.path assert torrent.path == Path(singlefile_content.path) torrent.private = True torrent.generate() assert 'pieces' in torrent.metainfo['info'] assert torrent.metainfo['info']['private'] is True assert torrent.metainfo['info']['name'] == os.path.basename(singlefile_content.path) assert 'length' in torrent.metainfo['info'] assert 'files' not in torrent.metainfo['info'] torrent.path = multifile_content.path assert torrent.path == Path(multifile_content.path) assert 'pieces' not in torrent.metainfo['info'] assert torrent.metainfo['info']['private'] is True torrent.generate() assert 'pieces' in torrent.metainfo['info'] assert torrent.metainfo['info']['private'] is True assert torrent.metainfo['info']['name'] == os.path.basename(multifile_content.path) assert 'files' in torrent.metainfo['info'] assert 'length' not in torrent.metainfo['info'] torrent.path = None assert torrent.path is None assert 'pieces' not in torrent.metainfo['info'] assert 'length' not in torrent.metainfo['info'] assert torrent.metainfo['info']['private'] is True assert torrent.metainfo['info']['name'] == os.path.basename(multifile_content.path) assert 
torrent.metainfo['info']['files'] == multifile_content.exp_metainfo['info']['files'] def test_path_switch_from_singlefile_to_multifile(create_torrent, singlefile_content, multifile_content): torrent = create_torrent() torrent.path = singlefile_content.path assert torrent.path == Path(singlefile_content.path) assert torrent.metainfo['info']['name'] == singlefile_content.exp_metainfo['info']['name'] assert torrent.metainfo['info']['length'] == singlefile_content.exp_metainfo['info']['length'] assert 'files' not in torrent.metainfo['info'] torrent.path = multifile_content.path assert torrent.path == Path(multifile_content.path) assert torrent.metainfo['info']['name'] == multifile_content.exp_metainfo['info']['name'] assert torrent.metainfo['info']['files'] == multifile_content.exp_metainfo['info']['files'] assert 'length' not in torrent.metainfo['info'] def test_path_switch_from_multifile_to_singlefile(create_torrent, singlefile_content, multifile_content): torrent = create_torrent() torrent.path = multifile_content.path assert torrent.path == Path(multifile_content.path) assert torrent.metainfo['info']['name'] == multifile_content.exp_metainfo['info']['name'] assert torrent.metainfo['info']['files'] == multifile_content.exp_metainfo['info']['files'] assert 'length' not in torrent.metainfo['info'] torrent.path = singlefile_content.path assert torrent.path == Path(singlefile_content.path) assert torrent.metainfo['info']['name'] == singlefile_content.exp_metainfo['info']['name'] assert torrent.metainfo['info']['length'] == singlefile_content.exp_metainfo['info']['length'] assert 'files' not in torrent.metainfo['info'] def test_path_is_period(create_torrent, multifile_content): torrent = create_torrent() cwd = os.getcwd() try: os.chdir(multifile_content.path) torrent.path = os.curdir assert torrent.path == Path(os.curdir) assert torrent.metainfo['info']['name'] == os.path.basename(multifile_content.path) assert torrent.metainfo['info']['files'] == 
multifile_content.exp_metainfo['info']['files'] finally: os.chdir(cwd) def test_path_is_double_period(create_torrent, multifile_content): torrent = create_torrent() cwd = os.getcwd() try: os.chdir(os.path.join(multifile_content.path, 'subdir')) torrent.path = os.pardir assert torrent.path == Path(os.pardir) assert torrent.metainfo['info']['name'] == os.path.basename(multifile_content.path) assert torrent.metainfo['info']['files'] == multifile_content.exp_metainfo['info']['files'] finally: os.chdir(cwd) def test_path_ends_with_period(create_torrent, multifile_content): torrent = create_torrent() torrent.path = Path(multifile_content.path, os.curdir) assert torrent.path == Path(multifile_content.path) assert torrent.metainfo['info']['name'] == multifile_content.exp_metainfo['info']['name'] assert torrent.metainfo['info']['files'] == multifile_content.exp_metainfo['info']['files'] def test_path_ends_with_double_period(create_torrent, multifile_content): torrent = create_torrent() torrent.path = Path(multifile_content.path, 'subdir' , os.pardir) assert torrent.path == Path(multifile_content.path, 'subdir', os.pardir) assert torrent.metainfo['info']['name'] == multifile_content.exp_metainfo['info']['name'] assert torrent.metainfo['info']['files'] == multifile_content.exp_metainfo['info']['files'] def test_location(create_torrent, tmp_path): torrent = create_torrent() assert torrent.location is None torrent.path = tmp_path assert torrent.location == tmp_path.parent def test_mode(singlefile_content, multifile_content): torrent = torf.Torrent() assert torrent.mode is None torrent.path = singlefile_content.path assert torrent.mode == 'singlefile' torrent.path = multifile_content.path assert torrent.mode == 'multifile' torrent.path = None assert torrent.mode == 'multifile' def test_files_singlefile(create_torrent, singlefile_content): torrent = create_torrent(path=singlefile_content.path) exp_files1 = (torf.File(singlefile_content.exp_metainfo['info']['name'], 
size=singlefile_content.exp_metainfo['info']['length']),) exp_files2 = (torf.File(torrent.name, size=torrent.size),) assert torrent.files == exp_files1 assert torrent.files == exp_files2 def test_files_multifile(create_torrent, multifile_content): torrent = create_torrent(path=multifile_content.path) torrent_name = os.path.basename(multifile_content.path) exp_files1 = tuple(torf.File([torrent_name] + fileinfo['path'], size=fileinfo['length']) for fileinfo in multifile_content.exp_metainfo['info']['files']) exp_files2 = tuple(torf.File([torrent.name] + fileinfo['path'], size=fileinfo['length']) for fileinfo in torrent.metainfo['info']['files']) assert torrent.files == exp_files1 assert torrent.files == exp_files2 def test_files_with_no_path(create_torrent): torrent = create_torrent() assert torrent.files == () def test_files_with_no_name(create_torrent, singlefile_content, multifile_content): for content in (singlefile_content, multifile_content): torrent = create_torrent(path=content.path) del torrent.metainfo['info']['name'] assert all(f.parts[0] == 'UNNAMED TORRENT' for f in torrent.files) def test_files_only_accepts_Iterables(create_torrent, tmp_path): (tmp_path / 'foo').write_text('asdf') torrent = create_torrent(path=tmp_path / 'foo') torrent.generate() assert torrent.metainfo['info']['name'] == 'foo' assert torrent.metainfo['info']['length'] == 4 with pytest.raises(ValueError) as excinfo: torrent.files = 'foo/bar' assert str(excinfo.value) == 'Not an Iterable: foo/bar' # metainfo did not change assert torrent.metainfo['info']['name'] == 'foo' assert torrent.metainfo['info']['length'] == 4 assert 'pieces' in torrent.metainfo['info'] assert torrent.files == (torf.File(Path('foo'), size=4),) def test_files_only_accepts_File_objects(create_torrent, tmp_path): (tmp_path / 'foo').write_text('asdf') torrent = create_torrent(path=tmp_path / 'foo') torrent.generate() with pytest.raises(ValueError) as excinfo: torrent.files = ('foo/bar',) assert str(excinfo.value) == 
'Not a File object: foo/bar' # metainfo did not change assert torrent.metainfo['info']['name'] == 'foo' assert torrent.metainfo['info']['length'] == 4 assert 'pieces' in torrent.metainfo['info'] assert torrent.files == (torf.File(Path('foo'), size=4),) def test_files_only_accepts_relative_paths(create_torrent, tmp_path): (tmp_path / 'foo').write_text('asdf') torrent = create_torrent(path=tmp_path / 'foo') torrent.generate() with pytest.raises(torf.PathError) as excinfo: torrent.files = (torf.File('/1/2/3', size=123),) assert str(excinfo.value) == '/1/2/3: Not a relative path' # metainfo did not change assert torrent.metainfo['info']['name'] == 'foo' assert torrent.metainfo['info']['length'] == 4 assert 'pieces' in torrent.metainfo['info'] assert torrent.files == (torf.File(Path('foo'), size=4),) def test_files_needs_common_path(create_torrent, tmp_path): content = tmp_path / 'asdf' ; content.mkdir() # noqa: E702 for i in range(1, 3): (content / f'file{i}').write_text('') # noqa: E701 torrent = create_torrent(path=content) torrent.generate() with pytest.raises(torf.CommonPathError) as excinfo: torrent.files = (torf.File(Path('foo/bar/baz'), size=123), torf.File(Path('quux/bar/bam'), size=456),) assert str(excinfo.value) == 'No common parent path: foo/bar/baz, quux/bar/bam' # metainfo did not change assert torrent.metainfo['info']['files'] == [{'path': ['file1'], 'length': 6}, {'path': ['file2'], 'length': 6}] assert torrent.metainfo['info']['name'] == 'asdf' assert 'pieces' in torrent.metainfo['info'] assert 'length' not in torrent.metainfo['info'] def test_files_updates_metainfo_when_manipulated(create_torrent, tmp_path): content = tmp_path / 'bar' ; content.mkdir() # noqa: E702 for i in range(1, 3): (content / f'file{i}').write_text('') # noqa: E701 torrent = create_torrent(path=content) torrent.generate() assert torrent.metainfo['info']['name'] == 'bar' assert 'pieces' in torrent.metainfo['info'] assert 'length' not in torrent.metainfo['info'] assert 
torrent.files == (torf.File(Path('bar', 'file1'), size=6), torf.File(Path('bar', 'file2'), size=6),) torrent.files.append(torf.File(Path('bar', 'subdir', 'file3'), size=123)) assert torrent.metainfo['info']['files'] == [{'path': ['file1'], 'length': 6}, {'path': ['file2'], 'length': 6}, {'path': ['subdir', 'file3'], 'length': 123}] assert torrent.metainfo['info']['name'] == 'bar' assert 'length' not in torrent.metainfo['info'] assert 'pieces' not in torrent.metainfo['info'] assert torrent.files == (torf.File(Path('bar', 'file1'), size=6), torf.File(Path('bar', 'file2'), size=6), torf.File(Path('bar', 'subdir', 'file3'), size=123)) def test_files_switch_from_singlefile_to_multifile(create_torrent, tmp_path): (tmp_path / 'foo').write_text('asdf') torrent = create_torrent(path=tmp_path / 'foo') torrent.generate() assert torrent.metainfo['info']['length'] == 4 assert torrent.metainfo['info']['name'] == 'foo' assert 'pieces' in torrent.metainfo['info'] assert torrent.files == (torf.File(Path('foo'), size=4),) torrent.files = (torf.File(Path('bar', 'file1'), size=123), torf.File(Path('bar', 'file2'), size=456)) assert torrent.metainfo['info']['files'] == [{'path': ['file1'], 'length': 123}, {'path': ['file2'], 'length': 456}] assert torrent.metainfo['info']['name'] == 'bar' assert 'length' not in torrent.metainfo['info'] assert 'pieces' not in torrent.metainfo['info'] assert torrent.files == (torf.File(Path('bar', 'file1'), size=123), torf.File(Path('bar', 'file2'), size=456)) def test_files_switch_from_multifile_to_singlefile(create_torrent, tmp_path): (tmp_path / 'bar').mkdir() for i in range(1, 3): (tmp_path / 'bar' / f'file{i}').write_text('') torrent = create_torrent(path=tmp_path / 'bar') torrent.generate() assert torrent.metainfo['info']['name'] == 'bar' assert torrent.metainfo['info']['files'] == [{'path': ['file1'], 'length': 6}, {'path': ['file2'], 'length': 6}] assert 'pieces' in torrent.metainfo['info'] assert 'length' not in torrent.metainfo['info'] assert 
torrent.files == (torf.File(Path('bar', 'file1'), size=6), torf.File(Path('bar', 'file2'), size=6)) torrent.files = (torf.File(Path('foo'), size=123),) assert torrent.metainfo['info']['name'] == 'foo' assert torrent.metainfo['info']['length'] == 123 assert 'pieces' not in torrent.metainfo['info'] assert 'files' not in torrent.metainfo['info'] assert torrent.files == (torf.File(Path('foo'), size=123),) def test_filepaths_singlefile(create_torrent, singlefile_content): torrent = create_torrent(path=singlefile_content.path) exp_filepaths1 = [Path(singlefile_content.path)] exp_filepaths2 = [Path(torrent.path)] assert torrent.filepaths == exp_filepaths1 assert torrent.filepaths == exp_filepaths2 def test_filepaths_multifile(create_torrent, multifile_content): torrent = create_torrent(path=multifile_content.path) exp_filepaths1 = tuple(Path(multifile_content.path, *fileinfo['path']) for fileinfo in multifile_content.exp_metainfo['info']['files']) exp_filepaths2 = tuple(Path(torrent.path, *fileinfo['path']) for fileinfo in torrent.metainfo['info']['files']) assert torrent.filepaths == exp_filepaths1 assert torrent.filepaths == exp_filepaths2 def test_filepaths_is_set_to_empty_tuple(create_torrent, multifile_content, singlefile_content): for content in (singlefile_content, multifile_content): torrent = create_torrent(path=content.path) torrent.generate() assert 'name' in torrent.metainfo['info'] assert 'pieces' in torrent.metainfo['info'] if content is singlefile_content: assert 'length' in torrent.metainfo['info'] else: assert 'files' in torrent.metainfo['info'] torrent.filepaths = () assert torrent.filepaths == () for key in ('files', 'length', 'pieces'): assert key not in torrent.metainfo['info'] assert torrent.metainfo['info']['name'] == os.path.basename(content.path) def test_filepaths_with_single_file_in_directory(create_torrent, tmp_path): (tmp_path / 'content').mkdir() (tmp_path / 'content' / 'file1').write_text('not empty') torrent = create_torrent(path=tmp_path / 
'content') assert torrent.filepaths == (tmp_path / 'content' / 'file1',) assert torrent.mode == 'multifile' assert torrent.metainfo['info']['name'] == 'content' assert torrent.metainfo['info']['files'] == [{'path': ['file1'], 'length': 9}] def test_filepaths_with_single_file_is_changed_to_different_file(create_torrent, tmp_path): (tmp_path / 'content').write_bytes(b'foo') torrent = create_torrent(path=tmp_path / 'content') assert torrent.filepaths == (tmp_path / 'content',) assert torrent.metainfo['info']['name'] == 'content' assert torrent.metainfo['info']['length'] == 3 (tmp_path / 'content2').write_bytes(b'fooo') torrent.filepaths = (tmp_path / 'content2',) assert torrent.filepaths == [tmp_path / 'content2'] assert torrent.metainfo['info']['name'] == 'content2' assert torrent.metainfo['info']['length'] == 4 def test_filepaths_with_single_file_is_changed_to_multiple_files(create_torrent, tmp_path): (tmp_path / 'content').write_bytes(b'foo') torrent = create_torrent(path=tmp_path / 'content') assert torrent.filepaths == (tmp_path / 'content',) assert torrent.metainfo['info']['name'] == 'content' assert torrent.metainfo['info']['length'] == 3 (tmp_path / 'content2').mkdir() (tmp_path / 'content2' / 'foo').write_bytes(b'one') (tmp_path / 'content2' / 'bar').write_bytes(b'three') torrent.filepaths = (tmp_path / 'content2' / 'foo', tmp_path / 'content2' / 'bar') assert torrent.filepaths == [tmp_path / 'content2' / 'foo', tmp_path / 'content2' / 'bar'] assert torrent.metainfo['info']['name'] == 'content2' assert 'length' not in torrent.metainfo['info'] assert torrent.metainfo['info']['files'] == [{'path': ['bar'], 'length': 5}, {'path': ['foo'], 'length': 3}] def test_filepaths_with_multiple_files_is_changed_to_different_files(create_torrent, tmp_path): (tmp_path / 'content').mkdir() (tmp_path / 'content' / 'foo').write_bytes(b'one') (tmp_path / 'content' / 'bar').write_bytes(b'three') torrent = create_torrent(path=tmp_path / 'content') assert torrent.filepaths == 
[tmp_path / 'content' / 'foo', tmp_path / 'content' / 'bar'] assert torrent.metainfo['info']['name'] == 'content' assert torrent.metainfo['info']['files'] == [{'path': ['bar'], 'length': 5}, {'path': ['foo'], 'length': 3}] (tmp_path / 'content2').mkdir() (tmp_path / 'content2' / 'one').write_bytes(b'foo') (tmp_path / 'content2' / 'two').write_bytes(b'bar') (tmp_path / 'content2' / 'nope').write_bytes(b'unwanted') torrent.filepaths = (tmp_path / 'content2' / 'one', tmp_path / 'content2' / 'two') assert torrent.filepaths == [tmp_path / 'content2' / 'one', tmp_path / 'content2' / 'two'] assert torrent.metainfo['info']['name'] == 'content2' assert torrent.metainfo['info']['files'] == [{'path': ['one'], 'length': 3}, {'path': ['two'], 'length': 3}] def test_filepaths_with_multiple_files_is_changed_to_single_file(create_torrent, tmp_path): (tmp_path / 'content').mkdir() (tmp_path / 'content' / 'foo').write_bytes(b'one') (tmp_path / 'content' / 'bar').write_bytes(b'three') torrent = create_torrent(path=tmp_path / 'content') assert torrent.filepaths == [tmp_path / 'content' / 'foo', tmp_path / 'content' / 'bar'] assert torrent.metainfo['info']['name'] == 'content' assert torrent.metainfo['info']['files'] == [{'path': ['bar'], 'length': 5}, {'path': ['foo'], 'length': 3}] (tmp_path / 'content2').write_bytes(b'foo') torrent.filepaths = (tmp_path / 'content2',) assert torrent.filepaths == (tmp_path / 'content2',) assert torrent.metainfo['info']['name'] == 'content2' assert torrent.metainfo['info']['length'] == 3 assert 'files' not in torrent.metainfo['info'] def test_filepaths_with_single_file_manipulated_into_multifile(create_torrent, tmp_path): (tmp_path / 'content').mkdir() (tmp_path / 'content' / 'file1').write_bytes(b'foo') (tmp_path / 'content' / 'file2').write_bytes(b'foo') torrent = create_torrent(path=tmp_path / 'content' / 'file1') assert torrent.filepaths == (tmp_path / 'content' / 'file1',) assert torrent.metainfo['info']['name'] == 'file1' assert 
torrent.metainfo['info']['length'] == 3 assert 'files' not in torrent.metainfo['info'] torrent.filepaths.append(tmp_path / 'content' / 'file2') assert torrent.metainfo['info']['name'] == 'content' assert torrent.metainfo['info']['files'] == [{'path': ['file1'], 'length': 3}, {'path': ['file2'], 'length': 3}] assert 'length' not in torrent.metainfo['info'] def test_filepaths_updates_metainfo_automatically_when_manipulated(create_torrent, tmp_path): (tmp_path / 'content').mkdir() for i in range(1, 5): (tmp_path / 'content' / f'file{i}').write_text('') torrent = create_torrent(path=tmp_path / 'content') assert torrent.metainfo['info']['files'] == [{'path': ['file1'], 'length': 6}, {'path': ['file2'], 'length': 6}, {'path': ['file3'], 'length': 6}, {'path': ['file4'], 'length': 6}] torrent.filepaths.remove(tmp_path / 'content' / 'file3') assert torrent.metainfo['info']['files'] == [{'path': ['file1'], 'length': 6}, {'path': ['file2'], 'length': 6}, {'path': ['file4'], 'length': 6}] torrent.filepaths.append(tmp_path / 'content' / 'file3') assert torrent.metainfo['info']['files'] == [{'path': ['file1'], 'length': 6}, {'path': ['file2'], 'length': 6}, {'path': ['file3'], 'length': 6}, {'path': ['file4'], 'length': 6}] torrent.filepaths.remove(tmp_path / 'content' / 'file2') assert torrent.metainfo['info']['files'] == [{'path': ['file1'], 'length': 6}, {'path': ['file3'], 'length': 6}, {'path': ['file4'], 'length': 6}] def test_filepaths_gets_information_from_metainfo(create_torrent, tmp_path): (tmp_path / 'content').mkdir() for i in range(1, 5): (tmp_path / 'content' / f'file{i}').write_text('') torrent = create_torrent(path=tmp_path / 'content') torrent.metainfo['info']['files'].remove({'path': ['file1'], 'length': 6}) torrent.metainfo['info']['files'].remove({'path': ['file2'], 'length': 6}) torrent.metainfo['info']['files'].append({'path': ['file9'], 'length': 6000}) assert torrent.filepaths == [tmp_path / 'content' / 'file3', tmp_path / 'content' / 'file4', tmp_path / 
'content' / 'file9'] with pytest.raises(torf.ReadError) as excinfo: torrent.generate() assert str(excinfo.value) == f'{tmp_path / "content" / "file9"}: No such file or directory' def test_filepaths_uses_common_parent_directory(create_torrent, tmp_path): (tmp_path / 'content').mkdir() for i in range(1, 4): (tmp_path / 'content' / f'file{i}').write_text('') (tmp_path / 'content' / 'subdir').mkdir() for i in range(4, 6): (tmp_path / 'content' / 'subdir' / f'file{i}').write_text('') torrent = create_torrent(path=tmp_path / 'content' / 'subdir') assert torrent.metainfo['info']['name'] == 'subdir' assert torrent.metainfo['info']['files'] == [{'path': ['file4'], 'length': 11}, {'path': ['file5'], 'length': 11}] torrent.filepaths.append(tmp_path / 'content' / 'file3') assert torrent.metainfo['info']['name'] == 'content' assert torrent.metainfo['info']['files'] == [{'path': ['file3'], 'length': 6}, {'path': ['subdir', 'file4'], 'length': 11}, {'path': ['subdir', 'file5'], 'length': 11}] def test_filepaths_resolves_directories(create_torrent, tmp_path): (tmp_path / 'content').mkdir() for i in range(1, 3): (tmp_path / 'content' / f'file{i}').write_text('') (tmp_path / 'content' / 'subdir').mkdir() for i in range(3, 5): (tmp_path / 'content' / 'subdir' / f'file{i}').write_text('') (tmp_path / 'content' / 'subdir' / 'subsubdir').mkdir() for i in range(5, 7): (tmp_path / 'content' / 'subdir' / 'subsubdir' / f'file{i}').write_text('') torrent = create_torrent(path=tmp_path / 'content') assert torrent.metainfo['info']['name'] == 'content' assert torrent.metainfo['info']['files'] == [{'path': ['file1'], 'length': 6}, {'path': ['file2'], 'length': 6}, {'path': ['subdir', 'file3'], 'length': 9}, {'path': ['subdir', 'file4'], 'length': 9}, {'path': ['subdir', 'subsubdir', 'file5'], 'length': 12}, {'path': ['subdir', 'subsubdir', 'file6'], 'length': 12}] torrent.filepaths = (tmp_path / 'content' / 'subdir' / 'subsubdir',) assert torrent.metainfo['info']['name'] == 'subsubdir' assert 
torrent.metainfo['info']['files'] == [{'path': ['file5'], 'length': 12}, {'path': ['file6'], 'length': 12}] torrent.filepaths = (tmp_path / 'content' / 'subdir',) assert torrent.metainfo['info']['name'] == 'subdir' assert torrent.metainfo['info']['files'] == [{'path': ['file3'], 'length': 9}, {'path': ['file4'], 'length': 9}, {'path': ['subsubdir', 'file5'], 'length': 12}, {'path': ['subsubdir', 'file6'], 'length': 12}] def test_filepaths_understands_relative_paths(create_torrent, tmp_path): (tmp_path / 'parent' / 'content').mkdir(parents=True) for i in range(1, 4): (tmp_path / 'parent' / 'content' / f'file{i}').write_text('') cwd = os.getcwd() try: os.chdir(tmp_path) abspath = tmp_path / 'parent' / 'content' relpath = Path('parent', 'content') torrent = create_torrent(path=relpath) # File paths are relative assert torrent.filepaths == [relpath / 'file1', relpath / 'file2', relpath / 'file3'] assert torrent.name == 'content' # Remove file3 as absolute path torrent.filepaths.remove(abspath / 'file3') assert torrent.filepaths == [relpath / 'file1', relpath / 'file2'] assert torrent.name == 'content' # Append file3 as absolute path torrent.filepaths.append(abspath / 'file3') assert torrent.filepaths == [relpath / 'file1', relpath / 'file2', abspath / 'file3'] assert torrent.name == 'content' # Add file outside of torrent.path as relative path (tmp_path / 'parent' / 'outsider').write_text('') torrent.filepaths.append(tmp_path / 'parent' / 'outsider') assert torrent.name == 'parent' assert torrent.filepaths == [relpath / 'file1', relpath / 'file2', abspath / 'file3', Path('parent', 'outsider')] finally: os.chdir(cwd) def test_filepaths_does_not_accept_nonexisting_files(create_torrent, tmp_path): (tmp_path / 'content').mkdir() for i in range(1, 5): (tmp_path / 'content' / f'file{i}').write_text('') torrent = create_torrent(path=tmp_path / 'content') with pytest.raises(torf.ReadError) as excinfo: torrent.filepaths.append(tmp_path / 'content' / 'asdf') assert 
str(excinfo.value) == f'{tmp_path / "content" / "asdf"}: No such file or directory' assert torrent.metainfo['info']['name'] == 'content' assert torrent.metainfo['info']['files'] == [{'path': ['file1'], 'length': 6}, {'path': ['file2'], 'length': 6}, {'path': ['file3'], 'length': 6}, {'path': ['file4'], 'length': 6}] def test_filepaths_updates_path(create_torrent, tmp_path): torrent = create_torrent() assert torrent.path is None assert torrent.filepaths == () (tmp_path / 'some_file').write_text('') torrent.filepaths.append(tmp_path / 'some_file') assert torrent.path == tmp_path / 'some_file' assert torrent.filepaths == [tmp_path / 'some_file'] def test_filetree_with_no_path(create_torrent): torrent = create_torrent() assert torrent.filetree == {} def test_filetree_with_subdirectories(create_torrent, tmp_path): (tmp_path / 'content').mkdir() for i in range(1, 3): (tmp_path / 'content' / f'file{i}').write_text('') (tmp_path / 'content' / 'subdir').mkdir() for i in range(3, 5): (tmp_path / 'content' / 'subdir' / f'file{i}').write_text('') (tmp_path / 'content' / 'subdir' / 'subsubdir').mkdir() for i in range(5, 7): (tmp_path / 'content' / 'subdir' / 'subsubdir' / f'file{i}').write_text('') torrent = create_torrent(path=tmp_path / 'content') File = torf.File assert torrent.filetree == {'content': { 'file1': File(Path('content', 'file1'), size=6), 'file2': File(Path('content', 'file2'), size=6), 'subdir': {'file3': File(Path('content', 'subdir', 'file3'), size=9), 'file4': File(Path('content', 'subdir', 'file4'), size=9), 'subsubdir': {'file5': File(Path('content/subdir/subsubdir/file5'), size=12), 'file6': File(Path('content/subdir/subsubdir/file6'), size=12)}}}} def test_filetree_with_single_file_in_directory(create_torrent, tmp_path): (tmp_path / 'content').mkdir() (tmp_path / 'content' / 'file').write_text('') torrent = create_torrent(path=tmp_path / 'content') File = torf.File assert torrent.filetree == {'content': {'file': File(Path('content', 'file'), size=6)}} 
def test_filetree_with_single_file(create_torrent, tmp_path):
    """A single-file torrent maps its name directly to a File object."""
    # 6 bytes, matching size=6 asserted below
    (tmp_path / 'content').write_text('x' * 6)
    torrent = create_torrent(path=tmp_path / 'content')
    File = torf.File
    assert torrent.filetree == {'content': File(Path('content'), size=6)}


def test_name(create_torrent, singlefile_content, multifile_content):
    """``name`` follows ``path`` by default, can be overridden, and survives ``path = None``."""
    def generate_exp_files(content, torrent_name):
        # Expected ``files``: paths relative to the content root, prefixed with the torrent name
        if content is singlefile_content:
            return (Path(torrent_name),)
        else:
            filewalker = (Path(f) for f in glob.iglob(os.path.join(content.path, '**'), recursive=True)
                          if os.path.isfile(f))
            rel_paths = sorted(path.relative_to(content.path) for path in filewalker)
            exp_files = tuple(torf.File(Path(torrent_name, path), size=os.path.getsize(Path(content.path, path)))
                              for path in rel_paths)
            return exp_files

    def generate_exp_filepaths(content):
        # Expected ``filepaths``: every regular file below the content path, sorted
        if content is singlefile_content:
            return (Path(content.path),)
        else:
            return tuple(sorted(Path(f) for f in glob.iglob(os.path.join(content.path, '**'), recursive=True)
                                if os.path.isfile(f)))

    torrent = create_torrent()
    for content in (singlefile_content, multifile_content):
        torrent.name = None

        torrent.path = content.path
        assert torrent.name == os.path.basename(torrent.path)
        assert torrent.files == generate_exp_files(content, os.path.basename(content.path))
        assert torrent.filepaths == generate_exp_filepaths(content)
        for fp in torrent.filepaths:
            assert os.path.exists(fp)

        torrent.name = 'Any name should be allowed'
        assert torrent.name == 'Any name should be allowed'
        assert torrent.files == generate_exp_files(content, 'Any name should be allowed')
        assert torrent.filepaths == generate_exp_filepaths(content)
        for fp in torrent.filepaths:
            assert os.path.exists(fp)

        torrent.path = None
        assert torrent.name == 'Any name should be allowed'
        assert torrent.files == generate_exp_files(content, 'Any name should be allowed')
        assert torrent.filepaths == ()

        torrent.name = 'foo'
        assert torrent.name == 'foo'
        assert torrent.files == generate_exp_files(content, 'foo')
        assert torrent.filepaths == ()

        torrent.path = content.path
        assert torrent.name == os.path.basename(torrent.path)
        assert torrent.files == generate_exp_files(content, os.path.basename(torrent.path))
        assert torrent.filepaths == generate_exp_filepaths(content)
        for fp in torrent.filepaths:
            assert os.path.exists(fp)


def test_size(create_torrent, singlefile_content, multifile_content):
    """``size`` is 0 without a path and the expected content size with one."""
    torrent = create_torrent()
    assert torrent.size == 0
    for content in (singlefile_content, multifile_content):
        torrent.path = content.path
        assert torrent.size == content.exp_attrs.size


def test_piece_size_of_empty_torrent_is_zero():
    """A torrent without content reports a piece size of 0."""
    assert torf.Torrent().piece_size == 0


def test_piece_size_is_set_automatically(create_torrent, multifile_content):
    """Setting a path fills in 'piece length' without an explicit piece size."""
    torrent = create_torrent(path=multifile_content.path)
    assert torrent.piece_size != 0
    assert 'piece length' in torrent.metainfo['info']

    torrent = torf.Torrent()
    assert torrent.piece_size == 0
    assert 'piece length' not in torrent.metainfo['info']

    torrent.path = multifile_content.path
    assert torrent.piece_size != 0
    assert 'piece length' in torrent.metainfo['info']


def test_piece_size_is_set_manually(create_torrent, multifile_content):
    """An explicit piece size is stored as given until ``path`` is assigned afterwards."""
    torrent = create_torrent(path=multifile_content.path, piece_size=16 * 2**20)
    assert torrent.piece_size == 16 * 2**20
    assert torrent.metainfo['info']['piece length'] == 16 * 2**20

    torrent = torf.Torrent(piece_size=16 * 2**20)
    assert torrent.piece_size == 16 * 2**20
    assert torrent.metainfo['info']['piece length'] == 16 * 2**20

    torrent.path = multifile_content.path
    assert torrent.piece_size != 16 * 2**20
    assert torrent.metainfo['info']['piece length'] != 16 * 2**20


def test_piece_size_defaults_to_return_value_of_calculate_piece_size(create_torrent, multifile_content):
    """``piece_size = None`` stores whatever calculate_piece_size() returns."""
    torrent = create_torrent(path=multifile_content.path)
    assert torrent.piece_size != 4 * 2**20
    assert torrent.metainfo['info']['piece length'] != 4 * 2**20

    with patch.object(torf.Torrent, 'calculate_piece_size', lambda self, size, min_size, max_size: 4 * 2**20):
        torrent.piece_size = None
        assert torrent.piece_size == 4 * 2**20
        assert torrent.metainfo['info']['piece length'] == 4 * 2**20


def test_piece_size_when_torrent_size_is_zero(create_torrent, multifile_content):
    """Excluding every file leaves size and piece size at 0 and sets no 'piece length'."""
    torrent = torf.Torrent(path=multifile_content.path, exclude_globs=('*',))
    assert torrent.size == 0
    assert torrent.piece_size == 0
    assert 'piece length' not in torrent.metainfo['info']


def test_piece_size_is_set_to_wrong_type(create_torrent):
    """A non-integer piece size raises ValueError."""
    torrent = create_torrent()
    with pytest.raises(ValueError) as excinfo:
        torrent.piece_size = 'hello'
    assert str(excinfo.value) == "piece_size must be int, not str: 'hello'"


@pytest.mark.parametrize('piece_size_', (-1, 0, 16385))
def test_piece_size_is_set_manually_to_number_not_divisible_by_16_kib(piece_size_, create_torrent):
    """Piece sizes that are not positive multiples of 16 KiB raise PieceSizeError."""
    torrent = create_torrent()
    with pytest.raises(torf.PieceSizeError) as excinfo:
        torrent.piece_size = piece_size_
    assert str(excinfo.value) == f'Piece size must be divisible by 16 KiB: {piece_size_}'


def test_piece_size_can_be_invalid_in_metainfo(create_torrent):
    """Writing invalid 'piece length' values directly into the metainfo does not raise."""
    torrent = create_torrent()
    torrent.metainfo['info']['piece length'] = 123
    torrent.metainfo['info']['piece length'] = 'foo'
    torrent.metainfo['info']['piece length'] = -12


PIECE_SIZE_MIN_DEFAULT = 32768
PIECE_SIZE_MAX_DEFAULT = 163840


# "piece_size_" because "piece_size" is already used for --piece-size
# (see conftest.py)
@pytest.mark.parametrize('with_path', (True, False), ids=['with path', 'without path'])
@pytest.mark.parametrize(
    argnames='piece_size_, piece_size_min, piece_size_max, exp_piece_size_min, exp_piece_size_max, exp_exception',
    argvalues=(
        pytest.param(
            None, None, None,
            PIECE_SIZE_MIN_DEFAULT, PIECE_SIZE_MAX_DEFAULT,
            None,
            id='All default values',
        ),
        pytest.param(
            None, 262144, 1048576,
            262144, 1048576,
            None,
            id='Custom min/max values',
        ),
        pytest.param(
            PIECE_SIZE_MIN_DEFAULT / 2, None, None,
            None, None,
            errors.PieceSizeError(int(PIECE_SIZE_MIN_DEFAULT / 2), min=PIECE_SIZE_MIN_DEFAULT, max=PIECE_SIZE_MAX_DEFAULT),
            id='Custom piece size smaller than Torrent.piece_size_min_default',
        ),
        pytest.param(
            PIECE_SIZE_MAX_DEFAULT * 2, None, None,
            None, None,
            errors.PieceSizeError(int(PIECE_SIZE_MAX_DEFAULT * 2), min=PIECE_SIZE_MIN_DEFAULT, max=PIECE_SIZE_MAX_DEFAULT),
            id='Custom piece size bigger than Torrent.piece_size_max_default',
        ),
        pytest.param(
            16384, 262144, 524288,
            None, None,
            errors.PieceSizeError(16384, min=262144, max=524288),
            id='Custom piece size smaller than custom piece_size_min',
        ),
        pytest.param(
            1048576, 262144, 524288,
            None, None,
            errors.PieceSizeError(1048576, min=262144, max=524288),
            id='Custom piece size bigger than custom piece_size_max',
        ),
        pytest.param(
            PIECE_SIZE_MIN_DEFAULT - 1, None, None,
            None, None,
            errors.PieceSizeError(PIECE_SIZE_MIN_DEFAULT - 1),
            id='Invalid custom piece_size',
        ),
        pytest.param(
            None, 123, None,
            None, None,
            errors.PieceSizeError(123),
            id='Invalid custom piece_size_min',
        ),
        pytest.param(
            None, None, 456,
            None, None,
            errors.PieceSizeError(456),
            id='Invalid custom piece_size_max',
        ),
    ),
    ids=lambda v: repr(v),
)
def test_piece_size_min_max_arguments(piece_size_, piece_size_min, piece_size_max,
                                      exp_piece_size_max, exp_piece_size_min, exp_exception,
                                      with_path, singlefile_content, mocker):
    """Torrent() validates piece_size, piece_size_min and piece_size_max arguments.

    Either the expected exception is raised by the constructor or the
    resulting torrent exposes the expected minimum and maximum.
    """
    mocker.patch.object(torf.Torrent, 'piece_size_min_default', PIECE_SIZE_MIN_DEFAULT)
    mocker.patch.object(torf.Torrent, 'piece_size_max_default', PIECE_SIZE_MAX_DEFAULT)

    if exp_exception:
        with pytest.raises(type(exp_exception), match=rf'^{re.escape(str(exp_exception))}$'):
            torf.Torrent(
                path=singlefile_content.path if with_path else None,
                piece_size=piece_size_,
                piece_size_min=piece_size_min,
                piece_size_max=piece_size_max,
            )
    else:
        torrent = torf.Torrent(
            path=singlefile_content.path if with_path else None,
            piece_size=piece_size_,
            piece_size_min=piece_size_min,
            piece_size_max=piece_size_max,
        )
        if with_path:
            assert torrent.piece_size not in (0, None)
        else:
            assert torrent.piece_size == 0
        assert torrent.piece_size_min == exp_piece_size_min
        assert torrent.piece_size_max == exp_piece_size_max
        assert torrent.piece_size_min_default == PIECE_SIZE_MIN_DEFAULT
        assert torrent.piece_size_max_default == PIECE_SIZE_MAX_DEFAULT


@pytest.mark.parametrize(
    argnames=(
        'old_piece_size, new_piece_size, piece_size_min, piece_size_max, order,'
        'exp_piece_size, exp_piece_size_min, exp_piece_size_max, exp_exception'
    ),
    argvalues=(
        pytest.param(
            0, 524288, 131072, 1048576, 'piece_size_is_set_first',
            524288, 131072, 1048576, None,
            id='old_piece_size=0; new_piece_size between min/max; piece_size_is_set_first',
        ),
        pytest.param(
            0, 524288, 131072, 1048576, 'min_max_is_set_first',
            524288, 131072, 1048576, None,
            id='old_piece_size=0; new_piece_size between min/max; min_max_is_set_first',
        ),
        pytest.param(
            0, 65536, 131072, 1048576, 'piece_size_is_set_first',
            131072, 131072, 1048576, None,
            id='old_piece_size=0; new_piece_size < min; piece_size_is_set_first',
        ),
        pytest.param(
            0, 65536, 131072, 1048576, 'min_max_is_set_first',
            0, 131072, 1048576, errors.PieceSizeError(65536, min=131072, max=1048576),
            id='old_piece_size=0; new_piece_size < min; min_max_is_set_first',
        ),
        pytest.param(
            0, 1048576 * 2, 131072, 1048576, 'piece_size_is_set_first',
            1048576, 131072, 1048576, None,
            id='old_piece_size=0; new_piece_size > max; piece_size_is_set_first',
        ),
        pytest.param(
            0, 1048576 * 2, 131072, 1048576, 'min_max_is_set_first',
            0, 131072, 1048576, errors.PieceSizeError(1048576 * 2, min=131072, max=1048576),
            id='old_piece_size=0; new_piece_size > max; min_max_is_set_first',
        ),
        pytest.param(
            262144, 524288, 131072, 1048576, 'piece_size_is_set_first',
            524288, 131072, 1048576, None,
            id='old_piece_size=262144; new_piece_size between min/max; piece_size_is_set_first',
        ),
        pytest.param(
            262144, 524288, 131072, 1048576, 'min_max_is_set_first',
            524288, 131072, 1048576, None,
            id='old_piece_size=262144; new_piece_size between min/max; min_max_is_set_first',
        ),
        pytest.param(
            262144, 65536, 131072, 1048576, 'piece_size_is_set_first',
            131072, 131072, 1048576, None,
            id='old_piece_size=262144; new_piece_size < min; piece_size_is_set_first',
        ),
        pytest.param(
            262144, 65536, 131072, 1048576, 'min_max_is_set_first',
            262144, 131072, 1048576, errors.PieceSizeError(65536, min=131072, max=1048576),
            id='old_piece_size=262144; new_piece_size < min; min_max_is_set_first',
        ),
        pytest.param(
            262144, 1048576 * 2, 131072, 1048576, 'piece_size_is_set_first',
            1048576, 131072, 1048576, None,
            id='old_piece_size=262144; new_piece_size > max; piece_size_is_set_first',
        ),
        pytest.param(
            262144, 1048576 * 2, 131072, 1048576, 'min_max_is_set_first',
            262144, 131072, 1048576, errors.PieceSizeError(1048576 * 2, min=131072, max=1048576),
            id='old_piece_size=262144; new_piece_size > max; min_max_is_set_first',
        ),
    ),
    ids=lambda v: repr(v),
)
def test_piece_size_min_max_attributes(old_piece_size, new_piece_size, piece_size_min, piece_size_max, order,
                                       exp_piece_size, exp_piece_size_min, exp_piece_size_max, exp_exception,
                                       singlefile_content, mocker):
    """Check how piece_size interacts with piece_size_min/max depending on assignment order.

    The expected values encode two outcomes: an out-of-range piece size that is
    set before the limits ends up equal to the nearest limit, while one that is
    set after the limits raises PieceSizeError and the old piece size remains.
    """
    # Lift the class-level limits so only the per-instance limits matter
    mocker.patch.object(torf.Torrent, 'piece_size_min_default', 0)
    mocker.patch.object(torf.Torrent, 'piece_size_max_default', float('inf'))

    torrent = torf.Torrent()
    torrent.metainfo['info']['piece length'] = old_piece_size
    assert torrent.piece_size == old_piece_size

    def do_test():
        if order == 'piece_size_is_set_first':
            torrent.piece_size = new_piece_size
            torrent.piece_size_min = piece_size_min
            torrent.piece_size_max = piece_size_max
        else:
            torrent.piece_size_min = piece_size_min
            torrent.piece_size_max = piece_size_max
            torrent.piece_size = new_piece_size

    if exp_exception:
        with pytest.raises(type(exp_exception), match=rf'^{re.escape(str(exp_exception))}$'):
            do_test()
    else:
        do_test()

    # Checked in both cases: after an exception the old piece size is expected
    assert torrent.piece_size == exp_piece_size
    assert torrent.piece_size_min == exp_piece_size_min
    assert torrent.piece_size_max == exp_piece_size_max


@pytest.mark.parametrize(
    argnames='kwargs, cls_attrs, exp_min_piece_size, exp_max_piece_size',
    argvalues=(
        # Defaults
        pytest.param(
            {},
            {},
            torf.Torrent.piece_size_min_default,
            torf.Torrent.piece_size_max_default,
            id='defaults',
        ),

        # Custom min/max piece size provided via keyword arguments
        pytest.param(
            {'min_size': 128 * 2**10},
            {},
            128 * 2**10,
            torf.Torrent.piece_size_max_default,
            id='min_size > piece_size_min_default',
        ),
        pytest.param(
            {'max_size': 4 * 2**20},
            {},
            torf.Torrent.piece_size_min_default,
            4 * 2**20,
            id='max_size < piece_size_max_default',
        ),
        pytest.param(
            {'max_size': 64 * 2**20},
            {},
            torf.Torrent.piece_size_min_default,
            64 * 2**20,
            id='max_size > piece_size_max_default',
        ),
        pytest.param(
            {'min_size': 128 * 2**10, 'max_size': 4 * 2**20},
            {},
            128 * 2**10,
            4 * 2**20,
            id='min_size > piece_size_min_default / max_size < piece_size_max_default',
        ),
        pytest.param(
            {'min_size': 128 * 2**10, 'max_size': 64 * 2**20},
            {},
            128 * 2**10,
            64 * 2**20,
            id='min_size > piece_size_min_default / max_size > piece_size_max_default',
        ),

        # Custom min/max piece size provided via class attributes
        pytest.param(
            {},
            {'piece_size_min_default': 256 * 2**10},
            256 * 2**10,
            torf.Torrent.piece_size_max_default,
            id='increased piece_size_min_default',
        ),
        pytest.param(
            {},
            {'piece_size_max_default': 2 * 2**20},
            torf.Torrent.piece_size_min_default,
            2 * 2**20,
            id='decreased piece_size_max_default',
        ),
        pytest.param(
            {},
            {'piece_size_max_default': 128 * 2**20},
            torf.Torrent.piece_size_min_default,
            128 * 2**20,
            id='increased piece_size_max_default',
        ),
        pytest.param(
            {},
            {'piece_size_min_default': 256 * 2**10, 'piece_size_max_default': 2 * 2**20},
            256 * 2**10,
            2 * 2**20,
            id='increased piece_size_min_default / decreased piece_size_max_default',
        ),
        pytest.param(
            {},
            {'piece_size_min_default': 256 * 2**10, 'piece_size_max_default': 128 * 2**20},
            256 * 2**10,
            128 * 2**20,
            id='increased piece_size_min_default / increased piece_size_max_default',
        ),

        # Custom min/max piece size provided via keyword arguments and class attributes
        pytest.param(
            {'min_size': 128 * 2**10},
            {'piece_size_min_default': 256 * 2**10},
            128 * 2**10,
            torf.Torrent.piece_size_max_default,
            id='min_size < increased piece_size_min_default',
        ),
        pytest.param(
            {'min_size': 512 * 2**10},
            {'piece_size_min_default': 256 * 2**10},
            512 * 2**10,
            torf.Torrent.piece_size_max_default,
            id='min_size > increased piece_size_min_default',
        ),
        pytest.param(
            {'max_size': 32 * 2**20},
            {'piece_size_max_default': 128 * 2**20},
            torf.Torrent.piece_size_min_default,
            32 * 2**20,
            id='max_size < increased piece_size_max_default',
        ),
        pytest.param(
            {'max_size': 256 * 2**20},
            {'piece_size_max_default': 128 * 2**20},
            torf.Torrent.piece_size_min_default,
            256 * 2**20,
            id='max_size > increased piece_size_max_default',
        ),
    ),
    ids=lambda v: repr(v),
)
@pytest.mark.parametrize(
    argnames='content_size, exp_unconstrained_piece_size',
    argvalues=(
        # The trailing comments give ceil(content_size / exp_unconstrained_piece_size)
        (1, 16 * 2**10),               # 1 piece
        (10, 16 * 2**10),              # 1 piece
        (100, 16 * 2**10),             # 1 piece
        (1000, 16 * 2**10),            # 1 piece
        (1 * 2**10, 16 * 2**10),       # 1 piece
        (10 * 2**10, 16 * 2**10),      # 1 piece
        (100 * 2**10, 16 * 2**10),     # 7 pieces
        (300 * 2**10, 16 * 2**10),     # 19 pieces
        (600 * 2**10, 16 * 2**10),     # 38 pieces
        (1000 * 2**10, 16 * 2**10),    # 63 pieces
        (1 * 2**20, 16 * 2**10),       # 64 pieces
        (3 * 2**20, 16 * 2**10),       # 192 pieces
        (6 * 2**20, 16 * 2**10),       # 384 pieces
        (10 * 2**20, 32 * 2**10),      # 320 pieces
        (30 * 2**20, 64 * 2**10),      # 480 pieces
        (60 * 2**20, 128 * 2**10),     # 480 pieces
        (100 * 2**20, 256 * 2**10),    # 400 pieces
        (300 * 2**20, 1 * 2**20),      # 300 pieces
        (600 * 2**20, 2 * 2**20),      # 300 pieces
        (1000 * 2**20, 2 * 2**20),     # 500 pieces
        (1 * 2**30, 2 * 2**20),        # 512 pieces
        (3 * 2**30, 4 * 2**20),        # 768 pieces
        (6 * 2**30, 8 * 2**20),        # 768 pieces
        (10 * 2**30, 8 * 2**20),       # 1280 pieces
        (30 * 2**30, 16 * 2**20),      # 1920 pieces
        (60 * 2**30, 32 * 2**20),      # 1920 pieces
        (100 * 2**30, 64 * 2**20),     # 1600 pieces
        (1000 * 2**30, 512 * 2**20),   # 2000 pieces
    ),
    ids=lambda v: repr(v),
)
def test_calculate_piece_size(
        kwargs, cls_attrs, exp_min_piece_size, exp_max_piece_size,
        content_size, exp_unconstrained_piece_size,
        monkeypatch,
):
    """calculate_piece_size() returns the unconstrained size clamped to the effective min/max."""
    for name, value in cls_attrs.items():
        monkeypatch.setattr(torf.Torrent, name, value)

    # Clamp to the maximum first, then raise to the minimum
    exp_piece_size = max(
        min(
            exp_unconstrained_piece_size,
            exp_max_piece_size,
        ),
        exp_min_piece_size
    )
    print('exp piece size:', exp_piece_size)

    piece_size = torf.Torrent.calculate_piece_size(content_size, **kwargs)
    print('piece count:', math.ceil(content_size / piece_size))
    assert piece_size == exp_piece_size


# "piece_size_" because "piece_size" is already used for --piece-size
# (see conftest.py)
@pytest.mark.parametrize(
    argnames='length, piece_size_, exp_pieces',
    argvalues=(
        (0, 8, 0),
        (1, 8, 1),
        (7, 8, 1),
        (8, 8, 1),
        (9, 8, 2),
        (55, 8, 7),
        (56, 8, 7),
        (57, 8, 8),
        (123, 0, 0),
    ),
)
def test_pieces(length, piece_size_, exp_pieces, create_torrent, mocker):
    """``pieces`` is the content length divided by the piece length, rounded up (0 if piece length is 0)."""
    torrent = create_torrent()
    torrent.metainfo['info']['length'] = length
    torrent.metainfo['info']['piece length'] = piece_size_
    assert torrent.pieces == exp_pieces


def test_hashes(create_torrent, multifile_content):
    """``hashes`` is empty until generate() ran and splits 'pieces' into 20-byte chunks."""
    torrent = create_torrent()
    assert torrent.hashes == ()
    torrent.path = multifile_content.path
    torrent.piece_size = multifile_content.exp_metainfo['info']['piece length']
    assert torrent.hashes == ()
    torrent.generate()
    hashes_string = multifile_content.exp_metainfo['info']['pieces']
    assert torrent.hashes == tuple(hashes_string[pos : pos + 20]
                                   for pos in range(0, len(hashes_string), 20))
    torrent.path = None
    assert torrent.hashes == ()


def test_trackers__correct_type(create_torrent):
    """``trackers`` is always a utils.Trackers instance, also after assignment."""
    torrent = create_torrent()
    assert isinstance(torrent.trackers, utils.Trackers)
    torrent.trackers = ('http://foo', ('http://bar', 'http://baz'))
    assert isinstance(torrent.trackers, utils.Trackers)


def test_trackers__set_to_invalid_type(create_torrent):
    """Assigning a non-iterable to ``trackers`` raises ValueError."""
    torrent = create_torrent()
    with pytest.raises(ValueError) as e:
        torrent.trackers = 17
    assert str(e.value) == 'Must be Iterable, str or None, not int: 17'


def test_trackers__set_to_None(create_torrent):
    """``trackers = None`` removes 'announce' and 'announce-list' from the metainfo."""
    torrent = create_torrent()
    torrent.trackers = ('http://foo', ('http://bar', 'http://baz'))
    torrent.trackers = None
    assert torrent.trackers == []
    assert 'announce' not in torrent.metainfo
    assert 'announce-list' not in torrent.metainfo


def test_trackers__sync_to_metainfo(create_torrent):
    """Every manipulation of ``trackers`` is mirrored in 'announce' and 'announce-list'."""
    torrent = create_torrent()
    torrent.trackers = ('http://foo', 'http://bar')
    assert torrent.trackers == [['http://foo'], ['http://bar']]
    assert torrent.metainfo['announce'] == 'http://foo'
    assert torrent.metainfo['announce-list'] == [['http://foo'], ['http://bar']]

    torrent.trackers.append('http://asdf')
    assert torrent.trackers == [['http://foo'], ['http://bar'], ['http://asdf']]
    assert torrent.metainfo['announce'] == 'http://foo'
    assert torrent.metainfo['announce-list'] == [['http://foo'], ['http://bar'], ['http://asdf']]

    torrent.trackers[0].insert(0, 'http://quux')
    assert torrent.trackers == [['http://quux', 'http://foo'], ['http://bar'], ['http://asdf']]
    assert torrent.metainfo['announce'] == 'http://quux'
    assert torrent.metainfo['announce-list'] == [['http://quux', 'http://foo'], ['http://bar'], ['http://asdf']]

    torrent.trackers[1].remove('http://bar')
    assert torrent.trackers == [['http://quux', 'http://foo'], ['http://asdf']]
    assert torrent.metainfo['announce'] == 'http://quux'
    assert torrent.metainfo['announce-list'] == [['http://quux', 'http://foo'], ['http://asdf']]

    # With a single tracker left, only 'announce' is kept
    del torrent.trackers[0]
    assert torrent.trackers == [['http://asdf']]
    assert torrent.metainfo['announce'] == 'http://asdf'
    assert 'announce-list' not in torrent.metainfo

    del torrent.trackers[0]
    assert torrent.trackers == []
    assert 'announce' not in torrent.metainfo
    assert 'announce-list' not in torrent.metainfo


def test_trackers__announce_in_metainfo_is_automatically_included_in_announce_list(create_torrent):
    torrent = create_torrent()
    torrent.metainfo['announce'] = 'http://foo:123'
torrent.metainfo['announce-list'] = [['http://bar:456', 'http://baz:789'], ['http://quux']] assert torrent.trackers == [['http://foo:123'], ['http://bar:456', 'http://baz:789'], ['http://quux']] assert torrent.metainfo['announce-list'] == [['http://bar:456', 'http://baz:789'], ['http://quux']] assert torrent.metainfo['announce'] == 'http://foo:123' def test_trackers__announce_in_metainfo_is_not_duplicated(create_torrent): torrent = create_torrent() torrent.metainfo['announce'] = 'http://foo:123' torrent.metainfo['announce-list'] = [['http://foo:123'], ['http://bar:456', 'http://baz:789']] exp = [['http://foo:123'], ['http://bar:456', 'http://baz:789']] assert torrent.trackers == exp assert torrent.metainfo['announce-list'] == exp assert torrent.metainfo['announce'] == 'http://foo:123' torrent.metainfo['announce-list'] = [['http://foo:123', 'http://bar:456', 'http://baz:789']] exp = [['http://foo:123', 'http://bar:456', 'http://baz:789']] assert torrent.trackers == exp assert torrent.metainfo['announce-list'] == exp assert torrent.metainfo['announce'] == 'http://foo:123' torrent.metainfo['announce-list'] = [['http://bar:456', 'http://foo:123', 'http://baz:789']] exp = [['http://bar:456', 'http://foo:123', 'http://baz:789']] assert torrent.trackers == exp assert torrent.metainfo['announce-list'] == exp assert torrent.metainfo['announce'] == 'http://foo:123' def test_trackers__single_url_only_sets_announce_in_metainfo(create_torrent): torrent = create_torrent() torrent.metainfo['announce-list'] = [['http://foo:123'], ['http://bar:456']] torrent.trackers = 'http://foo:123' assert torrent.trackers == [['http://foo:123']] assert 'announce-list' not in torrent.metainfo assert torrent.metainfo['announce'] == 'http://foo:123' def test_trackers__multiple_urls_sets_announce_and_announcelist_in_metainfo(create_torrent): torrent = create_torrent() torrent.trackers = ['http://foo:123', 'http://bar:456', 'http://baz:789'] exp = [['http://foo:123'], ['http://bar:456'], 
['http://baz:789']] assert torrent.trackers == exp assert torrent.metainfo['announce-list'] == exp assert torrent.metainfo['announce'] == 'http://foo:123' def test_trackers__multiple_lists_of_urls_sets_announce_and_announcelist_in_metainfo(create_torrent): torrent = create_torrent() torrent.trackers = [['http://foo:123', 'http://bar:456'], ['http://asdf'], ['http://a', 'http://b', 'http://c']] exp = [['http://foo:123', 'http://bar:456'], ['http://asdf'], ['http://a', 'http://b', 'http://c']] assert torrent.trackers == exp assert torrent.metainfo['announce-list'] == exp assert torrent.metainfo['announce'] == 'http://foo:123' def test_trackers__no_trackers(create_torrent): torrent = create_torrent() torrent.metainfo['announce'] = 'http://foo:123' torrent.metainfo['announce-list'] = [['http://foo:123'], ['http://bar:456', 'http://baz:789']] torrent.trackers = () assert torrent.trackers == [] assert 'announce-list' not in torrent.metainfo assert 'announce' not in torrent.metainfo def test_trackers__addition(create_torrent): torrent = create_torrent() torrent.trackers = 'http://foo' torrent.trackers += ('http://bar',) assert torrent.trackers == [['http://foo'], ['http://bar']] assert torrent.metainfo['announce-list'] == [['http://foo'], ['http://bar']] assert torrent.metainfo['announce'] == 'http://foo' def test_webseeds__correct_type(create_torrent): torrent = create_torrent() for value in ((), 'http://foo', ['http://foo', 'http://bar'], None): torrent.webseeds = value assert isinstance(torrent.webseeds, utils.URLs) def test_webseeds__sync_to_metainfo(create_torrent): torrent = create_torrent(webseeds=()) assert torrent.webseeds == [] assert 'url-list' not in torrent.metainfo torrent.webseeds = ['http://foo'] assert torrent.webseeds == ['http://foo'] assert torrent.metainfo['url-list'] == ['http://foo'] torrent.webseeds.clear() assert torrent.webseeds == [] assert 'url-list' not in torrent.metainfo def test_webseeds__sync_from_metainfo(create_torrent): torrent = 
create_torrent(webseeds=()) assert torrent.webseeds == [] assert 'url-list' not in torrent.metainfo torrent.metainfo['url-list'] = ('http://foo', 'http://bar') assert torrent.webseeds == ('http://foo', 'http://bar') torrent.metainfo['url-list'] = () assert torrent.webseeds == [] def test_webseeds__urls_are_validated(create_torrent): torrent = create_torrent() with pytest.raises(errors.URLError) as e: torrent.webseeds.append('http://foo:bar') assert str(e.value) == 'http://foo:bar: Invalid URL' with pytest.raises(errors.URLError) as e: torrent.webseeds = ['http://foo', 'http://foo:bar'] assert str(e.value) == 'http://foo:bar: Invalid URL' def test_webseeds__setting_to_invalid_type(create_torrent): torrent = create_torrent() with pytest.raises(ValueError) as e: torrent.webseeds = 23 assert str(e.value) == 'Must be Iterable, str or None, not int: 23' def test_webseeds__addition(create_torrent): torrent = create_torrent() torrent.webseeds = ['http://foo'] torrent.webseeds += ['http://bar'] assert torrent.webseeds == ['http://foo', 'http://bar'] def test_httpseeds__correct_type(create_torrent): torrent = create_torrent() for value in ((), 'http://foo', ['http://foo', 'http://bar'], None): torrent.httpseeds = value assert isinstance(torrent.httpseeds, utils.URLs) def test_httpseeds__sync_to_metainfo(create_torrent): torrent = create_torrent(httpseeds=()) assert torrent.httpseeds == [] assert 'httpseeds' not in torrent.metainfo torrent.httpseeds = ['http://foo'] assert torrent.httpseeds == ['http://foo'] assert torrent.metainfo['httpseeds'] == ['http://foo'] torrent.httpseeds.clear() assert torrent.httpseeds == [] assert 'httpseeds' not in torrent.metainfo def test_httpseeds__sync_from_metainfo(create_torrent): torrent = create_torrent(httpseeds=()) torrent.metainfo['httpseeds'] = ['http://foo'] assert torrent.httpseeds == ['http://foo'] torrent.metainfo['httpseeds'].append('http://bar') assert torrent.httpseeds == ['http://foo', 'http://bar'] 
torrent.metainfo['httpseeds'] = [] assert torrent.httpseeds == [] def test_httpseeds__urls_are_validated(create_torrent): torrent = create_torrent() with pytest.raises(errors.URLError) as e: torrent.httpseeds = ['http://foo', 'http://foo:bar'] assert str(e.value) == 'http://foo:bar: Invalid URL' def test_httpseeds__setting_to_invalid_type(create_torrent): torrent = create_torrent() with pytest.raises(ValueError) as e: torrent.httpseeds = 23 assert str(e.value) == 'Must be Iterable, str or None, not int: 23' def test_httpseeds__addition(create_torrent): torrent = create_torrent() torrent.httpseeds = ['http://foo'] torrent.httpseeds += ['http://bar'] assert torrent.httpseeds == ['http://foo', 'http://bar'] def test_leaving_private_unset_does_not_include_it_in_metainfo(create_torrent): torrent = create_torrent() assert torrent.private is None assert 'private' not in torrent.metainfo['info'] def test_setting_private_always_includes_it_in_metainfo(create_torrent): torrent = create_torrent() for private in (True, False): torrent = create_torrent(private=private) assert torrent.private is private assert 'private' in torrent.metainfo['info'] def test_setting_private_to_None_removes_it_from_metainfo(create_torrent): torrent = create_torrent() for private in (True, False): torrent = create_torrent(private=private) assert torrent.private is private torrent.private = None assert torrent.private is None assert 'private' not in torrent.metainfo['info'] def test_setting_private_enforces_boolean_values(create_torrent): torrent = create_torrent() torrent.private = 'this evaluates to True' assert torrent.private is True assert torrent.metainfo['info']['private'] is True torrent.private = [] # This evaluates to False assert torrent.private is False assert torrent.metainfo['info']['private'] is False def test_comment(create_torrent): torrent = create_torrent() torrent.comment = '' assert torrent.comment == '' assert torrent.metainfo['comment'] == '' torrent.comment = None assert 
torrent.comment is None assert 'comment' not in torrent.metainfo def test_source(create_torrent): torrent = create_torrent() torrent.source = '' assert torrent.source == '' assert torrent.metainfo['info']['source'] == '' torrent.source = None assert torrent.source is None assert 'source' not in torrent.metainfo['info'] @pytest.mark.parametrize( argnames='date, exp_date', argvalues=( (1234, datetime.fromtimestamp(1234)), (datetime.fromtimestamp(4567), datetime.fromtimestamp(4567)), (None, None), ('', None), (b'', None), ([1, 2, 3], ValueError('Must be None, int or datetime object, not list: [1, 2, 3]')), ), ids=lambda v: repr(v), ) def test_creation_date(date, exp_date, create_torrent): torrent = create_torrent() if isinstance(exp_date, Exception): with pytest.raises(type(exp_date), match=rf'^{re.escape(str(exp_date))}$'): torrent.creation_date = date else: torrent.creation_date = date assert torrent.creation_date == exp_date if torrent.creation_date is None: assert 'creation date' not in torrent.metainfo else: assert torrent.creation_date is torrent.metainfo['creation date'] def test_created_by(create_torrent): torrent = create_torrent() torrent.created_by = 'somebody' assert torrent.created_by == 'somebody' assert torrent.metainfo['created by'] == 'somebody' torrent.created_by = None assert torrent.created_by is None assert 'created by' not in torrent.metainfo def test_repr_string(singlefile_content): from datetime import datetime t = torf.Torrent() assert repr(t) == 'Torrent()' t.private = True assert repr(t) == 'Torrent(private=True)' t.private = False assert repr(t) == 'Torrent()' now = datetime.now() t.creation_date = now assert repr(t) == f'Torrent(creation_date={now!r})' t.piece_size = 2**20 assert repr(t) == f'Torrent(creation_date={now!r}, piece_size={2**20})' t.creation_date = None for name in ('comment', 'created_by', 'source'): setattr(t, name, 'foo') assert repr(t) == f"Torrent(comment='foo', source='foo', created_by='foo', piece_size={2**20})" def 
test_equality(singlefile_content): kwargs = {'trackers': ['https://localhost/'], 'comment': 'Foo', 'created_by': 'Bar'} t1 = torf.Torrent(singlefile_content.path, **kwargs) t2 = torf.Torrent(singlefile_content.path, **kwargs) assert t1 == t2 t1.metainfo['foo'] = 'bar' assert t1 != t2 del t1.metainfo['foo'] assert t1 == t2 t2.comment = 'asdf' assert t1 != t2 t2.comment = t1.comment assert t1 == t2 t1.trackers += ['https://remotehost'] assert t1 != t2 del t1.trackers[-1] assert t1 == t2 def check_hash(content, hashname): t = torf.Torrent(content.path, trackers=['http://localhost/'], piece_size=content.exp_metainfo['info']['piece length']) assert t.piece_size == content.exp_metainfo['info']['piece length'] t.generate() exp_attrs = content.exp_attrs assert getattr(t, hashname) == getattr(exp_attrs, hashname) del t.metainfo['info']['piece length'] with pytest.raises(torf.MetainfoError) as excinfo: getattr(t, hashname) assert str(excinfo.value) == "Invalid metainfo: Missing 'piece length' in ['info']" def test_infohash_singlefile(singlefile_content): check_hash(singlefile_content, 'infohash') def test_infohash_base32_singlefile(singlefile_content): check_hash(singlefile_content, 'infohash_base32') def test_infohash_multifile(multifile_content): check_hash(multifile_content, 'infohash') def test_infohash_base32_multifile(multifile_content): check_hash(multifile_content, 'infohash_base32') def test_randomize_infohash(singlefile_content): t1 = torf.Torrent(singlefile_content.path) t2 = torf.Torrent(singlefile_content.path) t1.generate() t2.generate() t1.randomize_infohash = False t2.randomize_infohash = False assert t1.infohash == t2.infohash t1.randomize_infohash = True t2.randomize_infohash = True assert t1.infohash != t2.infohash def test_copy_before_ready(singlefile_content): t1 = torf.Torrent(singlefile_content.path, comment='Asdf.', randomize_infohash=True, webseeds=['http://foo']) assert not t1.is_ready t2 = t1.copy() assert t1 == t2 assert t1 is not t2 def 
test_copy_when_ready(singlefile_content): t1 = torf.Torrent(singlefile_content.path, comment='Asdf.', randomize_infohash=True, webseeds=['http://foo']) t1.generate() assert t1.is_ready t2 = t1.copy() assert t1 == t2 assert t1 is not t2 def test_copy_with_copy_module(singlefile_content): t1 = torf.Torrent(singlefile_content.path, comment='Asdf.', randomize_infohash=True, webseeds=['http://foo']) t1.generate() t2 = copy.copy(t1) assert t1 == t2 assert t1 is not t2 t2 = copy.deepcopy(t1) assert t1 == t2 assert t1 is not t2 def test_Torrent_object_is_picklable(generated_multifile_torrent): t1 = generated_multifile_torrent t1.path = None t1.trackers = ['http://localhost:123'] t1.webseeds = ['http://localhost:234'] t1.httpseeds = ['http://localhost:345'] t1.private = True t1.comment = 'Foo' t1.source = 'ASDF' t1.creation_date = 123456 t1.created_by = 'ME!' t1.piece_size = 1048576 t1.randomize_infohash = True t1_metainfo = copy.deepcopy(t1.metainfo.copy()) t2 = pickle.loads(pickle.dumps(t1)) t2_metainfo = copy.deepcopy(t2.metainfo.copy()) assert t2_metainfo == t1_metainfo rndusr-torf-547b989/tests/test_convert.py000066400000000000000000000036051513142010300205210ustar00rootroot00000000000000from collections import OrderedDict from datetime import datetime import pytest import torf def test_valid_metainfo(): t = torf.Torrent(created_by=None) now = datetime.now() t.metainfo['foo'] = now t.metainfo['baz'] = {'one': True, 'two': 2.34, 'bam': ['x', 'y', ('z',False)]} exp = OrderedDict([(b'baz', OrderedDict([(b'bam', [b'x', b'y', [b'z', 0]]), (b'one', 1), (b'two', 2)])), (b'foo', int(now.timestamp())), (b'info', OrderedDict())]) assert t.convert() == exp def test_invalid_metainfo(): t = torf.Torrent() t.metainfo['invalid'] = lambda foo: 'bar' with pytest.raises(torf.MetainfoError) as excinfo: t.convert() assert excinfo.match("Invalid value: .*lambda") t.metainfo['invalid'] = {'arf': int} with pytest.raises(torf.MetainfoError) as excinfo: t.convert() assert 
excinfo.match("Invalid value: ") t.metainfo['invalid'] = [3, ['a', 'b', {str: 'c'}], 4, 5] with pytest.raises(torf.MetainfoError) as excinfo: t.convert() assert excinfo.match("Invalid key: ") t.metainfo['invalid'] = {'x': [3, ['a', 'b', {Exception, 'c'}], 4, 5]} with pytest.raises(torf.MetainfoError) as excinfo: t.convert() assert excinfo.match("Invalid value: ") def test_metainfo_sort_order(create_torrent): torrent = create_torrent() md_conv = torrent.convert() exp_keys = sorted(bytes(key, encoding='utf-8', errors='replace') for key in torrent.metainfo) assert list(md_conv) == exp_keys exp_info_keys = sorted(bytes(key, encoding='utf-8', errors='replace') for key in torrent.metainfo['info']) assert list(md_conv[b'info']) == exp_info_keys rndusr-torf-547b989/tests/test_exclude.py000066400000000000000000000270701513142010300204740ustar00rootroot00000000000000import os import pytest @pytest.fixture def content(tmp_path): content = tmp_path / 'content' ; content.mkdir() # noqa: E702 for i in range(1, 5): ext = 'jpg' if i % 2 == 0 else 'txt' (content / f'file{i}.{ext}').write_text('') subdir = content / 'subdir' ; subdir.mkdir() # noqa: E702 for i in range(1, 4): ext = 'jpg' if i % 2 == 0 else 'pdf' (subdir / f'file{i}.{ext}').write_text('') return content def test_exclude_when_path_is_None(create_torrent): torrent = create_torrent() torrent.metainfo['info']['files'] = [{'length': 6, 'path': ['file1.txt']}, {'length': 6, 'path': ['file2.jpg']}, {'length': 6, 'path': ['file3.txt']}] torrent.path = None torrent.exclude_globs.append('*.jpg') assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file1.txt']}, {'length': 6, 'path': ['file3.txt']}] torrent.exclude_regexs.append('file3') assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file1.txt']}] assert torrent.path is None def test_exclude_with_singlefile_torrent_and_existing_path(create_torrent, content): torrent = create_torrent(path=content / 'file1.txt') assert 
torrent.metainfo['info']['name'] == 'file1.txt' assert torrent.metainfo['info']['length'] == 6 torrent.exclude_globs.append('*.txt') assert torrent.metainfo['info']['name'] == 'file1.txt' assert 'length' not in torrent.metainfo['info'] def test_exclude_with_singlefile_torrent_and_nonexisting_path(create_torrent): torrent = create_torrent() torrent.metainfo['info']['name'] = 'foo.txt' torrent.metainfo['info']['length'] = 123 torrent.exclude_regexs.append(r'fo+\.txt') assert torrent.metainfo['info']['name'] == 'foo.txt' assert 'length' not in torrent.metainfo['info'] def test_exclude_with_multifile_torrent_and_existing_path(create_torrent, content): torrent = create_torrent(path=content) assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file1.txt']}, {'length': 6, 'path': ['file2.jpg']}, {'length': 6, 'path': ['file3.txt']}, {'length': 6, 'path': ['file4.jpg']}, {'length': 6, 'path': ['subdir', 'file1.pdf']}, {'length': 6, 'path': ['subdir', 'file2.jpg']}, {'length': 6, 'path': ['subdir', 'file3.pdf']}] torrent.exclude_regexs.extend((r'.*1\....$', rf'^{torrent.name}/subdir/.*\.pdf$')) assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file2.jpg']}, {'length': 6, 'path': ['file3.txt']}, {'length': 6, 'path': ['file4.jpg']}, {'length': 6, 'path': ['subdir', 'file2.jpg']}] def test_exclude_with_multifile_torrent_and_nonexisting_path(create_torrent): torrent = create_torrent() torrent.metainfo['info']['name'] = 'content' torrent.metainfo['info']['files'] = [{'length': 6, 'path': ['file1.txt']}, {'length': 6, 'path': ['file2.jpg']}, {'length': 6, 'path': ['file3.txt']}, {'length': 6, 'path': ['subdir', 'file1.pdf']}, {'length': 6, 'path': ['subdir', 'file2.jpg']}, {'length': 6, 'path': ['subdir', 'file3.pdf']}] torrent.exclude_globs.extend(('*.jpg', '*/subdir/*3.*')) assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file1.txt']}, {'length': 6, 'path': ['file3.txt']}, {'length': 6, 'path': ['subdir', 'file1.pdf']}] 
def test_exclude_globs_can_be_set(create_torrent, content): torrent = create_torrent(path=content) torrent.exclude_globs = (f'*{os.sep}file2.*',) assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file1.txt']}, {'length': 6, 'path': ['file3.txt']}, {'length': 6, 'path': ['file4.jpg']}, {'length': 6, 'path': ['subdir', 'file1.pdf']}, {'length': 6, 'path': ['subdir', 'file3.pdf']}] def test_exclude_regexs_can_be_set(create_torrent, content): torrent = create_torrent(path=content) torrent.exclude_regexs = (f'{os.sep}subdir{os.sep}',) assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['file1.txt']}, {'length': 6, 'path': ['file2.jpg']}, {'length': 6, 'path': ['file3.txt']}, {'length': 6, 'path': ['file4.jpg']}] def test_exclude_globs_and_exclude_regexs_are_combined(create_torrent, content): torrent = create_torrent(path=content) torrent.exclude_globs = ('*.jpg',) torrent.exclude_regexs = ('txt$',) assert torrent.metainfo['info']['files'] == [{'length': 6, 'path': ['subdir', 'file1.pdf']}, {'length': 6, 'path': ['subdir', 'file3.pdf']}] def test_more_exclude_globs_tests(create_torrent, tmp_path): (tmp_path / 'content' / 'foo' / 'bar').mkdir(parents=True) (tmp_path / 'content' / 'bar' / 'baz').mkdir(parents=True) (tmp_path / 'content' / 'foo' / 'file_bar').write_text('data') (tmp_path / 'content' / 'foo' / 'bar' / 'file2').write_text('data') (tmp_path / 'content' / 'bar' / 'file3').write_text('data') (tmp_path / 'content' / 'bar' / 'baz' / 'file4').write_text('data') torrent = create_torrent(path=tmp_path / 'content') assert torrent.metainfo['info']['name'] == 'content' assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, {'length': 4, 'path': ['bar', 'file3']}, {'length': 4, 'path': ['foo', 'bar', 'file2']}, {'length': 4, 'path': ['foo', 'file_bar']}] torrent.exclude_globs = ('*oo/*',) assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, {'length': 4, 
'path': ['bar', 'file3']}] torrent.exclude_globs = ('*/ba*',) assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['foo', 'file_bar']}] torrent.exclude_globs = ('*baz*',) assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'file3']}, {'length': 4, 'path': ['foo', 'bar', 'file2']}, {'length': 4, 'path': ['foo', 'file_bar']}] torrent.exclude_globs = ('*/file[23]',) assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, {'length': 4, 'path': ['foo', 'file_bar']}] torrent.exclude_globs = ('*Z*',) assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'file3']}, {'length': 4, 'path': ['foo', 'bar', 'file2']}, {'length': 4, 'path': ['foo', 'file_bar']}] def test_more_exclude_regexs_tests(create_torrent, tmp_path): (tmp_path / 'content' / 'foo' / 'bar').mkdir(parents=True) (tmp_path / 'content' / 'bar' / 'baz').mkdir(parents=True) (tmp_path / 'content' / 'foo' / 'file_bar').write_text('data') (tmp_path / 'content' / 'foo' / 'bar' / 'file2').write_text('data') (tmp_path / 'content' / 'bar' / 'file3').write_text('data') (tmp_path / 'content' / 'bar' / 'baz' / 'file4').write_text('data') torrent = create_torrent(path=tmp_path / 'content') assert torrent.metainfo['info']['name'] == 'content' assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, {'length': 4, 'path': ['bar', 'file3']}, {'length': 4, 'path': ['foo', 'bar', 'file2']}, {'length': 4, 'path': ['foo', 'file_bar']}] torrent.exclude_regexs = ('^content/foo',) assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, {'length': 4, 'path': ['bar', 'file3']}] torrent.exclude_regexs = ('.*(?:_bar|2)$',) assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, {'length': 4, 'path': ['bar', 'file3']}] def test_include_globs_take_precedence(create_torrent, tmp_path): (tmp_path / 'content' / 'foo' / 
'bar').mkdir(parents=True) (tmp_path / 'content' / 'bar' / 'baz').mkdir(parents=True) (tmp_path / 'content' / 'foo' / 'file_bar').write_text('data') (tmp_path / 'content' / 'foo' / 'bar' / 'file2').write_text('data') (tmp_path / 'content' / 'bar' / 'file3').write_text('data') (tmp_path / 'content' / 'bar' / 'baz' / 'file4').write_text('data') torrent = create_torrent(path=tmp_path / 'content') assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, {'length': 4, 'path': ['bar', 'file3']}, {'length': 4, 'path': ['foo', 'bar', 'file2']}, {'length': 4, 'path': ['foo', 'file_bar']}] torrent.exclude_globs = ('*foo*',) torrent.include_globs = ('*foo/*/file?',) assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, {'length': 4, 'path': ['bar', 'file3']}, {'length': 4, 'path': ['foo', 'bar', 'file2']}] def test_include_regexs_take_precedence(create_torrent, tmp_path): (tmp_path / 'content' / 'foo' / 'bar').mkdir(parents=True) (tmp_path / 'content' / 'bar' / 'baz').mkdir(parents=True) (tmp_path / 'content' / 'foo' / 'file_bar').write_text('data') (tmp_path / 'content' / 'foo' / 'bar' / 'file2').write_text('data') (tmp_path / 'content' / 'bar' / 'file3').write_text('data') (tmp_path / 'content' / 'bar' / 'baz' / 'file4').write_text('data') torrent = create_torrent(path=tmp_path / 'content') assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'baz', 'file4']}, {'length': 4, 'path': ['bar', 'file3']}, {'length': 4, 'path': ['foo', 'bar', 'file2']}, {'length': 4, 'path': ['foo', 'file_bar']}] torrent.exclude_regexs = ('file.$',) torrent.include_regexs = ('file[23]',) assert torrent.metainfo['info']['files'] == [{'length': 4, 'path': ['bar', 'file3']}, {'length': 4, 'path': ['foo', 'bar', 'file2']}, {'length': 4, 'path': ['foo', 'file_bar']}] rndusr-torf-547b989/tests/test_fuzzy.py000066400000000000000000000031461513142010300202300ustar00rootroot00000000000000from . 
import fuzzydict, fuzzylist def test_fuzzylist(): x = fuzzylist('a', 'b', 'c', maybe=('x', 'y', 'z'), max_maybe_items={'x':1}) assert x != ['a', 'b'] assert not x == ['a', 'b'] assert x == ['a', 'c', 'b'] assert not x != ['a', 'c', 'b'] assert x == ['a', 'x', 'c', 'y', 'b'] assert not x != ['a', 'x', 'c', 'y', 'b'] assert x == ['a', 'x', 'b', 'z', 'c', 'y'] assert not x != ['a', 'x', 'b', 'z', 'c', 'y'] assert x != ['a', 'l', 'b', 'z', 'c', 'y'] assert not x == ['a', 'l', 'b', 'z', 'c', 'y'] assert x != ['x', 'b', 'x', 'a', 'c', 'y'] assert not x == ['x', 'b', 'x', 'a', 'c', 'y'] assert fuzzylist(0) == fuzzylist(maybe=(0,)) assert fuzzylist(maybe=(0,)) == fuzzylist(0) assert fuzzylist(0) != fuzzylist(maybe=(1,)) assert fuzzylist(maybe=(1,)) != fuzzylist(0) assert [1, 1, 2, 3] != fuzzylist(1, 2, 3) assert fuzzylist(1, 2, 3) != [1, 1, 2, 3] assert fuzzylist(0, 0, 1) == fuzzylist(0, 1, maybe=[0]) assert fuzzylist(0, 1, maybe=[0]) == fuzzylist(0, 0, 1) def test_fuzzydict(): assert fuzzydict(a='foo', b=fuzzylist(maybe=(1, 2, 3))) == {'a': 'foo'} assert fuzzydict(a='foo', b=fuzzylist(maybe=(1, 2, 3))) == {'a': 'foo', 'b': []} assert fuzzydict(a='foo', b=fuzzylist(maybe=(1, 2, 3))) != {'a': 'foo', 'b': ['bar']} assert fuzzydict(a='foo', b=fuzzylist(maybe=(1, 2, 3))) != {'b': []} assert fuzzydict(a='foo', b=fuzzylist(maybe=(1, 2, 3))) != {} assert fuzzydict(b=fuzzylist(maybe=(1, 2, 3))) == {} assert fuzzydict(b=fuzzylist(maybe=(1, 2, 3))) == {'x': fuzzylist(maybe=(4, 5, 6))} rndusr-torf-547b989/tests/test_generate.py000066400000000000000000000231761513142010300206400ustar00rootroot00000000000000import base64 import os from collections import defaultdict from pathlib import Path from unittest import mock import pytest import torf from . 
import * # noqa: F403 def test_no_path(): t = torf.Torrent() with pytest.raises(RuntimeError) as e: t.generate() assert str(e.value) == 'generate() called with no path specified' def test_with_empty_file(create_file): # Create content so we can set path content_path = create_file('file.jpg', '') t = torf.Torrent(content_path) content_path.write_text('') with pytest.raises(torf.PathError) as e: t.generate() assert str(e.value) == f'{t.path}: Empty or all files excluded' def test_with_empty_directory(create_dir): # Create content so we can set path content_path = create_dir('empty', ('a file', '')) t = torf.Torrent(content_path) (content_path / 'a file').unlink() with pytest.raises(torf.ReadError) as e: t.generate() assert str(e.value) == f'{content_path / "a file"}: No such file or directory' def test_nonexisting_path(create_file): content_path = create_file('file.jpg', '') t = torf.Torrent(content_path) content_path.unlink() with pytest.raises(torf.ReadError) as e: t.generate() assert str(e.value) == f'{content_path}: No such file or directory' def test_with_all_files_excluded(create_dir): # Create content so we can set path content_path = create_dir('content', ('a.jpg', ''), ('b.jpg', ''), ('c.jpg', '')) t = torf.Torrent(content_path, exclude_globs=['*.jpg']) with pytest.raises(torf.PathError) as e: t.generate() assert str(e.value) == f'{t.path}: Empty or all files excluded' def test_unreadable_basedir_in_multifile_torrent(create_dir): content_path = create_dir('content', ('a.jpg', ''), ('b.jpg', ''), ('c.jpg', '')) t = torf.Torrent(content_path) old_mode = os.stat(content_path).st_mode try: os.chmod(content_path, mode=0o222) with pytest.raises(torf.ReadError) as e: t.generate() assert str(e.value) == f'{content_path / "a.jpg"}: Permission denied' finally: os.chmod(content_path, mode=old_mode) def test_unreadable_file_in_multifile_torrent(create_dir): content_path = create_dir('content', ('a.jpg', ''), ('b.jpg', ''), ('c.jpg', '')) t = torf.Torrent(content_path) 
old_mode = os.stat(content_path).st_mode try: os.chmod(content_path / 'b.jpg', mode=0o222) with pytest.raises(torf.ReadError) as e: t.generate() assert str(e.value) == f'{content_path / "b.jpg"}: Permission denied' finally: os.chmod(content_path, mode=old_mode) def test_metainfo_with_singlefile_torrent(create_file, random_seed): with random_seed(0): content_path = create_file('file.jpg', torf.Torrent.piece_size_min_default * 10.123) # exp_* values come from these commands: # $ mktorrent -l 15 /tmp/pytest-of-*/pytest-current/test_metainfo_with_singlefile_current/file.jpg # $ btcheck -i file.jpg.torrent -n | grep Hash # $ python3 -c "from flatbencode import decode; print(decode(open('file.jpg.torrent', 'rb').read())[b'info'][b'pieces'])" exp_infohash = 'e7e02c57df57f30f5e66a69bfa210e9c61a5a8f6' exp_pieces = (b"<\x9c7\x80\xa5\xf6-\xb7)\xd0A\x1d\xb5\x1b\xacw\x10\x91\x9c\xe8\xb4\x16" b"\x00bg\xbc`\xc5\xc2\xf86\x88\xb2~\xd6E\xeeZ\xb0d\xcd\x9ek(\xc746G\x17" b"\xab\xa6'/D\xba\xd9\xf0d\x81\xe3\xf5C\x82JQ\xde\xb5\x17w\xda\xbc\xb7Ek" b"\nHU\xcd\x1f\xd6C\xcb!\xb0CW\\\xc4\x8d\xad9\xbe\xb4V\x8a7\xdf\x9a\xabV" b"\xa6\xe5\xee3\x81\xe5I\xa7\xfe#\xcb\xea\xc3\x8e\xc4\x00\x91\xdb\x00\xaf") _check_metainfo(content_path, 2**15, exp_infohash, exp_pieces) def test_metainfo_with_multifile_torrent(create_dir, random_seed): with random_seed(0): content_path = create_dir('content', ('a.jpg', torf.Torrent.piece_size_min_default * 1.123), ('b.jpg', torf.Torrent.piece_size_min_default * 2.456), ('c.jpg', torf.Torrent.piece_size_min_default * 3.789)) # exp_* values come from these commands: # $ mktorrent -l 15 /tmp/pytest-of-*/pytest-current/test_metainfo_with_multifile_tcurrent/content/ # $ btcheck -i content.torrent -n | grep Hash # $ python3 -c "from flatbencode import decode; print(decode(open('content.torrent', 'rb').read())[b'info'][b'pieces'])" exp_infohash = 'b36eeca9231867ebf650ed82a54216617408d2ce' exp_pieces = (b'\x84{\x9eM\x16\xa9\xe9\xf7V\xb8\xb3\xc2\xb8Q\xfaw\xea \xb9\xdc' 
b'\xf2\xc0\x0e\rXE\x85g\xe6k\x1dt\xa6\xca\x7f/\xb5)A"5!\xb9\xda\xe2' b'"\x15c^\x0e\xf7\x91|\x06V\xdc}\xd9\xb0<./\x0fBe\xcb\xd8*\xae\xd1"' b'\x05\n\x1b\xf3\x18\x1c\xd7u\xe3') _check_metainfo(content_path, 2**15, exp_infohash, exp_pieces) def _check_metainfo(content_path, piece_size, exp_infohash, exp_pieces): exp_hashes = tuple(exp_pieces[i : i + 20] for i in range(0, len(exp_pieces), 20)) t = torf.Torrent(content_path) t.piece_size = piece_size t.generate() assert t.infohash == exp_infohash assert t.infohash_base32 == base64.b32encode(base64.b16decode(exp_infohash.upper())) assert t.metainfo['info']['pieces'] == exp_pieces assert t.hashes == exp_hashes assert t.piece_size == piece_size assert t.metainfo['info']['piece length'] == piece_size def test_callback_is_called_with_correct_arguments(filespecs, piece_size, create_file, create_dir, forced_piece_size): display_filespecs(filespecs, piece_size) # noqa: F405 if len(filespecs) == 1: content_path = create_file(filespecs[0][0], filespecs[0][1]) else: content_path = create_dir('content', *filespecs) exp_pieces_done = 1 seen_filepaths = defaultdict(lambda: 0) def assert_cb_args(torrent, filepath, pieces_done, pieces_total): nonlocal exp_pieces_done assert torrent is t assert pieces_done == exp_pieces_done exp_pieces_done += 1 assert isinstance(filepath, os.PathLike) seen_filepaths[filepath.name] += 1 assert pieces_total == t.pieces with forced_piece_size(piece_size): t = torf.Torrent(content_path) cb = mock.Mock(side_effect=assert_cb_args) success = t.generate(callback=cb, interval=0) assert success is True assert t.piece_size == piece_size assert cb.call_count == t.pieces exp_filepaths = defaultdict(lambda: 0) for pos in range(0, t.size, piece_size): files = pos2files(pos, filespecs, piece_size) # noqa: F405 exp_filepaths[files[-1]] += 1 assert seen_filepaths == exp_filepaths def test_callback_is_called_at_interval(filespecs, piece_size, create_file, create_dir, forced_piece_size, monkeypatch): 
display_filespecs(filespecs, piece_size) # noqa: F405 if len(filespecs) == 1: content_path = create_file(filespecs[0][0], filespecs[0][1]) else: content_path = create_dir('content', *filespecs) with forced_piece_size(piece_size): t = torf.Torrent(content_path) monkeypatch.setattr(torf._generate, 'time_monotonic', mock.Mock(side_effect=range(int(1e9)))) for interval in (1, 2, 3): cb = mock.Mock(return_value=None) success = t.generate(callback=cb, interval=interval) assert success is True if interval > 1 and t.pieces % interval == 0: exp_call_count = t.pieces // interval + t.pieces % interval + 1 else: exp_call_count = t.pieces // interval + t.pieces % interval assert cb.call_count == exp_call_count def test_callback_cancels(piece_size, create_file, forced_piece_size, mocker): def maybe_cancel(torrent, filepath, pieces_done, pieces_total): if pieces_done / pieces_total > 0.1: return 'STOP THE PRESSES!' cb = mock.Mock(side_effect=maybe_cancel) piece_count = 1000 content_path = create_file('file.jpg', piece_size * piece_count) with forced_piece_size(piece_size): t = torf.Torrent(content_path) success = t.generate(callback=cb, interval=0, threads=1) assert success is False assert cb.call_count < piece_count def test_callback_raises_exception(piece_size, create_file, forced_piece_size): # We need a large file size so we can test that the hashers actually stop # before all pieces are hashed. content_path = create_file('file.jpg', piece_size * 1000) with forced_piece_size(piece_size): with mock.patch('torf._generate.sha1') as sha1_mock: def mock_digest(): return b'\x00' * 20 sha1_mock.return_value.digest.side_effect = mock_digest cb = mock.Mock(side_effect=Exception('Argh!')) t = torf.Torrent(content_path) with pytest.raises(Exception) as e: t.generate(callback=cb) assert str(e.value) == 'Argh!' 
cb.assert_called_once_with(t, Path(content_path), 1, t.pieces) # The pool of hashers should be stopped before all pieces are hashed assert sha1_mock.call_count < t.pieces assert not t.is_ready rndusr-torf-547b989/tests/test_magnet.py000066400000000000000000000606421513142010300203200ustar00rootroot00000000000000import base64 import binascii import hashlib import time import urllib from unittest import mock from urllib.parse import quote_plus import pytest import torf from . import ComparableException @pytest.fixture def hash16(): def make_base16_hash(data): return hashlib.sha1(data).hexdigest() return make_base16_hash @pytest.fixture def hash32(): def make_base32_hash(data): return base64.b32encode(hashlib.sha1(data).digest()).decode('utf-8') return make_base32_hash @pytest.fixture def xt(hash16): return 'urn:btih:' + hash16(b'anything') def test_invalid_argument(): with pytest.raises(TypeError): torf.Magnet(foo='bar') def test_xt_missing(): with pytest.raises(TypeError): torf.Magnet() def test_xt_invalid(): with pytest.raises(torf.MagnetError) as excinfo: torf.Magnet('asdf') assert str(excinfo.value) == 'asdf: Invalid exact topic ("xt")' def test_xt_is_base16(hash16): xt = 'urn:btih:' + hash16(b'foo') m = torf.Magnet(xt) assert m.xt == xt assert m.infohash == hash16(b'foo') assert str(m) == f'magnet:?xt=urn:btih:{hash16(b"foo")}' m.infohash = hash16(b'bar') assert m.infohash == hash16(b'bar') assert str(m) == f'magnet:?xt=urn:btih:{hash16(b"bar")}' def test_xt_is_base32(hash32): xt = 'urn:btih:' + hash32(b'foo') m = torf.Magnet(xt) assert m.xt == xt assert m.infohash == hash32(b'foo') assert str(m) == f'magnet:?xt=urn:btih:{hash32(b"foo")}' m.infohash = hash32(b'bar') assert m.infohash == hash32(b'bar') assert str(m) == f'magnet:?xt=urn:btih:{hash32(b"bar")}' def test_xt_is_naked_infohash(hash16, hash32): for infohash in (hash16(b'foo'), hash32(b'foo')): m = torf.Magnet(infohash) assert m.xt == f'urn:btih:{infohash}' def test_dn(xt): m = torf.Magnet(xt, dn='Héllo 
Wörld!') assert m.dn == 'Héllo Wörld!' assert str(m) == f'magnet:?xt={xt}&dn=H%C3%A9llo+W%C3%B6rld%21' m.dn = 'Göödbye World!' assert m.dn == 'Göödbye World!' assert str(m) == f'magnet:?xt={xt}&dn=G%C3%B6%C3%B6dbye+World%21' m.dn = (1, 2, 3) assert m.dn == '(1, 2, 3)' assert str(m) == f'magnet:?xt={xt}&dn=%281%2C+2%2C+3%29' def test_xl(xt): m = torf.Magnet(xt, xl=123) assert m.xl == 123 assert str(m) == f'magnet:?xt={xt}&xl=123' m.xl = 456 assert str(m) == f'magnet:?xt={xt}&xl=456' with pytest.raises(torf.MagnetError) as excinfo: m.xl = 'foo' assert str(excinfo.value) == 'foo: Invalid exact length ("xl")' with pytest.raises(torf.MagnetError) as excinfo: m.xl = -123 assert str(excinfo.value) == '-123: Must be 1 or larger' def test_tr(xt): m = torf.Magnet(xt, tr=('http://foo.bar/baz',)) assert m.tr == ['http://foo.bar/baz'] assert str(m) == f'magnet:?xt={xt}&tr=http%3A%2F%2Ffoo.bar%2Fbaz' m.tr.append('http://blim/blam') assert m.tr == ['http://foo.bar/baz', 'http://blim/blam'] assert str(m) == f'magnet:?xt={xt}&tr=http%3A%2F%2Ffoo.bar%2Fbaz&tr=http%3A%2F%2Fblim%2Fblam' with pytest.raises(torf.URLError): m.tr = 'foo' assert m.tr == ['http://foo.bar/baz', 'http://blim/blam'] with pytest.raises(torf.URLError): m.tr.append('foo') assert m.tr == ['http://foo.bar/baz', 'http://blim/blam'] m.tr = None assert m.tr == [] def test_xs(xt): m = torf.Magnet(xt, xs='http://foo.bar/baz.torrent') assert m.xs == 'http://foo.bar/baz.torrent' assert str(m) == f'magnet:?xt={xt}&xs=http%3A%2F%2Ffoo.bar%2Fbaz.torrent' m.xs = 'http://blim/blam.torrent' assert m.xs == 'http://blim/blam.torrent' assert str(m) == f'magnet:?xt={xt}&xs=http%3A%2F%2Fblim%2Fblam.torrent' with pytest.raises(torf.URLError): m.xs = 23 def test_as(xt): m = torf.Magnet(xt, as_='http://foo.bar/baz.torrent') assert m.as_ == 'http://foo.bar/baz.torrent' assert str(m) == f'magnet:?xt={xt}&as_=http%3A%2F%2Ffoo.bar%2Fbaz.torrent' m.as_ = 'http://blim/blam.torrent' assert m.as_ == 'http://blim/blam.torrent' assert str(m) == 
f'magnet:?xt={xt}&as_=http%3A%2F%2Fblim%2Fblam.torrent' with pytest.raises(torf.URLError): m.as_ = 23 def test_ws(xt): m = torf.Magnet(xt, ws=['http://foo.bar/baz.jpg', 'http://bar.foo/baz.jpg']) assert m.ws == ['http://foo.bar/baz.jpg', 'http://bar.foo/baz.jpg'] with pytest.raises(torf.URLError): m.ws = ['foo'] assert str(m) == f'magnet:?xt={xt}&ws=http%3A%2F%2Ffoo.bar%2Fbaz.jpg&ws=http%3A%2F%2Fbar.foo%2Fbaz.jpg' m.ws.remove('http://foo.bar/baz.jpg') assert str(m) == f'magnet:?xt={xt}&ws=http%3A%2F%2Fbar.foo%2Fbaz.jpg' m.ws = 'http://some/other/url/to/baz.jpg' assert m.ws == ['http://some/other/url/to/baz.jpg'] with pytest.raises(torf.URLError): m.ws.replace(('adf',)) assert m.ws == ['http://some/other/url/to/baz.jpg'] def test_kt(xt): m = torf.Magnet(xt, kt=('that', 'thing')) assert m.kt == ['that', 'thing'] assert str(m) == f'magnet:?xt={xt}&kt=that+thing' m.kt = ('that', 'other', 'thing') assert m.kt == ['that', 'other', 'thing'] assert str(m) == f'magnet:?xt={xt}&kt=that+other+thing' with pytest.raises(torf.MagnetError) as excinfo: m.kt = 17 assert str(excinfo.value) == '17: Invalid keyword topic ("kt")' def test_x(xt): m = torf.Magnet(xt, x_foo='asdf', x_bar=(1, 2, 3)) assert m.x['foo'] == 'asdf' assert m.x['bar'] == (1, 2, 3) m.x['foo'] = '1234' assert m.x['foo'] == '1234' assert m.x['baz'] is None def test_torrent(hash16, hash32): m = torf.Magnet(xt='urn:btih:' + hash16(b'some string'), dn='foo', xl=1e6, tr=('http://foo.bar/baz', 'http://asdf'), ws=('http://x/y', 'http://z')) t = m.torrent() assert t.name == 'foo' assert t.size == 1e6 assert t.trackers == [['http://foo.bar/baz'], ['http://asdf']] assert t.webseeds == ['http://x/y', 'http://z'] assert t.infohash == hash16(b'some string') m = torf.Magnet(xt='urn:btih:' + hash32(b'some string')) assert m.torrent().infohash == hash16(b'some string') assert 'length' not in m.torrent().metainfo['info'] def test_from_string(hash32): m = torf.Magnet.from_string(f'magnet:?xt=urn:btih:{hash32(b"asdf")}' 
'&dn=Some+Name' '&xl=123456' '&tr=http://tracker1.example.com/&tr=http://tracker2.example.com/' '&xs=http://source.example.com/' '&as=http://asource.example.com/' '&ws=http://webseed1.example.com/&ws=http://webseed2.example.com/' '&kt=one+two+three') assert m.xt == f'urn:btih:{hash32(b"asdf")}' assert m.dn == 'Some Name' assert m.xl == 123456 assert m.tr == ['http://tracker1.example.com/', 'http://tracker2.example.com/'] assert m.xs == 'http://source.example.com/' assert m.as_ == 'http://asource.example.com/' assert m.ws == ['http://webseed1.example.com/', 'http://webseed2.example.com/'] assert m.kt == ['one', 'two', 'three'] def test_from_string_with_wrong_scheme(xt, hash16, hash32): uri = f'http:?xt=urn:btih:{hash32(b"asdf")}' with pytest.raises(torf.MagnetError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == f'{uri}: Not a magnet URI' def test_from_string_with_unknown_parameter(xt, hash16, hash32): uri = (f'magnet:?xt=urn:btih:{hash32(b"asdf")}' '&dn=Some+Name' '&ab=foo') with pytest.raises(torf.MagnetError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == f'{uri}: ab: Unknown parameter' def test_from_string_with_multiple_xt_parameters(xt, hash16, hash32): uri = (f'magnet:?xt=urn:btih:{hash32(b"asdf")}' f'&xt=urn:btih:{hash16(b"fdsa")}') with pytest.raises(torf.MagnetError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == f'{uri}: Multiple exact topics ("xt")' def test_from_string_with_multiple_dn_parameters(xt, hash16, hash32): uri = f'magnet:?xt={xt}&dn=Foo&dn=Foo' with pytest.raises(torf.MagnetError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == f'{uri}: Multiple display names ("dn")' def test_from_string_with_multiple_xl_parameters(xt, hash16, hash32): uri = f'magnet:?xt={xt}&xl=1234&xl=2345' with pytest.raises(torf.MagnetError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == f'{uri}: Multiple exact lengths ("xl")' def 
test_from_string_with_multiple_xs_parameters(xt, hash16, hash32): uri = (f'magnet:?xt={xt}' '&xs=http%3A%2F%2Ffoo.bar%2Fbaz.torrent' '&xs=http%3A%2F%2Fbar.foo%2Fbaz.torrent') with pytest.raises(torf.MagnetError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == f'{uri}: Multiple exact sources ("xs")' def test_from_string_with_multiple_as_parameters(xt, hash16, hash32): uri = (f'magnet:?xt={xt}' '&as=http%3A%2F%2Ffoo.bar%2Fbaz.torrent' '&as=http%3A%2F%2Fbar.foo%2Fbaz.torrent') with pytest.raises(torf.MagnetError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == f'{uri}: Multiple acceptable sources ("as")' def test_from_string_with_multiple_kt_parameters(xt, hash16, hash32): uri = (f'magnet:?xt={xt}' '&kt=a+b+c' '&kt=1+2+5') with pytest.raises(torf.MagnetError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == f'{uri}: Multiple keyword topics ("kt")' def test_from_string_with_invalid_xt_parameter(): uri = 'magnet:?xt=foo' with pytest.raises(torf.MagnetError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == 'foo: Invalid exact topic ("xt")' def test_from_string_with_invalid_xl_parameter(xt): uri = f'magnet:?xt={xt}&xl=nan' with pytest.raises(torf.MagnetError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == 'nan: Invalid exact length ("xl")' def test_from_string_with_invalid_tr_parameter(xt): uri = f'magnet:?xt={xt}&tr=not+a+URL' with pytest.raises(torf.URLError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == 'not a URL: Invalid URL' def test_from_string_with_invalid_xs_parameter(xt): uri = f'magnet:?xt={xt}&xs=not+a+URL' with pytest.raises(torf.URLError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == 'not a URL: Invalid URL' def test_from_string_with_invalid_as_parameter(xt): uri = f'magnet:?xt={xt}&as=not+a+URL' with pytest.raises(torf.URLError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == 'not 
a URL: Invalid URL' def test_from_string_with_invalid_ws_parameter(xt): uri = f'magnet:?xt={xt}&ws=not+a+URL' with pytest.raises(torf.URLError) as excinfo: torf.Magnet.from_string(uri) assert str(excinfo.value) == 'not a URL: Invalid URL' def test_from_torrent(singlefile_content, multifile_content): for content in singlefile_content, multifile_content: t = torf.Torrent(content.path, trackers=['http://foo', 'http://bar'], webseeds=['http://qux', 'http://quux']) t.generate() assert str(t.magnet()) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&dn={quote_plus(t.name)}' f'&xl={t.size}' '&tr=http%3A%2F%2Ffoo&tr=http%3A%2F%2Fbar' '&ws=http%3A%2F%2Fqux&ws=http%3A%2F%2Fquux') def test_from_torrent_without_name(singlefile_content, multifile_content): for content in singlefile_content, multifile_content: t = torf.Torrent(content.path, trackers=['http://foo', 'http://bar']) t.generate() assert str(t.magnet(name=False)) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&xl={t.size}' f'&tr=http%3A%2F%2Ffoo&tr=http%3A%2F%2Fbar') def test_from_torrent_without_size(singlefile_content, multifile_content): for content in singlefile_content, multifile_content: t = torf.Torrent(content.path, trackers=['http://foo', 'http://bar']) t.generate() assert str(t.magnet(size=False)) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&dn={quote_plus(t.name)}' f'&tr=http%3A%2F%2Ffoo&tr=http%3A%2F%2Fbar') def test_from_torrent_with_single_tracker(singlefile_content, multifile_content): for content in singlefile_content, multifile_content: t = torf.Torrent(content.path, trackers=['http://foo']) t.generate() assert str(t.magnet()) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&dn={quote_plus(t.name)}' f'&xl={t.size}' f'&tr=http%3A%2F%2Ffoo') assert str(t.magnet(tracker=True, trackers=False)) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&dn={quote_plus(t.name)}' f'&xl={t.size}' f'&tr=http%3A%2F%2Ffoo') assert str(t.magnet(tracker=False, trackers=False)) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&dn={quote_plus(t.name)}' 
f'&xl={t.size}') assert str(t.magnet(tracker=True, trackers=True)) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&dn={quote_plus(t.name)}' f'&xl={t.size}' f'&tr=http%3A%2F%2Ffoo') assert str(t.magnet(tracker=False, trackers=True)) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&dn={quote_plus(t.name)}' f'&xl={t.size}' f'&tr=http%3A%2F%2Ffoo') def test_from_torrent_with_multiple_trackers(singlefile_content, multifile_content): for content in singlefile_content, multifile_content: t = torf.Torrent(content.path, trackers=['http://foo', 'http://bar']) t.generate() assert str(t.magnet()) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&dn={quote_plus(t.name)}' f'&xl={t.size}' f'&tr=http%3A%2F%2Ffoo&tr=http%3A%2F%2Fbar') assert str(t.magnet(tracker=True, trackers=False)) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&dn={quote_plus(t.name)}' f'&xl={t.size}' f'&tr=http%3A%2F%2Ffoo') assert str(t.magnet(tracker=False, trackers=False)) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&dn={quote_plus(t.name)}' f'&xl={t.size}') assert str(t.magnet(tracker=True, trackers=True)) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&dn={quote_plus(t.name)}' f'&xl={t.size}' f'&tr=http%3A%2F%2Ffoo') assert str(t.magnet(tracker=False, trackers=True)) == (f'magnet:?xt=urn:btih:{t.infohash}' f'&dn={quote_plus(t.name)}' f'&xl={t.size}' f'&tr=http%3A%2F%2Ffoo&tr=http%3A%2F%2Fbar') def test_repr(xt): m = torf.Magnet(xt, dn='Foo', xl=123, tr=('http://tracker:123',), xs='http://primary.source/url.torrent', as_='http://alt.source/url.torrent', ws=('http://webseed/url/file.content',), kt=('keyword1', 'keyword2'), x_foo='some', x_bar='junk') assert repr(m) == ("Magnet(xt='urn:btih:8867c88b56e0bfb82cffaf15a66bc8d107d6754a', " "dn='Foo', xl=123, tr=['http://tracker:123'], " "xs='http://primary.source/url.torrent', " "as_='http://alt.source/url.torrent', " "ws=['http://webseed/url/file.content'], " "kt=['keyword1', 'keyword2'], " "x_foo='some', x_bar='junk')") def test_setting_info_with_wrong_infohash(generated_singlefile_torrent, 
generated_multifile_torrent): magnet = torf.Magnet(generated_singlefile_torrent.infohash) with pytest.raises(torf.MetainfoError) as excinfo: magnet._set_info_from_torrent(generated_multifile_torrent.dump(), validate=True) assert str(excinfo.value) == ('Invalid metainfo: Mismatching info hashes: ' f'{generated_singlefile_torrent.infohash} != {generated_multifile_torrent.infohash}') magnet._set_info_from_torrent(generated_multifile_torrent.dump(), validate=False) assert magnet._info == generated_multifile_torrent.metainfo['info'] def test_getting_info__unsupported_protocol(generated_singlefile_torrent): torrent = generated_singlefile_torrent magnet = torf.Magnet(torrent.infohash, xs='asdf://xs.foo:123/torrent') cb = mock.MagicMock() assert magnet.get_info(callback=cb) is False exp_calls = [mock.call(ComparableException(torf.ConnectionError('asdf://xs.foo:123/torrent', 'Unsupported protocol')))] assert cb.call_args_list == exp_calls torrent_ = magnet.torrent() assert torrent_.metainfo['info'] == {} def test_getting_info__xs_fails__as_fails(generated_singlefile_torrent): torrent = generated_singlefile_torrent magnet = torf.Magnet(torrent.infohash, xs='http://xs.foo:123/torrent', as_='http://as.foo:123/torrent') cb = mock.MagicMock() assert magnet.get_info(callback=cb) is False exp_calls = [mock.call(ComparableException(torf.ConnectionError('http://xs.foo:123/torrent', 'Name or service not known'))), mock.call(ComparableException(torf.ConnectionError('http://as.foo:123/torrent', 'Name or service not known')))] assert cb.call_args_list == exp_calls torrent_ = magnet.torrent() assert torrent_.metainfo['info'] == {} def test_getting_info__xs_succeeds__as_fails(generated_singlefile_torrent, httpserver): torrent = generated_singlefile_torrent magnet = torf.Magnet(torrent.infohash, xs=httpserver.url_for('/torrent'), as_='http://as.foo:123/torrent') httpserver.expect_request('/torrent').respond_with_data(torrent.dump()) cb = mock.MagicMock() assert magnet.get_info(callback=cb) 
is True assert cb.call_args_list == [] torrent_ = magnet.torrent() assert torrent_.metainfo['info'] == torrent.metainfo['info'] def test_getting_info__xs_fails__as_succeeds(generated_singlefile_torrent, httpserver, monkeypatch): torrent = generated_singlefile_torrent total_timeout = 100 now = 0.0 mock_time_monotonic = mock.MagicMock(return_value=now) monkeypatch.setattr(time, 'monotonic', mock_time_monotonic) def timed_out_download(url, *args, **kwargs): # First download() call (xs) took almost all our available time mock_time_monotonic.return_value = now + total_timeout - 1 # Remove mock for second download() call (as) download_patch.stop() raise torf.ConnectionError(url, 'Nope') download_patch = mock.patch('torf._utils.download', timed_out_download) download_patch.start() httpserver.expect_request('/as.torrent').respond_with_data(torrent.dump()) magnet = torf.Magnet(torrent.infohash, xs='http://xs.foo:123/torrent', as_=httpserver.url_for('/as.torrent')) cb = mock.MagicMock() assert magnet.get_info(callback=cb, timeout=total_timeout) is True exp_calls = [mock.call(ComparableException(torf.ConnectionError('http://xs.foo:123/torrent', 'Nope')))] assert cb.call_args_list == exp_calls torrent_ = magnet.torrent() assert torrent_.metainfo['info'] == torrent.metainfo['info'] def test_getting_info__xs_returns_invalid_bytes(generated_singlefile_torrent, httpserver): torrent = generated_singlefile_torrent magnet = torf.Magnet(torrent.infohash, xs=httpserver.url_for('/torrent'), as_='http://as.foo:123/torrent') httpserver.expect_request('/torrent').respond_with_data(b'not bencoded bytes') cb = mock.MagicMock() assert magnet.get_info(callback=cb) is False exp_calls = [mock.call(ComparableException(torf.BdecodeError())), mock.call(ComparableException(torf.ConnectionError('http://as.foo:123/torrent', 'Name or service not known')))] assert cb.call_args_list == exp_calls torrent_ = magnet.torrent() assert torrent_.metainfo['info'] == {} def 
test_getting_info__as_returns_invalid_bytes(generated_singlefile_torrent, httpserver): torrent = generated_singlefile_torrent magnet = torf.Magnet(torrent.infohash, xs='http://xs.foo:123/torrent', as_=httpserver.url_for('/torrent')) httpserver.expect_request('/torrent').respond_with_data(b'not bencoded bytes') cb = mock.MagicMock() assert magnet.get_info(callback=cb) is False exp_calls = [mock.call(ComparableException(torf.ConnectionError('http://xs.foo:123/torrent', 'Name or service not known'))), mock.call(ComparableException(torf.BdecodeError()))] assert cb.call_args_list == exp_calls torrent_ = magnet.torrent() assert torrent_.metainfo['info'] == {} def test_getting_info__xs_times_out(generated_singlefile_torrent, monkeypatch): torrent = generated_singlefile_torrent total_timeout = 100 now = 0.0 mock_time_monotonic = mock.MagicMock(return_value=now) monkeypatch.setattr(time, 'monotonic', mock_time_monotonic) def timed_out_download(url, *args, **kwargs): # First download() call (xs) took almost all our available time mock_time_monotonic.return_value = now + total_timeout # Remove mock for second download() call (as) download_patch.stop() raise torf.ConnectionError(url, 'Timed out (mocked)') download_patch = mock.patch('torf._utils.download', timed_out_download) download_patch.start() magnet = torf.Magnet(torrent.infohash, xs='http://xs.foo:123/torrent', as_='http://as.foo:123/torrent') cb = mock.MagicMock() assert magnet.get_info(callback=cb, timeout=total_timeout) is False exp_calls = [mock.call(ComparableException(torf.ConnectionError('http://xs.foo:123/torrent', 'Timed out (mocked)'))), mock.call(ComparableException(torf.ConnectionError('http://as.foo:123/torrent', 'Timed out')))] assert cb.call_args_list == exp_calls torrent_ = magnet.torrent() assert torrent_.metainfo['info'] == {} def test_getting_info_from_ws(generated_multifile_torrent, httpserver): torrent = generated_multifile_torrent magnet = torf.Magnet(torrent.infohash, 
ws=[httpserver.url_for('/bar//')]) httpserver.expect_request('/bar.torrent').respond_with_data(torrent.dump()) cb = mock.MagicMock() assert magnet.get_info(callback=cb) is True assert cb.call_args_list == [] torrent_ = magnet.torrent() assert torrent_.metainfo['info'] == torrent.metainfo['info'] def test_getting_info_from_tr(generated_multifile_torrent, httpserver): torrent = generated_multifile_torrent magnet = torf.Magnet(torrent.infohash, tr=[httpserver.url_for('/announce')]) infohash_enc = urllib.parse.quote_from_bytes(binascii.unhexlify(torrent.infohash)) httpserver.expect_request('/file', query_string=f'info_hash={infohash_enc}').respond_with_data(torrent.dump()) cb = mock.MagicMock() assert magnet.get_info(callback=cb) is True assert cb.call_args_list == [] torrent_ = magnet.torrent() assert torrent_.metainfo['info'] == torrent.metainfo['info'] rndusr-torf-547b989/tests/test_partial_size.py000066400000000000000000000075161513142010300215340ustar00rootroot00000000000000import pytest import torf def test_partial_size__singlefile__providing_correct_name(tmp_path): (tmp_path / 'content.jpg').write_text('some data') t = torf.Torrent(tmp_path / 'content.jpg') assert t.partial_size('content.jpg') == 9 assert t.partial_size(['content.jpg']) == 9 def test_partial_size__singlefile__providing_wrong_name(tmp_path): (tmp_path / 'content.jpg').write_text('some data') t = torf.Torrent(tmp_path / 'content.jpg') for path in ('foo.jpg', ['foo.jpg']): with pytest.raises(torf.PathError) as excinfo: t.partial_size(path) assert excinfo.match('^foo.jpg: Unknown path$') def test_partial_size__singlefile__providing_path(tmp_path): (tmp_path / 'content.jpg').write_text('some data') t = torf.Torrent(tmp_path / 'content.jpg') for path in ('bar/foo.jpg', ['bar', 'foo.jpg']): with pytest.raises(torf.PathError) as excinfo: t.partial_size(path) assert excinfo.match('^bar/foo.jpg: Unknown path$') def test_partial_size__multifile__providing_path_to_file(tmp_path): (tmp_path / 
'content').mkdir() (tmp_path / 'content' / 'file1.jpg').write_text('some data') (tmp_path / 'content' / 'file2.jpg').write_text('some other data') (tmp_path / 'content' / 'subcontent').mkdir() (tmp_path / 'content' / 'subcontent' / 'file3.jpg').write_text('some more data') t = torf.Torrent(tmp_path / 'content') for path in ('content/file1.jpg', ['content', 'file1.jpg']): assert t.partial_size(path) == 9 for path in ('content/file2.jpg', ['content', 'file2.jpg']): assert t.partial_size(path) == 15 for path in ('content/subcontent/file3.jpg', ['content', 'subcontent', 'file3.jpg']): assert t.partial_size(path) == 14 def test_partial_size__multifile__providing_path_to_dir(tmp_path): (tmp_path / 'content').mkdir() (tmp_path / 'content' / 'file1.jpg').write_text('some data') (tmp_path / 'content' / 'file2.jpg').write_text('some other data') (tmp_path / 'content' / 'subcontent1').mkdir() (tmp_path / 'content' / 'subcontent1' / 'file3.jpg').write_text('some more data') (tmp_path / 'content' / 'subcontent1' / 'file4.jpg').write_text('and even more data') (tmp_path / 'content' / 'subcontent2').mkdir() (tmp_path / 'content' / 'subcontent2' / 'file5.jpg').write_text('some more other data') (tmp_path / 'content' / 'subcontent2' / 'file6.jpg').write_text('and even more other data') t = torf.Torrent(tmp_path / 'content') for path in ('content', ['content']): assert t.partial_size(path) == 100 for path in ('content/subcontent1', ['content', 'subcontent1']): assert t.partial_size(path) == 32 for path in ('content/subcontent2', ['content', 'subcontent2']): assert t.partial_size(path) == 44 def test_partial_size__multifile__providing_unknown_path(tmp_path): (tmp_path / 'content').mkdir() (tmp_path / 'content' / 'file1.jpg').write_text('some data') (tmp_path / 'content' / 'file2.jpg').write_text('some other data') (tmp_path / 'content' / 'subcontent').mkdir() (tmp_path / 'content' / 'subcontent' / 'file3.jpg').write_text('some more data') t = torf.Torrent(tmp_path / 'content') for 
path in ('content/subcontent/file1.jpg', ['content', 'subcontent', 'file1.jpg']): with pytest.raises(torf.PathError) as excinfo: t.partial_size(path) assert excinfo.match('^content/subcontent/file1.jpg: Unknown path$') for path in ('content/file3.jpg', ['content', 'file3.jpg']): with pytest.raises(torf.PathError) as excinfo: t.partial_size(path) assert excinfo.match('^content/file3.jpg: Unknown path$') for path in ('file1.jpg', ['file1.jpg']): with pytest.raises(torf.PathError) as excinfo: t.partial_size(path) assert excinfo.match('^file1.jpg: Unknown path$') rndusr-torf-547b989/tests/test_read.py000066400000000000000000000305431513142010300177550ustar00rootroot00000000000000import io from collections import OrderedDict from datetime import datetime from hashlib import sha1 from pathlib import Path import pytest import torf from torf import _flatbencode as bencode from torf import _utils def test_non_bencoded_data(): fo = io.BytesIO(b'not valid bencoded data') with pytest.raises(torf.BdecodeError) as excinfo: torf.Torrent.read_stream(fo) assert excinfo.match('^Invalid metainfo format$') def test_unreadable_stream(): class Unreadable(io.BytesIO): def read(self, *args, **kwargs): raise OSError('Refusing to read') fo = Unreadable(b'foo') with pytest.raises(torf.ReadError) as excinfo: torf.Torrent.read_stream(fo) assert excinfo.match('^Unable to read$') def test_validate_when_reading_stream(valid_singlefile_metainfo): del valid_singlefile_metainfo[b'info'][b'name'] fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) with pytest.raises(torf.MetainfoError) as excinfo: torf.Torrent.read_stream(fo, validate=True) assert excinfo.match(r"^Invalid metainfo: Missing 'name' in \['info'\]$") fo.seek(0) t = torf.Torrent.read_stream(fo, validate=False) assert isinstance(t, torf.Torrent) def test_validate_when_reading_file(tmp_path, valid_singlefile_metainfo): del valid_singlefile_metainfo[b'info'][b'length'] torrent_file = tmp_path / 'invalid.torrent' with 
open(torrent_file, 'wb') as f: f.write(bencode.encode(valid_singlefile_metainfo)) with pytest.raises(torf.MetainfoError) as excinfo: torf.Torrent.read(torrent_file, validate=True) assert excinfo.match("^Invalid metainfo: Missing 'length' or 'files' in 'info'$") t = torf.Torrent.read(torrent_file, validate=False) assert isinstance(t, torf.Torrent) def test_successful_read(valid_singlefile_metainfo): fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) t = torf.Torrent.read_stream(fo) assert t.path is None assert t.files == (Path(str(valid_singlefile_metainfo[b'info'][b'name'], encoding='utf-8')),) assert t.filepaths == () assert t.name == str(valid_singlefile_metainfo[b'info'][b'name'], encoding='utf-8') assert t.size == valid_singlefile_metainfo[b'info'][b'length'] assert t.infohash == sha1(bencode.encode(valid_singlefile_metainfo[b'info'])).hexdigest() assert t.comment == str(valid_singlefile_metainfo[b'comment'], encoding='utf-8') assert t.creation_date == datetime.fromtimestamp(valid_singlefile_metainfo[b'creation date']) assert t.created_by == str(valid_singlefile_metainfo[b'created by'], encoding='utf-8') assert t.private is bool(valid_singlefile_metainfo[b'info'][b'private']) assert t.piece_size == valid_singlefile_metainfo[b'info'][b'piece length'] def test_single_tracker(valid_singlefile_metainfo): valid_singlefile_metainfo[b'announce'] = b'http://lonelyhost/announce' valid_singlefile_metainfo.pop(b'announce-list', None) fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) t = torf.Torrent.read_stream(fo) assert t.trackers == [[str(valid_singlefile_metainfo[b'announce'], encoding='utf-8')]] def test_multiple_trackers(valid_singlefile_metainfo): valid_singlefile_metainfo[b'announce-list'] = [[b'http://localhost', b'http://foohost'], [b'http://bazhost']] valid_singlefile_metainfo.pop(b'announce', None) fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) t = torf.Torrent.read_stream(fo) assert t.trackers == [[str(url, encoding='utf-8') for 
url in tier] for tier in valid_singlefile_metainfo[b'announce-list']] def test_validate_nondict(): data = b'3:foo' with pytest.raises(torf.BdecodeError) as excinfo: torf.Torrent.read_stream(io.BytesIO(data), validate=True) assert excinfo.match("^Invalid metainfo format$") with pytest.raises(torf.BdecodeError) as excinfo: torf.Torrent.read_stream(io.BytesIO(data), validate=False) assert excinfo.match("^Invalid metainfo format$") def test_validate_missing_info(): data = OrderedDict([(b'foo', b'bar')]) with pytest.raises(torf.MetainfoError) as excinfo: torf.Torrent.read_stream(io.BytesIO(bencode.encode(data)), validate=True) assert excinfo.match(r"^Invalid metainfo: Missing 'info'$") t = torf.Torrent.read_stream(io.BytesIO(bencode.encode(data)), validate=False) assert t.metainfo == {'foo': 'bar', 'info': {}} def test_validate_info_not_a_dictionary(): data = OrderedDict([(b'info', 1)]) with pytest.raises(torf.MetainfoError) as excinfo: torf.Torrent.read_stream(io.BytesIO(bencode.encode(data)), validate=True) assert excinfo.match(r"^Invalid metainfo: \['info'\] must be dict, not int: 1$") with pytest.raises(torf.MetainfoError) as excinfo: torf.Torrent.read_stream(io.BytesIO(bencode.encode(data)), validate=False) assert excinfo.match(r"^Invalid metainfo: \['info'\] must be dict, not int: 1$") def test_validate_missing_pieces(): data = OrderedDict([(b'info', {b'name': b'Foo', b'piece length': 16384})]) fo = io.BytesIO(bencode.encode(data)) with pytest.raises(torf.MetainfoError) as excinfo: torf.Torrent.read_stream(fo, validate=True) assert excinfo.match(r"^Invalid metainfo: Missing 'pieces' in \['info'\]$") def test_read_nonstandard_data_without_validation(): data = OrderedDict([ (b'foo', b'bar'), (b'number', 17), (b'list', [1, b'two']), (b'dict', OrderedDict([ (b'yes', 1), (b'no', 0), ])) ]) fo = io.BytesIO(bencode.encode(data)) t = torf.Torrent.read_stream(fo, validate=False) assert t.metainfo['foo'] == 'bar' assert t.metainfo['number'] == 17 assert t.metainfo['list'] 
== [1, 'two'] assert t.metainfo['dict'] == {'yes': 1, 'no': 0} assert t.metainfo['info'] == {} def test_read_from_unreadable_file(valid_singlefile_metainfo, tmp_path): f = (tmp_path / 'a.torrent') f.write_bytes(bencode.encode(valid_singlefile_metainfo)) f.chmod(mode=0o222) with pytest.raises(torf.ReadError) as excinfo: torf.Torrent.read(str(f)) assert excinfo.match(f'^{f}: Permission denied$') def test_read_from_invalid_file(tmp_path): f = tmp_path / 'a.torrent' f.write_bytes(b'this is not metainfo') with pytest.raises(torf.BdecodeError) as excinfo: torf.Torrent.read(f) assert excinfo.match(f'^{f}: Invalid torrent file format$') def test_read_from_nonexisting_file(tmp_path): f = tmp_path / 'a.torrent' with pytest.raises(torf.ReadError) as excinfo: torf.Torrent.read(f) assert excinfo.match(f'^{f}: No such file or directory$') def test_read_from_proper_torrent_file(valid_multifile_metainfo, tmp_path): f = tmp_path / 'a.torrent' f.write_bytes(bencode.encode(valid_multifile_metainfo)) t = torf.Torrent.read(f) exp_info = valid_multifile_metainfo[b'info'] assert t.path is None assert t.files == tuple(Path(str(b'/'.join([exp_info[b'name']] + f[b'path']), encoding='utf-8')) for f in exp_info[b'files']) assert t.filepaths == () assert t.name == str(exp_info[b'name'], encoding='utf-8') assert t.size == sum(f[b'length'] for f in exp_info[b'files']) assert t.infohash == sha1(bencode.encode(exp_info)).hexdigest() assert t.comment == str(valid_multifile_metainfo[b'comment'], encoding='utf-8') assert t.creation_date == datetime.fromtimestamp(valid_multifile_metainfo[b'creation date']) assert t.created_by == str(valid_multifile_metainfo[b'created by'], encoding='utf-8') assert t.private is bool(exp_info[b'private']) assert t.piece_size == exp_info[b'piece length'] @pytest.mark.parametrize('bytes_type', (bytes, bytearray), ids=lambda t: t.__name__) def test_read_from_bytes(bytes_type, valid_multifile_metainfo, tmp_path): bytes = bytes_type(bencode.encode(valid_multifile_metainfo)) 
t = torf.Torrent.read_stream(bytes) exp_info = valid_multifile_metainfo[b'info'] assert t.path is None assert t.files == tuple(Path(str(b'/'.join([exp_info[b'name']] + f[b'path']), encoding='utf-8')) for f in exp_info[b'files']) assert t.filepaths == () assert t.name == str(exp_info[b'name'], encoding='utf-8') assert t.size == sum(f[b'length'] for f in exp_info[b'files']) assert t.infohash == sha1(bencode.encode(exp_info)).hexdigest() assert t.comment == str(valid_multifile_metainfo[b'comment'], encoding='utf-8') assert t.creation_date == datetime.fromtimestamp(valid_multifile_metainfo[b'creation date']) assert t.created_by == str(valid_multifile_metainfo[b'created by'], encoding='utf-8') assert t.private is bool(exp_info[b'private']) assert t.piece_size == exp_info[b'piece length'] @pytest.mark.parametrize('bytes_type', (bytes, bytearray), ids=lambda t: t.__name__) def test_read_from_too_many_bytes(bytes_type, valid_multifile_metainfo, tmp_path): bytes = bytes_type(b'x' * (torf.Torrent.MAX_TORRENT_FILE_SIZE + 1)) with pytest.raises(ValueError, match=( r'^Size of stream exceeds Torrent.MAX_TORRENT_FILE_SIZE: ' f'{torf.Torrent.MAX_TORRENT_FILE_SIZE + 1} > {torf.Torrent.MAX_TORRENT_FILE_SIZE}$' )): torf.Torrent.read_stream(bytes) def test_read_from_invalid_type(valid_multifile_metainfo, tmp_path): obj = 123 with pytest.raises(TypeError, match=r'^Expected bytes, bytearray or a readable file-like object, got int$'): torf.Torrent.read_stream(obj) def test_reading_converts_private_flag_to_bool(tmp_path, valid_singlefile_metainfo): valid_singlefile_metainfo[b'info'][b'private'] = 1 fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) torrent = torf.Torrent.read_stream(fo) assert torrent.metainfo['info']['private'] is True valid_singlefile_metainfo[b'info'][b'private'] = 0 fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) torrent = torf.Torrent.read_stream(fo) assert torrent.metainfo['info']['private'] is False def 
test_reading_torrent_without_private_flag(tmp_path, valid_singlefile_metainfo): valid_singlefile_metainfo[b'info'][b'private'] = 1 fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) torrent = torf.Torrent.read_stream(fo) assert torrent.metainfo['info']['private'] is True assert torrent.private is True del valid_singlefile_metainfo[b'info'][b'private'] fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) torrent = torf.Torrent.read_stream(fo) assert 'private' not in torrent.metainfo['info'] assert torrent.private is None def test_reading_torrent_without_creation_date(tmp_path, valid_singlefile_metainfo): del valid_singlefile_metainfo[b'creation date'] fo = io.BytesIO(bencode.encode(valid_singlefile_metainfo)) torrent = torf.Torrent.read_stream(fo) assert 'creation date' not in torrent.metainfo['info'] assert torrent.creation_date is None def test_read_from_torrent_file_with_empty_path_components(valid_multifile_metainfo, tmp_path): valid_multifile_metainfo[b'info'][b'files'][0][b'path'] = [b'', b'foo', b'', b'', b'bar', b''] f = (tmp_path / 'foo.torrent') f.write_bytes(bencode.encode(valid_multifile_metainfo)) t = torf.Torrent.read(str(f)) exp_path = f'{valid_multifile_metainfo[b"info"][b"name"].decode()}/foo/bar' assert exp_path in tuple(str(f) for f in t.files) def test_read_nonutf8_encoded_metainfo(valid_multifile_metainfo, tmp_path): valid_multifile_metainfo[b'comment'] = 'A çommقnt'.encode('CP1256') valid_multifile_metainfo[b'created by'] = 'bДd ЗncodeЯ'.encode('CP866') valid_multifile_metainfo[b'info'][b'name'] = 'Thê ñãme'.encode('CP860') valid_multifile_metainfo[b'info'][b'source'] = 'Þhe ßource'.encode('CP861') valid_multifile_metainfo[b'info'][b'files'] = [ { b'path': [ 'FΩO'.encode('ISO8859-7'), 'BAΓ'.encode('ISO8859-7'), 'βAZ'.encode('ISO8859-7'), ], b'length': 124, }, ] f = (tmp_path / 'test.torrent') f.write_bytes(bencode.encode(valid_multifile_metainfo)) t = torf.Torrent.read(str(f)) assert t.name == 'Th� ��me' assert t.comment == 'A 
�omm�nt' assert t.created_by == 'b�d �ncode�' assert t.source == '�he �ource' assert t.files == [ _utils.File('Th� ��me/F�O/BA�/�AZ', size=124), ] rndusr-torf-547b989/tests/test_reuse.py000066400000000000000000000723121513142010300201650ustar00rootroot00000000000000import collections import copy import errno import os import re from types import SimpleNamespace from unittest.mock import MagicMock, Mock, call import pytest import torf from . import ComparableException @pytest.fixture(autouse=True) def ordered_listdir(mocker): def ordered_listdir(*args, _real_listdir=os.listdir, **kwargs): return sorted(_real_listdir(*args, **kwargs)) mocker.patch('os.listdir', ordered_listdir) @pytest.fixture def existing_torrents(create_dir, create_file, tmp_path): class ExistingTorrents: def __init__(self, **torrent_directories): self._torrents = {} for dirname, info in torrent_directories.items(): self._torrents[dirname] = self._create_torrents(dirname, *info) # Sprinkle in some non-torrent files for dirname in self._torrents: (tmp_path / dirname / 'foo.jpg').write_bytes(b"Ceci n'est pas une JPEG") (tmp_path / dirname / 'foo.txt').write_text('But this looks like text') @staticmethod def _create_torrents(directory, *items): torrents_directory = tmp_path / directory torrents_directory.mkdir(exist_ok=True) torrents = [] for item in items: torrent_name = item[0] create_args = item[1] torrent_kwargs = item[2] if isinstance(create_args, collections.abc.Sequence) and not isinstance(create_args, str): content_path = create_dir(torrent_name, *create_args) else: content_path = create_file(torrent_name, create_args) torrent = torf.Torrent(path=content_path, **torrent_kwargs) torrent_filepath = torrents_directory / f'{torrent_name}.torrent' # Add some non-standard fields into each file list if 'files' in torrent.metainfo['info']: for i in range(len(torrent.metainfo['info']['files'])): torrent.metainfo['info']['files'][i]['foohash'] = 'This could be your MD5 sum' torrent.generate() 
torrent.write(torrent_filepath) torrents.append(SimpleNamespace( torrent=torrent, torrent_path=torrent_filepath, content_path=content_path, )) print('created torrent:\n', torrents[-1].torrent_path, '\n', torrents[-1].torrent.metainfo) return torrents def __del__(self, *args, **kwargs): # Make sure pytest can delete files and directories for dirname in self._torrents: (tmp_path / dirname).chmod(0o700) for rootdir, dirnames, filenames in os.walk(tmp_path / dirname): for dirname in dirnames: os.chmod(os.path.join(rootdir, dirname), 0o700) for filename in filenames: os.chmod(os.path.join(rootdir, filename), 0o600) def __getattr__(self, name): return self._torrents[name] @property def locations(self): return {dirname: (tmp_path / dirname) for dirname in self._torrents} @property def location_paths(self): return tuple(tmp_path / dirname for dirname in self._torrents) @property def torrent_filepaths(self): return tuple( tmp_path / dirname / info.torrent_path for dirname, infos in self._torrents.items() for info in infos ) return ExistingTorrents @pytest.mark.parametrize( argnames='path, exp_find_torrent_files_args, exp_exception', argvalues=( ('a/path', ('a/path',), None), (('a/path', 'another/path'), ('a/path', 'another/path'), None), (iter(('a/path', 'another/path')), ('a/path', 'another/path'), None), (123, (), ValueError('Invalid path argument: 123')), ), ) def test_path_argument(path, exp_find_torrent_files_args, exp_exception, create_file, mocker): find_torrent_files_mock = mocker.patch('torf._reuse.find_torrent_files', MagicMock( __iter__=MagicMock(return_value=()), total=0, )) torrent = torf.Torrent(path=create_file('just_a_file', 'foo')) if exp_exception: with pytest.raises(type(exp_exception), match=rf'^{re.escape(str(exp_exception))}$'): torrent.reuse(path) else: return_value = torrent.reuse(path) assert return_value is False assert find_torrent_files_mock.call_args_list == [call( *exp_find_torrent_files_args, max_file_size=torf.Torrent.MAX_TORRENT_FILE_SIZE, 
)] def test_max_torrent_file_size(create_file, existing_torrents, mocker): # Create and prepare existing torrents existing_torrents = existing_torrents( subpath1=( ('a', 'foo', {'creation_date': 123}), ('b', 'bar', {'creation_date': 456}), ('c', 'baz', {'creation_date': 789}), ), subpath2=( ('d', 'hey', {'private': True}), ('e', 'ho', {'comment': 'yo'}), ('f', 'oh', {'comment': 'oy'}), ('g', 'ohh', {'comment': 'oyy'}), ), ) # Make some torrents really big with open(existing_torrents.torrent_filepaths[1], 'wb') as f: f.truncate(20 * 1048576) with open(existing_torrents.torrent_filepaths[3], 'wb') as f: f.truncate(30 * 1048576) callback = Mock(return_value=None) new_torrent = torf.Torrent(path=create_file('just_a_file', 'foo')) return_value = new_torrent.reuse(existing_torrents.location_paths, callback=callback) assert return_value is False assert callback.call_args_list == [ call(new_torrent, str(existing_torrents.subpath1[0].torrent_path), 1, 5, False, None), call(new_torrent, str(existing_torrents.subpath1[2].torrent_path), 2, 5, False, None), call(new_torrent, str(existing_torrents.subpath2[1].torrent_path), 3, 5, False, None), call(new_torrent, str(existing_torrents.subpath2[2].torrent_path), 4, 5, False, None), call(new_torrent, str(existing_torrents.subpath2[3].torrent_path), 5, 5, False, None), ] @pytest.mark.parametrize('with_callback', (True, False), ids=('with_callback', 'without_callback')) def test__singlefile__no_exceptions(with_callback, existing_torrents): # Create and prepare existing torrents existing_torrents = existing_torrents( my_torrents=( ('a', 'foo', {'creation_date': 123}), ('b', 'bar', {'creation_date': 456}), ('c', 'baz', {'creation_date': 789}), ('d', 'arf', {'created_by': 'me!'}), ('e', 'barf', {'source': 'you!'}), ), ) # Create and prepare the torrent we want to generate reused = existing_torrents.my_torrents[2] new_torrent = torf.Torrent( path=reused.content_path, trackers=('http://foo:1000', 'http://foo:2000'), 
webseeds=('http://bar:1000',), httpseeds=('http://baz:1000',), private=True, comment='This is a custom torrent', creation_date=123000, created_by='CREATOR', source='SRC', piece_size=8 * 1048576, randomize_infohash=True, ) # Expect the same metainfo, but with important parts copied exp_joined_metainfo = copy.deepcopy(new_torrent.metainfo) exp_joined_metainfo['info']['piece length'] = reused.torrent.metainfo['info']['piece length'] exp_joined_metainfo['info']['pieces'] = reused.torrent.metainfo['info']['pieces'] # Reuse existing torrent if with_callback: callback = Mock(return_value=None) return_value = new_torrent.reuse(existing_torrents.location_paths, callback=callback) # Confirm everything happened as expected assert return_value is True assert new_torrent.metainfo == exp_joined_metainfo assert callback.call_args_list == [ call(new_torrent, str(existing_torrents.my_torrents[0].torrent_path), 1, 5, False, None), call(new_torrent, str(existing_torrents.my_torrents[1].torrent_path), 2, 5, False, None), call(new_torrent, str(existing_torrents.my_torrents[2].torrent_path), 3, 5, None, None), call(new_torrent, str(existing_torrents.my_torrents[2].torrent_path), 3, 5, True, None), ] else: return_value = new_torrent.reuse(existing_torrents.location_paths) # Confirm everything happened as expected assert return_value is True assert new_torrent.metainfo == exp_joined_metainfo @pytest.mark.parametrize('with_callback', (True, False), ids=('with_callback', 'without_callback')) def test__multifile__no_exceptions(with_callback, existing_torrents): # Create and prepare existing torrents with some of them sharing the same # (torrent name, file name, file size) but different file contents existing_torrents = existing_torrents( torrents1=( ('a', ( ('this.jpg', 16380 * 30), ('that.txt', 'text data'), ), {'creation_date': 123}), ('b', ( ('this.jpg', 16380 * 30), ('that.txt', 'text doto'), ), {'creation_date': 456}), ('c', ( ('this.jpg', 16380 * 30), ('that.txt', 'text diti'), ), 
{'creation_date': 789}), ), torrents2=( ('a', ( ('this.jpg', 16380 * 30), ('that.txt', 'more text'), ), {'creation_date': 234}), ('b', ( ('this.jpg', 16380 * 30), ('that.txt', 'mare text'), ), {'creation_date': 345}), ('c', ( ('this.jpg', 16380 * 30), ('that.txt', 'mire text'), ), {'creation_date': 456}), ), ) # Create and prepare the torrent we want to generate reused = existing_torrents.torrents2[1] new_torrent = torf.Torrent( path=reused.content_path, trackers=('http://foo:1000', 'http://foo:2000'), webseeds=('http://bar:1000',), httpseeds=('http://baz:1000',), private=True, comment='This is a custom torrent', creation_date=123000, created_by='CREATOR', source='SRC', piece_size=1048576, randomize_infohash=True, ) # Expect the same metainfo, but with important parts copied exp_joined_metainfo = copy.deepcopy(new_torrent.metainfo) exp_joined_metainfo['info']['piece length'] = reused.torrent.metainfo['info']['piece length'] exp_joined_metainfo['info']['pieces'] = reused.torrent.metainfo['info']['pieces'] exp_joined_metainfo['info']['files'] = [ {'length': f['length'], 'path': f['path']} for f in reused.torrent.metainfo['info']['files'] ] # Reuse existing torrent if with_callback: callback = Mock(return_value=None) return_value = new_torrent.reuse(existing_torrents.location_paths, callback=callback) # Confirm everything happened as expected assert return_value is True assert new_torrent.metainfo == exp_joined_metainfo assert callback.call_args_list == [ call(new_torrent, str(existing_torrents.torrents1[0].torrent_path), 1, 6, False, None), call(new_torrent, str(existing_torrents.torrents1[1].torrent_path), 2, 6, None, None), call(new_torrent, str(existing_torrents.torrents1[1].torrent_path), 2, 6, False, None), call(new_torrent, str(existing_torrents.torrents1[2].torrent_path), 3, 6, False, None), call(new_torrent, str(existing_torrents.torrents2[0].torrent_path), 4, 6, False, None), call(new_torrent, str(reused.torrent_path), 5, 6, None, None), call(new_torrent, 
str(reused.torrent_path), 5, 6, True, None), ] else: return_value = new_torrent.reuse(existing_torrents.location_paths) # Confirm everything happened as expected assert return_value is True assert new_torrent.metainfo == exp_joined_metainfo @pytest.mark.parametrize('with_callback', (True, False), ids=('with_callback', 'without_callback')) def test_exceptions(with_callback, existing_torrents): # Create and prepare existing torrents existing_torrents = existing_torrents( readable1=( ('a', 'foo', {'creation_date': 123}), ('b', 'bar', {'creation_date': 456}), ('c', 'baz', {'creation_date': 789}), ), unreadable=(), readable2=( ('d', 'hey', {'private': True}), ('e', 'ho', {'comment': 'yo'}), ('f', 'oh', {'comment': 'oy'}), ('g', 'ohh', {'comment': 'oyy'}), ), ) # Unreadable directory existing_torrents.locations['unreadable'].chmod(0o300) # Unreadable torrent file existing_torrents.readable2[1].torrent_path.chmod(0o300) # Nonexisting torrent file nonexisting_torrent_file = 'no/such/path.torrent' # Create and prepare the torrent we want to generate reused = existing_torrents.readable2[2] new_torrent = torf.Torrent( path=reused.content_path, trackers=('http://foo:1000', 'http://foo:2000'), webseeds=('http://bar:1000',), httpseeds=('http://baz:1000',), private=True, comment='This is a custom torrent', creation_date=123000, created_by='CREATOR', source='SRC', piece_size=8 * 1048576, randomize_infohash=True, ) # Reuse existing torrent if with_callback: # Expect the same metainfo, but with important parts copied exp_joined_metainfo = copy.deepcopy(new_torrent.metainfo) exp_joined_metainfo['info']['piece length'] = reused.torrent.metainfo['info']['piece length'] exp_joined_metainfo['info']['pieces'] = reused.torrent.metainfo['info']['pieces'] callback = Mock(return_value=None) location_paths = (nonexisting_torrent_file,) + existing_torrents.location_paths return_value = new_torrent.reuse(location_paths, callback=callback) # Confirm everything happened as expected assert 
return_value is True assert new_torrent.metainfo == exp_joined_metainfo for c in callback.call_args_list: print(c) assert callback.call_args_list == [ call( new_torrent, nonexisting_torrent_file, 1, 8, False, ComparableException( torf.ReadError(errno.ENOENT, nonexisting_torrent_file), ), ), call(new_torrent, str(existing_torrents.readable1[0].torrent_path), 2, 8, False, None), call(new_torrent, str(existing_torrents.readable1[1].torrent_path), 3, 8, False, None), call(new_torrent, str(existing_torrents.readable1[2].torrent_path), 4, 8, False, None), call( new_torrent, None, 4, 8, False, ComparableException( torf.ReadError(errno.EACCES, str(existing_torrents.locations['unreadable'])), ), ), call(new_torrent, str(existing_torrents.readable2[0].torrent_path), 5, 8, False, None), call( new_torrent, str(existing_torrents.readable2[1].torrent_path), 6, 8, False, ComparableException( torf.ReadError(errno.EACCES, str(existing_torrents.readable2[1].torrent_path)), ), ), call(new_torrent, str(existing_torrents.readable2[2].torrent_path), 7, 8, None, None), call(new_torrent, str(existing_torrents.readable2[2].torrent_path), 7, 8, True, None), ] else: # Expect identical metainfo exp_joined_metainfo = copy.deepcopy(new_torrent.metainfo) exp_exception = torf.ReadError(errno.EACCES, str(existing_torrents.locations['unreadable'])) with pytest.raises(type(exp_exception), match=rf'^{re.escape(str(exp_exception))}$'): new_torrent.reuse(existing_torrents.location_paths) # Confirm everything happened as expected assert new_torrent.metainfo == exp_joined_metainfo @pytest.mark.parametrize( argnames='cancel_condition, exp_callback_calls_count', argvalues=( # cancel_condition gets torrent_filepath and is_match and returns True # for cancelling, False otherwise. 
pytest.param( lambda tfp, is_match: is_match is False, 1, id='mismatch', ), pytest.param( lambda tfp, is_match: tfp is None, 4, id='unreadable directory', ), pytest.param( lambda tfp, is_match: os.path.basename(tfp or '') == 'e.torrent', 6, id='unreadable torrent file', ), pytest.param( lambda tfp, is_match: os.path.basename(tfp or '') == 'f.torrent', 7, id='invalid bencoded data', ), pytest.param( lambda tfp, is_match: os.path.basename(tfp or '') == 'g.torrent', 8, id='invalid metainfo', ), pytest.param( lambda tfp, is_match: is_match is None, 9, id='verification', ), ), ) def test_callback_cancels_when_handling(cancel_condition, exp_callback_calls_count, existing_torrents, create_file): # Create and prepare existing torrents existing_torrents = existing_torrents( readable1=( ('a', 'foo', {'creation_date': 123}), ('b', 'bar', {'creation_date': 456}), ('c', 'baz', {'creation_date': 789}), ), # Unreadable directory unreadable=(), readable2=( ('d', 'hey', {'private': True}), ('e', 'ho', {'comment': 'yo'}), ('f', 'oh', {'comment': 'oy'}), ('g', 'ohh', {'comment': 'oyy'}), ('h', 'ohy', {'comment': 'hoyo'}), ), ) # ReadError (directory) existing_torrents.locations['unreadable'].chmod(0o300) # ReadError (torrent file) existing_torrents.readable2[1].torrent_path.chmod(0o300) # BdecodeError data = bytearray(existing_torrents.readable2[2].torrent_path.read_bytes()) data[0] = ord('x') existing_torrents.readable2[2].torrent_path.write_bytes(data) # MetainfoError del existing_torrents.readable2[3].torrent.metainfo['info']['piece length'] existing_torrents.readable2[3].torrent.write( existing_torrents.readable2[3].torrent_path, validate=False, overwrite=True, ) # Create and prepare the torrent we want to generate reused = existing_torrents.readable2[4] new_torrent = torf.Torrent(path=reused.content_path) exp_joined_metainfo = copy.deepcopy(new_torrent.metainfo) def callback(torrent, torrent_path, done, total, is_match, exception): if cancel_condition(torrent_path, is_match): 
return 'cancel' callback_wrapper = Mock(side_effect=callback) # Reuse existing torrent return_value = new_torrent.reuse(existing_torrents.location_paths, callback=callback_wrapper) # Confirm everything happened as expected assert return_value is False assert new_torrent.metainfo == exp_joined_metainfo all_callback_calls = [ call(new_torrent, str(existing_torrents.readable1[0].torrent_path), 1, 8, False, None), call(new_torrent, str(existing_torrents.readable1[1].torrent_path), 2, 8, False, None), call(new_torrent, str(existing_torrents.readable1[2].torrent_path), 3, 8, False, None), call( new_torrent, None, 3, 8, False, ComparableException( torf.ReadError(errno.EACCES, str(existing_torrents.locations['unreadable'])), ), ), call(new_torrent, str(existing_torrents.readable2[0].torrent_path), 4, 8, False, None), call( new_torrent, str(existing_torrents.readable2[1].torrent_path), 5, 8, False, ComparableException( torf.ReadError(errno.EACCES, str(existing_torrents.readable2[1].torrent_path)), ), ), call( new_torrent, str(existing_torrents.readable2[2].torrent_path), 6, 8, False, ComparableException( torf.BdecodeError(str(existing_torrents.readable2[2].torrent_path)), ), ), call( new_torrent, str(existing_torrents.readable2[3].torrent_path), 7, 8, False, ComparableException( torf.MetainfoError("Missing 'piece length' in ['info']"), ), ), call(new_torrent, str(existing_torrents.readable2[4].torrent_path), 8, 8, None, None), call(new_torrent, str(existing_torrents.readable2[4].torrent_path), 8, 8, False, None), ] assert callback_wrapper.call_args_list == all_callback_calls[:exp_callback_calls_count] @pytest.mark.parametrize('with_callback', (True, False), ids=('with_callback', 'without_callback')) def test_handling_of_nonexisting_path(with_callback, existing_torrents): # Create and prepare existing torrents existing_torrents = existing_torrents( my_torrents=( ('a', 'foo', {'creation_date': 123}), ('b', 'bar', {'creation_date': 456}), ('c', 'baz', {'creation_date': 789}), 
), ) # Create and prepare the torrent we want to generate reused = existing_torrents.my_torrents[0] new_torrent = torf.Torrent(path=reused.content_path) # Expect identical metainfo exp_joined_metainfo = copy.deepcopy(new_torrent.metainfo) # Reuse existing torrent reuse_torrent_path = 'path/to/nonexisting/directory' if with_callback: callback = Mock(return_value=None) return_value = new_torrent.reuse(reuse_torrent_path, callback=callback) # Confirm everything happened as expected assert return_value is False assert new_torrent.metainfo == exp_joined_metainfo assert callback.call_args_list == [ call( new_torrent, None, 0, 0, False, ComparableException( torf.ReadError(errno.ENOENT, reuse_torrent_path), ), ), ] else: exp_exception = torf.ReadError(errno.ENOENT, reuse_torrent_path) with pytest.raises(type(exp_exception), match=rf'^{re.escape(str(exp_exception))}$'): new_torrent.reuse(reuse_torrent_path) assert new_torrent.metainfo == exp_joined_metainfo @pytest.mark.parametrize('with_callback', (True, False), ids=('with_callback', 'without_callback')) def test_reuse_with_empty_file_list(with_callback, existing_torrents, create_file): # Create and prepare existing torrents existing_torrents = existing_torrents( my_torrents=( ('a.jpg', 'foo', {'creation_date': 123}), ('b.txt', 'bar', {'creation_date': 456}), ('c.mp4', 'baz', {'creation_date': 789}), ), ) # Create and prepare the torrent we want to generate new_torrent = torf.Torrent( path=create_file('just_a_file.jpg', 'foo'), exclude_globs=['*.jpg'], ) # Expect identical metainfo exp_joined_metainfo = copy.deepcopy(new_torrent.metainfo) exp_exception = RuntimeError('reuse() called while file list is empty') with pytest.raises(type(exp_exception), match=rf'^{re.escape(str(exp_exception))}$'): if with_callback: new_torrent.reuse(existing_torrents.location_paths, callback=Mock()) else: new_torrent.reuse(existing_torrents.location_paths) assert new_torrent.metainfo == exp_joined_metainfo def 
test_reuse_considers_piece_size_min_max(existing_torrents): # Create and prepare existing torrents existing_torrents = existing_torrents( small=( ('a.jpg', 'foo', {'piece_size': 1048576 / 2}), ('b.txt', 'bar', {'piece_size': 1048576 * 1}), ('c.mp4', 'baz', {'piece_size': 1048576 / 2}), ), big=( ('a.jpg', 'foo', {'piece_size': 1048576 / 2}), ('b.txt', 'bar', {'piece_size': 1048576 * 4}), ('c.mp4', 'baz', {'piece_size': 1048576 / 2}), ), medium=( ('a.jpg', 'foo', {'piece_size': 1048576 / 2}), ('b.txt', 'bar', {'piece_size': 1048576 * 2}), ('c.mp4', 'baz', {'piece_size': 1048576 / 2}), ), large=( ('a.jpg', 'foo', {'piece_size': 1048576 / 2}), ('b.txt', 'bar', {'piece_size': 1048576 * 8}), ('c.mp4', 'baz', {'piece_size': 1048576 / 2}), ), giant=( ('a.jpg', 'foo', {'piece_size': 1048576 / 2}), ('b.txt', 'bar', {'piece_size': 1048576 * 16}), ('c.mp4', 'baz', {'piece_size': 1048576 / 2}), ), ) # Create and prepare the torrent we want to generate reused = existing_torrents.medium[1] new_torrent = torf.Torrent(path=reused.content_path) exp_joined_metainfo = copy.deepcopy(new_torrent.metainfo) # Limit piece size to 1 - 2 MiB new_torrent.piece_size_min = 1 * 1048576 new_torrent.piece_size_max = 2 * 1048576 exp_joined_metainfo['info']['piece length'] = 1048576 * 1 exp_joined_metainfo['info']['pieces'] = existing_torrents.medium[1].torrent.metainfo['info']['pieces'] new_torrent.reuse(existing_torrents.location_paths) assert new_torrent.metainfo == exp_joined_metainfo # Limit piece size to 2 - 4 MiB new_torrent.piece_size_min = 2 * 1048576 new_torrent.piece_size_max = 4 * 1048576 exp_joined_metainfo['info']['piece length'] = 1048576 * 4 exp_joined_metainfo['info']['pieces'] = existing_torrents.small[1].torrent.metainfo['info']['pieces'] new_torrent.reuse(existing_torrents.location_paths) assert new_torrent.metainfo == exp_joined_metainfo # Limit piece size to 4 - 8 MiB new_torrent.piece_size_min = 4 * 1048576 new_torrent.piece_size_max = 8 * 1048576 
exp_joined_metainfo['info']['piece length'] = 1048576 * 4 exp_joined_metainfo['info']['pieces'] = existing_torrents.big[1].torrent.metainfo['info']['pieces'] new_torrent.reuse(existing_torrents.location_paths) assert new_torrent.metainfo == exp_joined_metainfo # Limit piece size to 8 - 16 MiB new_torrent.piece_size_min = 8 * 1048576 new_torrent.piece_size_max = 16 * 1048576 exp_joined_metainfo['info']['piece length'] = 1048576 * 8 exp_joined_metainfo['info']['pieces'] = existing_torrents.small[1].torrent.metainfo['info']['pieces'] new_torrent.reuse(existing_torrents.location_paths) assert new_torrent.metainfo == exp_joined_metainfo @pytest.mark.parametrize('with_callback', (True, False), ids=('with_callback', 'without_callback')) def test_reuse_copies_file_order(with_callback, existing_torrents): # Create and prepare existing torrents with some of them sharing the same # (torrent name, file name, file size) but different file contents existing_torrents = existing_torrents( my_torrents=( ('a', ( ('this.jpg', 16380 * 30), ('that.txt', 'text data'), ), {'creation_date': 123}), ('b', ( ('this.jpg', 16380 * 30), ('that.txt', 'text doto'), ), {'creation_date': 456}), ('c', ( ('this.jpg', 16380 * 30), ('that.txt', 'text diti'), ), {'creation_date': 789}), ), ) # Create and prepare the torrent we want to generate reused = existing_torrents.my_torrents[1] new_torrent = torf.Torrent(reused.content_path) # Differing file order shouldn't matter, the new torrent should have the # same order as the reused torrent new_torrent.metainfo['info']['files'][0], new_torrent.metainfo['info']['files'][1] = \ new_torrent.metainfo['info']['files'][1], new_torrent.metainfo['info']['files'][0] # Expect the same metainfo, but with important parts copied exp_joined_metainfo = copy.deepcopy(new_torrent.metainfo) exp_joined_metainfo['info']['piece length'] = reused.torrent.metainfo['info']['piece length'] exp_joined_metainfo['info']['pieces'] = reused.torrent.metainfo['info']['pieces'] 
exp_joined_metainfo['info']['files'] = [ {'length': f['length'], 'path': f['path']} for f in reused.torrent.metainfo['info']['files'] ] # Reuse existing torrent if with_callback: callback = Mock(return_value=None) return_value = new_torrent.reuse(existing_torrents.location_paths, callback=callback) # Confirm everything happened as expected assert return_value is True assert new_torrent.metainfo == exp_joined_metainfo assert callback.call_args_list == [ call(new_torrent, str(existing_torrents.my_torrents[0].torrent_path), 1, 3, False, None), call(new_torrent, str(existing_torrents.my_torrents[1].torrent_path), 2, 3, None, None), call(new_torrent, str(existing_torrents.my_torrents[1].torrent_path), 2, 3, True, None), ] else: return_value = new_torrent.reuse(existing_torrents.location_paths) # Confirm everything happened as expected assert return_value is True assert new_torrent.metainfo == exp_joined_metainfo rndusr-torf-547b989/tests/test_stream.py000066400000000000000000002604601513142010300203400ustar00rootroot00000000000000import errno import math import os import re from unittest.mock import Mock, PropertyMock, call import pytest from torf import MemoryError, ReadError, TorrentFileStream, VerifyFileSizeError from . 
class File(str):
    """
    Relative file path (a ``str``) that also carries a fake file's size and content.

    :param path: Path of the file; becomes the string value of the instance
    :param content: Either the file size as an ``int`` (content is then
        generated from a running byte counter) or a bytes-like object that
        is used as the content verbatim

    Two ``File`` objects are equal if their paths and their sizes are equal.
    Comparing against a plain ``str`` falls back to ``str``'s comparison, so
    ``File('a', 1) == 'a'`` is true.
    """

    # Next byte value used when content is generated from a size. It lives on
    # the class so consecutively created files continue the same 0..255 cycle.
    byte_counter = 0

    def __new__(cls, path, content=None):
        self = super().__new__(cls, path)
        if isinstance(content, int):
            self.size = content
            first_byte = cls.byte_counter
            # Consecutive byte values starting at the counter, wrapping
            # around after 255.
            self.content = bytearray(
                (first_byte + offset) % 256
                for offset in range(content)
            )
            # A non-positive size generates nothing and must not move the counter.
            cls.byte_counter = (first_byte + max(content, 0)) % 256
        else:
            self.size = len(content)
            self.content = bytes(content)
        return self

    def __eq__(self, other):
        if isinstance(other, type(self)):
            return super().__eq__(other) and self.size == other.size
        else:
            return NotImplemented

    def __ne__(self, other):
        # Propagate NotImplemented instead of negating it: "not NotImplemented"
        # evaluated to False for every non-File operand (so File('a', 1) != 'b'
        # was False) and is deprecated since Python 3.9.
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return NotImplemented
        return not equal

    def __hash__(self):
        return hash((str(self), self.size))

    def __repr__(self):
        return f'{type(self).__name__}({str(self)}, {len(self.content)})'

    def write_at(self, directory, content=None):
        """
        Write the file below `directory`, creating parent directories as needed.

        :param directory: Path-like object that supports the ``/`` operator
        :param content: Bytes to write instead of :attr:`content`
        """
        filepath = directory / self
        filepath.parent.mkdir(parents=True, exist_ok=True)
        if content is not None:
            filepath.write_bytes(content)
        else:
            filepath.write_bytes(self.content)

    @property
    def parts(self):
        """Path segments, split at :data:`os.path.sep`"""
        return self.split(os.path.sep)


class Torrent:
    """
    Minimal torrent object that is handed to ``TorrentFileStream`` in tests.

    :param files: Sequence of :class:`File` objects
    :param piece_size: Number of bytes per piece
    :param path: Content path of the torrent or ``None``
    """

    def __init__(self, files, piece_size, path=None):
        self.files = files
        self.path = path
        self.piece_size = piece_size
        self.size = sum(f.size for f in files)
        # Ceiling division in pure integer arithmetic. The previous
        # "int(size / piece_size) + 1" counted one piece too many whenever
        # the size was an exact multiple of the piece size.
        self.pieces = -(-self.size // piece_size)

    @property
    def mode(self):
        """``'singlefile'`` for one file without a directory, else ``'multifile'``"""
        if len(self.files) == 1 and os.path.sep not in self.files[0]:
            return 'singlefile'
        else:
            return 'multifile'
(None, None, '', ''), (None, '', None, ''), ('', None, None, ''), ), ) def test_get_content_path_from_multifile_torrent( torrent_content_path, stream_content_path, custom_content_path, exp_content_path, none_ok, file): torrent = Torrent(piece_size=123, files=(File('MyTorrent/a', 1),), path=torrent_content_path) tfs = TorrentFileStream(torrent, content_path=stream_content_path) if exp_content_path is None and not none_ok: with pytest.raises(ValueError, match=r'^Missing content_path argument and torrent has no path specified$'): tfs._get_content_path(custom_content_path, none_ok=none_ok, file=file) else: content_path = tfs._get_content_path(custom_content_path, none_ok=none_ok, file=file) if file is not None: file_parts = file.split(os.path.sep) if not exp_content_path: exp_content_path = file else: exp_content_path = os.path.join(exp_content_path, *file_parts[1:]) assert content_path == exp_content_path @pytest.mark.parametrize('file', (None, File('foo.txt', 123))) @pytest.mark.parametrize('none_ok', (True, False)) @pytest.mark.parametrize( argnames='torrent_content_path, stream_content_path, custom_content_path, exp_content_path', argvalues=( ('torrent/path', 'stream/path', 'custom/path', 'custom/path'), ('torrent/path', 'stream/path', None, 'stream/path'), ('torrent/path', 'stream/path', '', ''), ('torrent/path', None, None, 'torrent/path'), ('torrent/path', None, '', ''), ('torrent/path', '', None, ''), (None, None, None, None), (None, None, '', ''), (None, '', None, ''), ('', None, None, ''), ), ) def test_get_content_path_from_singlefile_torrent( torrent_content_path, stream_content_path, custom_content_path, exp_content_path, none_ok, file): torrent = Torrent(piece_size=123, files=(File('a', 1),), path=torrent_content_path) tfs = TorrentFileStream(torrent, content_path=stream_content_path) if exp_content_path is None and not none_ok: with pytest.raises(ValueError, match=r'^Missing content_path argument and torrent has no path specified$'): 
def test_behaviour_as_context_manager(mocker):
    """Entering the stream yields the stream itself; leaving it calls close() once."""
    stream = TorrentFileStream(Torrent(piece_size=123, files=(File('a', 1),)))
    mocker.patch.object(stream, 'close')

    # Nothing is closed before or while the context is active
    assert stream.close.call_args_list == []
    with stream as entered:
        assert entered is stream
        assert stream.close.call_args_list == []

    # Leaving the context closes exactly once, without arguments
    assert stream.close.call_args_list == [call()]


def test_close():
    """close() closes every open file handle and forgets all of them."""
    files = (File('a', 1), File('b', 2), File('c', 3))
    stream = TorrentFileStream(Torrent(piece_size=123, files=files))

    # One fake file handle per file in the torrent
    handles = [Mock() for _ in files]
    stream._open_files = {
        f'path/to/{index}': handle
        for index, handle in enumerate(handles)
    }

    stream.close()

    assert [handle.close.call_args_list for handle in handles] == [[call()] for _ in handles]
    assert stream._open_files == {}


@pytest.mark.parametrize(
    'chunk_size, files, exp_max_piece_index',
    (
        # 18 bytes: the stream ends exactly at the end of piece 2
        (6, [File('a', 6), File('b', 12)], 2),
        # 19 bytes: a single byte spills over into piece 3
        (6, [File('a', 7), File('b', 12)], 3),
        # 20 bytes: piece 3 is partially filled
        (6, [File('a', 8), File('b', 12)], 3),
        # 24 bytes: piece 3 is completely filled
        (6, [File('a', 8), File('b', 16)], 3),
        # 25 bytes: a single byte spills over into piece 4
        (6, [File('a', 8), File('b', 17)], 4),
    ),
    ids=str,
)
def test_max_piece_index(chunk_size, files, exp_max_piece_index):
    """max_piece_index is the index of the piece that contains the last byte."""
    stream = TorrentFileStream(Torrent(piece_size=chunk_size, files=files))
    assert stream.max_piece_index == exp_max_piece_index
[File('a', 5)], 'a', 0), # 0 1 2 3 4 5 6 7 8 9 # aaabbbbbccccccccccdddddd (4, [File('a', 3), File('b', 5), File('c', 10), File('d', 6)], 'a', 0), (4, [File('a', 3), File('b', 5), File('c', 10), File('d', 6)], 'b', 3), (4, [File('a', 3), File('b', 5), File('c', 10), File('d', 6)], 'c', 8), (4, [File('a', 3), File('b', 5), File('c', 10), File('d', 6)], 'd', 18), # 0 1 2 3 4 5 6 7 8 9 # abcdddddd (4, [File('a', 1), File('b', 1), File('c', 1), File('d', 6)], 'a', 0), (4, [File('a', 1), File('b', 1), File('c', 1), File('d', 6)], 'b', 1), (4, [File('a', 1), File('b', 1), File('c', 1), File('d', 6)], 'c', 2), (4, [File('a', 1), File('b', 1), File('c', 1), File('d', 6)], 'd', 3), # 0 1 2 3 4 5 6 7 8 9 # aaaaaabcd (4, [File('a', 6), File('b', 1), File('c', 1), File('d', 1)], 'a', 0), (4, [File('a', 6), File('b', 1), File('c', 1), File('d', 1)], 'b', 6), (4, [File('a', 6), File('b', 1), File('c', 1), File('d', 1)], 'c', 7), (4, [File('a', 6), File('b', 1), File('c', 1), File('d', 1)], 'd', 8), ), ids=lambda v: str(v), ) def test_get_file_position(chunk_size, files, file, exp_result): torrent = Torrent(piece_size=chunk_size, files=files) tfs = TorrentFileStream(torrent) if isinstance(exp_result, BaseException): with pytest.raises(type(exp_result), match=rf'^{re.escape(str(exp_result))}$'): tfs.get_file_position(file) else: assert tfs.get_file_position(file) == exp_result @pytest.mark.parametrize( argnames='chunk_size, files, position, exp_result', argvalues=( # 0 1 2 3 4 5 # abc (4, [File('a', 1), File('b', 1), File('c', 1)], -1, ValueError('position is out of bounds (0 - 2): -1')), (4, [File('a', 1), File('b', 1), File('c', 1)], 0, 'a'), (4, [File('a', 1), File('b', 1), File('c', 1)], 1, 'b'), (4, [File('a', 1), File('b', 1), File('c', 1)], 2, 'c'), (4, [File('a', 1), File('b', 1), File('c', 1)], 3, ValueError('position is out of bounds (0 - 2): 3')), # 0 1 2 3 4 5 # aaabbbbbcccccccdddddd (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], -1, ValueError('position 
is out of bounds (0 - 20): -1')), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 0, 'a'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 1, 'a'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 2, 'a'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 3, 'b'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 4, 'b'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 5, 'b'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 6, 'b'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 7, 'b'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 8, 'c'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 9, 'c'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 10, 'c'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 11, 'c'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 12, 'c'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 13, 'c'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 14, 'c'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 15, 'd'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 16, 'd'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 17, 'd'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 18, 'd'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 19, 'd'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 20, 'd'), (4, [File('a', 3), File('b', 5), File('c', 7), File('d', 6)], 21, ValueError('position is out of bounds (0 - 20): 21')), ), ) def test_get_file_at_position(chunk_size, files, position, exp_result, mocker): torrent = Torrent(piece_size=chunk_size, files=files) tfs = TorrentFileStream(torrent) def mock_content_path(content_path, none_ok, file): return f'{content_path} / {none_ok} / {file}' mocker.patch.object(tfs, '_get_content_path', 
side_effect=mock_content_path) if isinstance(exp_result, BaseException): with pytest.raises(type(exp_result), match=rf'^{re.escape(str(exp_result))}$'): tfs.get_file_at_position(position, content_path='my/custom/path') else: exp_file = [f for f in files if f == exp_result][0] exp_filepath = f'my/custom/path / True / {exp_file}' filepath = tfs.get_file_at_position(position, content_path='my/custom/path') assert filepath == exp_filepath @pytest.mark.parametrize( argnames='chunk_size, files, exp_piece_indexes', argvalues=( (3, [File('a', 1)], {'a': [0]}), (3, [File('a', 2)], {'a': [0]}), (3, [File('a', 3)], {'a': [0]}), (3, [File('a', 4)], {'a': [0, 1]}), (3, [File('a', 5)], {'a': [0, 1]}), (3, [File('a', 6)], {'a': [0, 1]}), (3, [File('a', 7)], {'a': [0, 1, 2]}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaabbbbbbbbbbbbbbbbbbbbbbbc (6, [File('a', 5), File('b', 23), File('c', 1)], {'a': [0], 'b': [0, 1, 2, 3, 4], 'c': [4]}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaabbbbbbbbbbbbbbbbbbbbbbbc (6, [File('a', 6), File('b', 23), File('c', 1)], {'a': [0], 'b': [1, 2, 3, 4], 'c': [4]}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaabbbbbbbbbbbbbbbbbbbbbbbc (6, [File('a', 7), File('b', 23), File('c', 1)], {'a': [0, 1], 'b': [1, 2, 3, 4], 'c': [5]}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaabbbbbbbbbbbbbbbbbbbbbbbc (6, [File('a', 8), File('b', 23), File('c', 1)], {'a': [0, 1], 'b': [1, 2, 3, 4, 5], 'c': [5]}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaabbbbbbbbbbbbbbbbbbbc (6, [File('a', 11), File('b', 19), File('c', 1)], {'a': [0, 1], 'b': [1, 2, 3, 4], 'c': [5]}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaabbbbbbbbbbbbbbbbbbbc (6, [File('a', 12), File('b', 19), File('c', 1)], {'a': [0, 1], 'b': [2, 3, 4, 5], 'c': [5]}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbc (6, [File('a', 13), File('b', 19), File('c', 1)], {'a': [0, 1, 2], 'b': [2, 3, 4, 5], 'c': [5]}), ), ids=lambda v: repr(v), ) def test_get_piece_indexes_of_file_nonexclusive(chunk_size, files, exp_piece_indexes): torrent = Torrent(piece_size=chunk_size, 
files=files) tfs = TorrentFileStream(torrent) for filename, exp_indexes in exp_piece_indexes.items(): file = [f for f in torrent.files if f == filename][0] assert tfs.get_piece_indexes_of_file(file) == exp_indexes @pytest.mark.parametrize( argnames='chunk_size, files, exp_piece_indexes', argvalues=( (3, [File('a', 1)], {'a': [0]}), (3, [File('a', 2)], {'a': [0]}), (3, [File('a', 3)], {'a': [0]}), (3, [File('a', 4)], {'a': [0, 1]}), (3, [File('a', 5)], {'a': [0, 1]}), (3, [File('a', 6)], {'a': [0, 1]}), (3, [File('a', 7)], {'a': [0, 1, 2]}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaabbbbbbbbbbbbbbbbbbbbbbbc (6, [File('a', 5), File('b', 23), File('c', 1)], {'a': [], 'b': [1, 2, 3], 'c': []}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaabbbbbbbbbbbbbbbbbbbbbbbc (6, [File('a', 6), File('b', 23), File('c', 1)], {'a': [0], 'b': [1, 2, 3], 'c': []}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaabbbbbbbbbbbbbbbbbbbbbbbc (6, [File('a', 7), File('b', 23), File('c', 1)], {'a': [0], 'b': [2, 3, 4], 'c': [5]}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaabbbbbbbbbbbbbbbbbbbbbbbc (6, [File('a', 8), File('b', 23), File('c', 1)], {'a': [0], 'b': [2, 3, 4], 'c': []}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaabbbbbbbbbbbbbbbbbbbc (6, [File('a', 11), File('b', 19), File('c', 1)], {'a': [0], 'b': [2, 3, 4], 'c': [5]}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaabbbbbbbbbbbbbbbbbbbc (6, [File('a', 12), File('b', 19), File('c', 1)], {'a': [0, 1], 'b': [2, 3, 4], 'c': []}), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbc (6, [File('a', 13), File('b', 19), File('c', 1)], {'a': [0, 1], 'b': [3, 4], 'c': []}), ), ids=lambda v: repr(v), ) def test_get_piece_indexes_of_file_exclusive(chunk_size, files, exp_piece_indexes): torrent = Torrent(piece_size=chunk_size, files=files) tfs = TorrentFileStream(torrent) for filename, exp_indexes in exp_piece_indexes.items(): file = [f for f in torrent.files if f == filename][0] assert tfs.get_piece_indexes_of_file(file, exclusive=True) == exp_indexes @pytest.mark.parametrize( 
argnames='chunk_size, files, first_byte_indexes, last_byte_indexes, exp_files', argvalues=( # Files smaller than piece size # 0 1 2 3 4 5 6 7 8 9 0 # abbbccccccccccccccd (6, [File('a', 1), File('b', 3), File('c', 14), File('d', 1)], range(0, 1), range(0, 1), ['a']), (6, [File('a', 1), File('b', 3), File('c', 14), File('d', 1)], range(1, 4), range(1, 4), ['b']), (6, [File('a', 1), File('b', 3), File('c', 14), File('d', 1)], range(4, 18), range(4, 18), ['c']), (6, [File('a', 1), File('b', 3), File('c', 14), File('d', 1)], range(18, 19), range(18, 19), ['d']), (6, [File('a', 1), File('b', 3), File('c', 14), File('d', 1)], range(1, 4), range(18, 19), ['b', 'c', 'd']), (6, [File('a', 1), File('b', 3), File('c', 14), File('d', 1)], range(4, 18), range(18, 19), ['c', 'd']), (6, [File('a', 1), File('b', 3), File('c', 14), File('d', 1)], range(18, 19), range(18, 19), ['d']), (6, [File('a', 1), File('b', 3), File('c', 14), File('d', 1)], range(0, 1), range(0, 1), ['a']), (6, [File('a', 1), File('b', 3), File('c', 14), File('d', 1)], range(0, 1), range(1, 4), ['a', 'b']), (6, [File('a', 1), File('b', 3), File('c', 14), File('d', 1)], range(0, 1), range(4, 18), ['a', 'b', 'c']), (6, [File('a', 1), File('b', 3), File('c', 14), File('d', 1)], range(0, 1), range(18, 19), ['a', 'b', 'c', 'd']), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaabccddd (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(0, 11), range(16, 17), ['a', 'b', 'c', 'd']), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(11, 12), range(16, 17), ['b', 'c', 'd']), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(12, 14), range(16, 17), ['c', 'd']), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(14, 17), range(16, 17), ['d']), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(17, 20), range(16, 17), []), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(0, 1), range(0, 11), ['a']), (6, [File('a', 11), 
File('b', 1), File('c', 2), File('d', 3)], range(0, 1), range(11, 12), ['a', 'b']), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(0, 1), range(12, 14), ['a', 'b', 'c']), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(0, 1), range(14, 17), ['a', 'b', 'c', 'd']), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(0, 1), range(17, 20), ['a', 'b', 'c', 'd']), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(0, 11), range(0, 11), ['a']), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(11, 12), range(11, 12), ['b']), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(12, 14), range(12, 14), ['c']), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(14, 17), range(14, 17), ['d']), (6, [File('a', 11), File('b', 1), File('c', 2), File('d', 3)], range(17, 20), range(17, 20), []), # All files are bigger than piece size # 0 1 2 3 3 4 5 6 7 8 # aaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcccccccccccccccccccccccccccccccccccccccccccccccccc (12, [File('a', 11), File('b', 49), File('c', 50)], range(0, 11), range(109, 110), ['a', 'b', 'c']), (12, [File('a', 11), File('b', 49), File('c', 50)], range(11, 60), range(109, 110), ['b', 'c']), (12, [File('a', 11), File('b', 49), File('c', 50)], range(60, 110), range(109, 110), ['c']), (12, [File('a', 11), File('b', 49), File('c', 50)], range(110, 112), range(110, 112), []), (12, [File('a', 11), File('b', 49), File('c', 50)], range(0, 1), range(0, 11), ['a']), (12, [File('a', 11), File('b', 49), File('c', 50)], range(0, 1), range(11, 60), ['a', 'b']), (12, [File('a', 11), File('b', 49), File('c', 50)], range(0, 1), range(60, 110), ['a', 'b', 'c']), (12, [File('a', 11), File('b', 49), File('c', 50)], range(0, 1), range(110, 112), ['a', 'b', 'c']), (12, [File('a', 11), File('b', 49), File('c', 50)], range(0, 11), range(0, 11), ['a']), (12, [File('a', 11), File('b', 49), File('c', 50)], 
range(11, 60), range(11, 60), ['b']), (12, [File('a', 11), File('b', 49), File('c', 50)], range(60, 110), range(60, 110), ['c']), (12, [File('a', 11), File('b', 49), File('c', 50)], range(110, 112), range(110, 112), []), ), ids=lambda v: str(v), ) def test_get_files_at_byte_range(chunk_size, first_byte_indexes, last_byte_indexes, files, exp_files, mocker): torrent = Torrent(piece_size=chunk_size, files=files) tfs = TorrentFileStream(torrent) first_byte_indexes = tuple(first_byte_indexes) last_byte_indexes = tuple(last_byte_indexes) assert first_byte_indexes, first_byte_indexes assert last_byte_indexes, last_byte_indexes def mock_content_path(content_path, none_ok, file): return f'{content_path} / {none_ok} / {file}' mocker.patch.object(tfs, '_get_content_path', side_effect=mock_content_path) for first_byte_index in first_byte_indexes: for last_byte_index in last_byte_indexes: if first_byte_index <= last_byte_index: files = tfs.get_files_at_byte_range( first_byte_index, last_byte_index, content_path='my/custom_path', ) assert files == [f'my/custom_path / True / {file}' for file in exp_files] @pytest.mark.parametrize( argnames='chunk_size, files, file, exp_byte_range', argvalues=( # All files in one piece # 0 1 2 3 4 5 6 7 8 9 0 # abc (6, [File('a', 1), File('b', 1), File('c', 1)], 'a', (0, 0)), (6, [File('a', 1), File('b', 1), File('c', 1)], 'b', (1, 1)), (6, [File('a', 1), File('b', 1), File('c', 1)], 'c', (2, 2)), # 0 1 2 3 4 5 6 7 8 9 0 # aabccc (6, [File('a', 2), File('b', 1), File('c', 3)], 'a', (0, 1)), (6, [File('a', 2), File('b', 1), File('c', 3)], 'b', (2, 2)), (6, [File('a', 2), File('b', 1), File('c', 3)], 'c', (3, 5)), # First piece contains multiple files # 0 1 2 3 4 5 6 7 8 9 0 # aaabbbcccccccccccccccccccc (6, [File('a', 3), File('b', 3), File('c', 20)], 'a', (0, 2)), (6, [File('a', 3), File('b', 3), File('c', 20)], 'b', (3, 5)), (6, [File('a', 3), File('b', 3), File('c', 20)], 'c', (6, 25)), # 0 1 2 3 4 5 6 7 8 9 0 # 
aaaaabbbbbbbbbbcccccccccccccccccccc (6, [File('a', 5), File('b', 10), File('c', 20)], 'a', (0, 4)), (6, [File('a', 5), File('b', 10), File('c', 20)], 'b', (5, 14)), (6, [File('a', 5), File('b', 10), File('c', 20)], 'c', (15, 34)), # Middle piece contains multiple files # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaaaaaaaaaabbcccccccccccccccccccccccccccccc (6, [File('a', 20), File('b', 2), File('c', 30)], 'a', (0, 19)), (6, [File('a', 20), File('b', 2), File('c', 30)], 'b', (20, 21)), (6, [File('a', 20), File('b', 2), File('c', 30)], 'c', (22, 51)), # Last piece contains multiple files # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaabbbbbbbbbbbbbbbc (6, [File('a', 10), File('b', 15), File('c', 1)], 'a', (0, 9)), (6, [File('a', 10), File('b', 15), File('c', 1)], 'b', (10, 24)), (6, [File('a', 10), File('b', 15), File('c', 1)], 'c', (25, 25)), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaaaaaaaaaabbbc (6, [File('a', 20), File('b', 3), File('c', 1)], 'a', (0, 19)), (6, [File('a', 20), File('b', 3), File('c', 1)], 'b', (20, 22)), (6, [File('a', 20), File('b', 3), File('c', 1)], 'c', (23, 23)), ), ids=lambda v: str(v), ) def test_get_byte_range_of_file(chunk_size, files, file, exp_byte_range): torrent = Torrent(piece_size=chunk_size, files=files) tfs = TorrentFileStream(torrent) file = {str(f): f for f in torrent.files}[file] byte_range = tfs.get_byte_range_of_file(file) assert byte_range == exp_byte_range @pytest.mark.parametrize( argnames='chunk_size, files, piece_index, exp_return_value', argvalues=( # First piece contains multiple files # 0 1 2 3 4 5 6 7 8 9 0 # aabbbccccccccccccccccccccccccccccccccccccccccccccccccc (6, [File('a', 2), File('b', 3), File('c', 49)], -1, ValueError('piece_index is out of bounds (0 - 8): -1')), (6, [File('a', 2), File('b', 3), File('c', 49)], 0, ['a', 'b', 'c']), (6, [File('a', 2), File('b', 3), File('c', 49)], 1, ['c']), (6, [File('a', 2), File('b', 3), File('c', 49)], 2, ['c']), (6, [File('a', 2), File('b', 3), File('c', 49)], 8, ['c']), (6, [File('a', 2), 
File('b', 3), File('c', 49)], 9, ValueError('piece_index is out of bounds (0 - 8): 9')), # 0 1 2 3 4 5 6 7 8 9 0 # aabbbbccccccccccccccccccccccccccccccccccccccccccccccccc (6, [File('a', 2), File('b', 4), File('c', 49)], -1, ValueError('piece_index is out of bounds (0 - 9): -1')), (6, [File('a', 2), File('b', 4), File('c', 49)], 0, ['a', 'b']), (6, [File('a', 2), File('b', 4), File('c', 49)], 1, ['c']), (6, [File('a', 2), File('b', 4), File('c', 49)], 2, ['c']), (6, [File('a', 2), File('b', 4), File('c', 49)], 8, ['c']), (6, [File('a', 2), File('b', 4), File('c', 49)], 9, ['c']), (6, [File('a', 2), File('b', 4), File('c', 49)], 10, ValueError('piece_index is out of bounds (0 - 9): 10')), # 0 1 2 3 4 5 6 7 8 9 0 # aaabbbbccccccccccccccccccccccccccccccccccccccccccccccccc (6, [File('a', 3), File('b', 4), File('c', 49)], -1, ValueError('piece_index is out of bounds (0 - 9): -1')), (6, [File('a', 3), File('b', 4), File('c', 49)], 0, ['a', 'b']), (6, [File('a', 3), File('b', 4), File('c', 49)], 1, ['b', 'c']), (6, [File('a', 3), File('b', 4), File('c', 49)], 2, ['c']), (6, [File('a', 3), File('b', 4), File('c', 49)], 8, ['c']), (6, [File('a', 3), File('b', 4), File('c', 49)], 9, ['c']), (6, [File('a', 3), File('b', 4), File('c', 49)], 10, ValueError('piece_index is out of bounds (0 - 9): 10')), # Middle piece contains multiple files # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaaabcccdddddddddddddddddddddddddddddddddddd (6, [File('a', 13), File('b', 1), File('c', 3), File('d', 36)], -1, ValueError('piece_index is out of bounds (0 - 8): -1')), (6, [File('a', 13), File('b', 1), File('c', 3), File('d', 36)], 0, ['a']), (6, [File('a', 13), File('b', 1), File('c', 3), File('d', 36)], 1, ['a']), (6, [File('a', 13), File('b', 1), File('c', 3), File('d', 36)], 2, ['a', 'b', 'c', 'd']), (6, [File('a', 13), File('b', 1), File('c', 3), File('d', 36)], 3, ['d']), (6, [File('a', 13), File('b', 1), File('c', 3), File('d', 36)], 8, ['d']), (6, [File('a', 13), File('b', 1), File('c', 3), 
File('d', 36)], 9, ValueError('piece_index is out of bounds (0 - 8): 9')), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaaabbcccdddddddddddddddddddddddddddddddddddd (6, [File('a', 13), File('b', 2), File('c', 3), File('d', 36)], -1, ValueError('piece_index is out of bounds (0 - 8): -1')), (6, [File('a', 13), File('b', 2), File('c', 3), File('d', 36)], 0, ['a']), (6, [File('a', 13), File('b', 2), File('c', 3), File('d', 36)], 1, ['a']), (6, [File('a', 13), File('b', 2), File('c', 3), File('d', 36)], 2, ['a', 'b', 'c']), (6, [File('a', 13), File('b', 2), File('c', 3), File('d', 36)], 3, ['d']), (6, [File('a', 13), File('b', 2), File('c', 3), File('d', 36)], 8, ['d']), (6, [File('a', 13), File('b', 2), File('c', 3), File('d', 36)], 9, ValueError('piece_index is out of bounds (0 - 8): 9')), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaaabbccccdddddddddddddddddddddddddddddddddddd (6, [File('a', 13), File('b', 2), File('c', 4), File('d', 36)], -1, ValueError('piece_index is out of bounds (0 - 9): -1')), (6, [File('a', 13), File('b', 2), File('c', 4), File('d', 36)], 0, ['a']), (6, [File('a', 13), File('b', 2), File('c', 4), File('d', 36)], 1, ['a']), (6, [File('a', 13), File('b', 2), File('c', 4), File('d', 36)], 2, ['a', 'b', 'c']), (6, [File('a', 13), File('b', 2), File('c', 4), File('d', 36)], 3, ['c', 'd']), (6, [File('a', 13), File('b', 2), File('c', 4), File('d', 36)], 8, ['d']), (6, [File('a', 13), File('b', 2), File('c', 4), File('d', 36)], 9, ['d']), (6, [File('a', 13), File('b', 2), File('c', 4), File('d', 36)], 10, ValueError('piece_index is out of bounds (0 - 9): 10')), # Last piece contains multiple files # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaaaaaaaaaaaaaaaaaaabccddd (6, [File('a', 29), File('b', 1), File('c', 2), File('d', 3)], 3, ['a']), (6, [File('a', 29), File('b', 1), File('c', 2), File('d', 3)], 4, ['a', 'b']), (6, [File('a', 29), File('b', 1), File('c', 2), File('d', 3)], 5, ['c', 'd']), (6, [File('a', 29), File('b', 1), File('c', 2), File('d', 3)], 6, 
ValueError('piece_index is out of bounds (0 - 5): 6')), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaaaaaaaaaaaaaaaaaaabbccddd (6, [File('a', 29), File('b', 2), File('c', 2), File('d', 3)], 3, ['a']), (6, [File('a', 29), File('b', 2), File('c', 2), File('d', 3)], 4, ['a', 'b']), (6, [File('a', 29), File('b', 2), File('c', 2), File('d', 3)], 5, ['b', 'c', 'd']), (6, [File('a', 29), File('b', 2), File('c', 2), File('d', 3)], 6, ValueError('piece_index is out of bounds (0 - 5): 6')), # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbccddd (6, [File('a', 29), File('b', 3), File('c', 2), File('d', 3)], 3, ['a']), (6, [File('a', 29), File('b', 3), File('c', 2), File('d', 3)], 4, ['a', 'b']), (6, [File('a', 29), File('b', 3), File('c', 2), File('d', 3)], 5, ['b', 'c', 'd']), (6, [File('a', 29), File('b', 3), File('c', 2), File('d', 3)], 6, ['d']), (6, [File('a', 29), File('b', 3), File('c', 2), File('d', 3)], 7, ValueError('piece_index is out of bounds (0 - 6): 7')), ), ids=lambda v: str(v), ) def test_get_files_at_piece_index(chunk_size, files, piece_index, exp_return_value, mocker): torrent = Torrent(piece_size=chunk_size, files=files) tfs = TorrentFileStream(torrent) def mock_content_path(content_path, none_ok, file): return f'{content_path} / {none_ok} / {file}' mocker.patch.object(tfs, '_get_content_path', side_effect=mock_content_path) if isinstance(exp_return_value, BaseException): with pytest.raises(type(exp_return_value), match=rf'^{re.escape(str(exp_return_value))}$'): tfs.get_files_at_piece_index(piece_index, content_path='my/custom/path') else: files = tfs.get_files_at_piece_index(piece_index, content_path='my/custom/path') assert files == [f'my/custom/path / True / {file}' for file in exp_return_value] @pytest.mark.parametrize( argnames='chunk_size, files, file, relative_piece_indexes, exp_absolute_indexes', argvalues=( # Multiple files in one piece # 0 1 2 3 4 5 6 7 8 9 0 # abc (6, [File('a', 1), File('b', 1), File('c', 1)], 'a', (0, 1, 1000, -1, -2, -1000), 
[0]), (6, [File('a', 1), File('b', 1), File('c', 1)], 'b', (0, 1, 1000, -1, -2, -1000), [0]), (6, [File('a', 1), File('b', 1), File('c', 1)], 'c', (0, 1, 1000, -1, -2, -1000), [0]), # First piece contains multiple files # 0 1 2 3 4 5 6 7 8 9 0 # aabbbcccccccccccccccccccccccccccccccccccccccccccccccccc (6, [File('a', 2), File('b', 3), File('c', 50)], 'a', (0, 1, 1000, -1, -2, -1000), [0]), (6, [File('a', 2), File('b', 3), File('c', 50)], 'b', (0, 1, 1000, -1, -2, -1000), [0]), (6, [File('a', 2), File('b', 3), File('c', 50)], 'c', (0, 1, 1000, -1, -2, -1000), [0, 1, 8, 9]), # Middle piece contains multiple files # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaaabbcccdddddddddddddddddddddddddddddddddddd (6, [File('a', 13), File('b', 2), File('c', 3), File('d', 36)], 'a', (0, 1, 1000, -1, -2, -1000), [0, 1, 2]), (6, [File('a', 13), File('b', 2), File('c', 3), File('d', 36)], 'b', (0, 1, 1000, -1, -2, -1000), [2]), (6, [File('a', 13), File('b', 2), File('c', 3), File('d', 36)], 'c', (0, 1, 1000, -1, -2, -1000), [2]), (6, [File('a', 13), File('b', 2), File('c', 3), File('d', 36)], 'd', (0, 1, 1000, -1, -2, -1000), [3, 4, 7, 8]), # Last piece contains multiple files # 0 1 2 3 4 5 6 7 8 9 0 # aaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbcddd (6, [File('a', 30), File('b', 2), File('c', 1), File('d', 3)], 'a', (0, 1, 1000, -1, -2, -1000), [0, 1, 3, 4]), (6, [File('a', 30), File('b', 2), File('c', 1), File('d', 3)], 'b', (0, 1, 1000, -1, -2, -1000), [5]), (6, [File('a', 30), File('b', 2), File('c', 1), File('d', 3)], 'c', (0, 1, 1000, -1, -2, -1000), [5]), (6, [File('a', 30), File('b', 2), File('c', 1), File('d', 3)], 'd', (0, 1, 1000, -1, -2, -1000), [5]), ), ids=lambda v: str(v), ) def test_get_absolute_piece_indexes(chunk_size, files, file, relative_piece_indexes, exp_absolute_indexes): torrent = Torrent(piece_size=chunk_size, files=files) tfs = TorrentFileStream(torrent) file = [f for f in files if f == file][0] assert tfs.get_absolute_piece_indexes(file, relative_piece_indexes) == 
@pytest.mark.parametrize('ignore_empty_files', (True, False), ids=('ignore_empty_files', 'include_empty_files'))
@pytest.mark.parametrize(
    argnames='prefile_size, postfile_size',
    argvalues=(
        # No neighbours
        (0, 0),
        # Only a preceding file: smaller than, equal to, bigger than one piece
        (11, 0),
        (12, 0),
        (13, 0),
        # Only a following file
        (0, 11),
        (0, 12),
        (0, 13),
        # Neighbours of equal size
        (11, 11),
        (12, 12),
        (13, 13),
        # Every ordered pair of different sizes. (12, 11) replaces a second,
        # redundant (11, 12) that left this combination untested.
        (11, 12),
        (12, 11),
        (11, 13),
        (13, 11),
        (12, 13),
        (13, 12),
    ),
)
@pytest.mark.parametrize(
    argnames='file, relative_piece_indexes, exp_indexes',
    argvalues=(
        (File('foo', 11), (0, 1, -1, -2), [0]),
        (File('foo', 12), (0, 1, -1, -2), [0]),
        (File('foo', 13), (0, 1, -1, -2), [0, 1]),
        (File('foo', 239), (0, 1, -1, -2), [0, 1, 18, 19]),
        (File('foo', 240), (0, 1, -1, -2), [0, 1, 18, 19]),
        (File('foo', 241), (0, 1, -1, -2), [0, 1, 19, 20]),
    ),
    ids=lambda v: str(v),
)
def test_get_relative_piece_indexes(file, relative_piece_indexes, exp_indexes,
                                    prefile_size, postfile_size, ignore_empty_files):
    """
    The expected indexes are the same for every combination of neighbouring
    files, so they must not depend on what surrounds `file` in the stream.

    With `ignore_empty_files`, zero-sized neighbours are left out of the
    torrent; otherwise they are included as empty files.
    """
    files = []
    if prefile_size or not ignore_empty_files:
        files.append(File('before', prefile_size))
    files.append(file)
    if postfile_size or not ignore_empty_files:
        files.append(File('after', postfile_size))

    # All sizes above are chosen around this piece size (11 / 12 / 13)
    torrent = Torrent(piece_size=12, files=files)
    tfs = TorrentFileStream(torrent)
    assert tfs.get_relative_piece_indexes(file, relative_piece_indexes) == exp_indexes
13), File('t/c', 7), File('t/d', 11)], 6), # First piece contains multiple complete files # 0 1 2 3 4 5 # aaaabbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccc (12, [File('t/a', 4), File('t/b', 5), File('t/c', 53)], 0), (12, [File('t/a', 4), File('t/b', 5), File('t/c', 53)], 1), (12, [File('t/a', 4), File('t/b', 5), File('t/c', 53)], 2), (12, [File('t/a', 4), File('t/b', 5), File('t/c', 53)], 3), (12, [File('t/a', 4), File('t/b', 5), File('t/c', 53)], 4), (12, [File('t/a', 4), File('t/b', 5), File('t/c', 53)], 5), # Middle piece contains multiple complete files # 0 1 2 3 # aaaaaaaaaaaaaaaaaaaaabbbbbcccdddddddddddddddddddd (15, [File('t/a', 21), File('t/b', 5), File('t/c', 3), File('t/d', 20)], 0), (15, [File('t/a', 21), File('t/b', 5), File('t/c', 3), File('t/d', 20)], 1), (15, [File('t/a', 21), File('t/b', 5), File('t/c', 3), File('t/d', 20)], 2), (15, [File('t/a', 21), File('t/b', 5), File('t/c', 3), File('t/d', 20)], 3), # Last piece contains multiple complete files # 0 1 2 3 # aaaaaaaaaaaaaaaaaaaaaaaaaabbbbccccc (12, [File('t/a', 26), File('t/b', 4), File('t/c', 5)], 0), (12, [File('t/a', 26), File('t/b', 4), File('t/c', 5)], 1), (12, [File('t/a', 26), File('t/b', 4), File('t/c', 5)], 2), ), ids=lambda v: str(v), ) @pytest.mark.parametrize( argnames='torrent_content_path, stream_content_path, custom_content_path, exp_content_path', argvalues=( ('torrent/path', 'stream/path', 'custom/path', 'custom/path'), ('torrent/path', 'stream/path', None, 'stream/path'), ('torrent/path', None, None, 'torrent/path'), (None, None, None, None), ), ) def test_get_piece_returns_piece_from_files( torrent_content_path, stream_content_path, custom_content_path, exp_content_path, chunk_size, files, piece_index, tmp_path, mocker, ): torrent_name = 'my torrent' if torrent_content_path: torrent_content_path = tmp_path / torrent_content_path / torrent_name if stream_content_path: stream_content_path = tmp_path / stream_content_path / torrent_name if custom_content_path: 
@pytest.mark.parametrize('chunk_size', range(1, 40))
def test_get_piece_resets_seek_position_when_reusing_file_handle(chunk_size, tmp_path):
    """
    Reading the same piece repeatedly through one stream yields the same bytes.

    Every piece of a four-file torrent is requested three times from the same
    ``TorrentFileStream`` for each piece size from 1 to 39.
    """
    files = (
        File('MyTorrent/a', 12),
        File('MyTorrent/b', 13),
        File('MyTorrent/c', 7),
        File('MyTorrent/d', 16),
    )
    # The directory is the same for every file, so create it once up front.
    (tmp_path / 'MyTorrent').mkdir(parents=True, exist_ok=True)
    for f in files:
        print(f'{f}: {f.size} bytes: {f.content}')
        f.write_at(tmp_path)

    stream = b''.join(f.content for f in files)
    print('concatenated stream:', stream)

    total_size = sum(f.size for f in files)
    # `//` already floors, so no extra rounding step is needed.
    max_piece_index = (total_size - 1) // chunk_size

    for piece_index in range(max_piece_index + 1):
        print('testing piece:', piece_index)
        start = piece_index * chunk_size
        stop = min(start + chunk_size, len(stream))
        exp_piece = stream[start:stop]
        print('exp_piece:', f'[{start}:{stop}]:', exp_piece)
        exp_piece_length = stop - start
        assert len(exp_piece) == exp_piece_length

        torrent = Torrent(piece_size=chunk_size, files=files)
        with TorrentFileStream(torrent) as tfs:
            # Repeated reads go through the same stream object.
            for _ in range(3):
                piece = tfs.get_piece(piece_index, content_path=tmp_path / 'MyTorrent')
                assert piece == exp_piece
@pytest.mark.parametrize(
    argnames='chunk_size, files, piece_index, exp_max_piece_index',
    argvalues=(
        # First file is smaller than one piece (11 + 49 + 30 = 90 bytes, pieces 0..7)
        (12, [File('t/a', 11), File('t/b', 49), File('t/c', 30)], -1, 7),
        (12, [File('t/a', 11), File('t/b', 49), File('t/c', 30)], 8, 7),

        # Last file is smaller than one piece (8 + 17 + 3 = 28 bytes, pieces 0..6)
        (4, [File('t/a', 8), File('t/b', 17), File('t/c', 3)], -1, 6),
        (4, [File('t/a', 8), File('t/b', 17), File('t/c', 3)], 7, 6),

        # First piece contains multiple complete files (62 bytes, pieces 0..5)
        (12, [File('t/a', 4), File('t/b', 5), File('t/c', 53)], -1, 5),
        (12, [File('t/a', 4), File('t/b', 5), File('t/c', 53)], 6, 5),

        # Middle piece contains multiple complete files (49 bytes, pieces 0..3)
        (15, [File('t/a', 21), File('t/b', 5), File('t/c', 3), File('t/d', 20)], -1, 3),
        (15, [File('t/a', 21), File('t/b', 5), File('t/c', 3), File('t/d', 20)], 4, 3),

        # Last piece contains multiple complete files (35 bytes, pieces 0..2)
        (12, [File('t/a', 26), File('t/b', 4), File('t/c', 5)], -1, 2),
        (12, [File('t/a', 26), File('t/b', 4), File('t/c', 5)], 3, 2),
    ),
    ids=lambda v: str(v),
)
def test_get_piece_with_piece_index_out_of_bounds(chunk_size, files, piece_index, exp_max_piece_index, tmp_path):
    """``get_piece()`` raises ``ValueError`` for an index below 0 or above the last piece."""
    stream = TorrentFileStream(Torrent(piece_size=chunk_size, files=files))
    exp_message = rf'^piece_index must be in range 0 - {exp_max_piece_index}: {piece_index}$'
    with pytest.raises(ValueError, match=exp_message):
        stream.get_piece(piece_index)
pytest.raises(ValueError, match=rf'^piece_index must be in range 0 - {exp_max_piece_index}: {piece_index}$'): tfs.get_piece(piece_index) @pytest.mark.parametrize( argnames='chunk_size, files, missing_files, piece_index, exp_missing_file', argvalues=( # 0 1 2 3 4 5 6 7 # abcd (6, [File('t/a', 1), File('t/b', 1), File('t/c', 1), File('t/d', 1)], ['t/a'], 0, 't/a'), (6, [File('t/a', 1), File('t/b', 1), File('t/c', 1), File('t/d', 1)], ['t/b'], 0, 't/b'), (6, [File('t/a', 1), File('t/b', 1), File('t/c', 1), File('t/d', 1)], ['t/c'], 0, 't/c'), (6, [File('t/a', 1), File('t/b', 1), File('t/c', 1), File('t/d', 1)], ['t/d'], 0, 't/d'), (6, [File('t/a', 1), File('t/b', 1), File('t/c', 1), File('t/d', 1)], ['t/b', 't/c'], 0, 't/b'), # 0 1 2 3 4 5 6 7 # aaaaaaaaaaabbbcccccccccccccccccdddddd (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a'], 0, 't/a'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a'], 1, 't/a'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a'], 2, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a'], 3, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a'], 4, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a'], 5, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a'], 6, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b'], 0, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b'], 1, 't/b'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b'], 2, 't/b'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b'], 3, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b'], 4, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b'], 5, None), (6, [File('t/a', 11), 
File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b'], 6, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c'], 0, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c'], 1, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c'], 2, 't/c'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c'], 3, 't/c'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c'], 4, 't/c'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c'], 5, 't/c'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c'], 6, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/d'], 0, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/d'], 1, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/d'], 2, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/d'], 3, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/d'], 4, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/d'], 5, 't/d'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/d'], 6, 't/d'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/b'], 0, 't/a'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/b'], 1, 't/a'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/b'], 2, 't/b'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/b'], 3, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/b'], 4, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/b'], 5, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), 
File('t/d', 6)], ['t/a', 't/b'], 6, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/c'], 0, 't/a'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/c'], 1, 't/a'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/c'], 2, 't/c'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/c'], 3, 't/c'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/c'], 4, 't/c'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/c'], 5, 't/c'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/c'], 6, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/d'], 0, 't/a'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/d'], 1, 't/a'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/d'], 2, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/d'], 3, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/d'], 4, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/d'], 5, 't/d'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/a', 't/d'], 6, 't/d'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c', 't/d'], 0, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c', 't/d'], 1, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c', 't/d'], 2, 't/c'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c', 't/d'], 3, 't/c'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c', 't/d'], 4, 't/c'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c', 
't/d'], 5, 't/c'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/c', 't/d'], 6, 't/d'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b', 't/d'], 0, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b', 't/d'], 1, 't/b'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b', 't/d'], 2, 't/b'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b', 't/d'], 3, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b', 't/d'], 4, None), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b', 't/d'], 5, 't/d'), (6, [File('t/a', 11), File('t/b', 3), File('t/c', 17), File('t/d', 6)], ['t/b', 't/d'], 6, 't/d'), ), ids=lambda v: str(v), ) def test_get_piece_with_missing_file(chunk_size, files, missing_files, piece_index, exp_missing_file, tmp_path): torrent_name = files[0].parts[0] for file in files: if file not in missing_files: filepath = tmp_path / file print(f'writing {filepath}: {file.size} bytes: {file.content}') filepath.parent.mkdir(parents=True, exist_ok=True) filepath.write_bytes(file.content) else: print(f'not writing {file}: {file.size} bytes: {file.content}') torrent = Torrent(piece_size=chunk_size, files=files) tfs = TorrentFileStream(torrent) if exp_missing_file: exp_exception = ReadError(errno.ENOENT, tmp_path / exp_missing_file) with pytest.raises(type(exp_exception), match=rf'^{re.escape(str(exp_exception))}$'): tfs.get_piece(piece_index, content_path=tmp_path / torrent_name) else: piece = tfs.get_piece(piece_index, content_path=tmp_path / torrent_name) assert isinstance(piece, bytes) @pytest.mark.parametrize( argnames='chunk_size, files, contents, piece_index, exp_result', argvalues=( # 0 1 2 3 4 5 # aaaaaaaaaaabbbbbbbbbbbbcccccc (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/a': b'x' * 1}, 0, Exception('t/a')), (6, [File('t/a', 11), File('t/b', 
@pytest.mark.parametrize(
    argnames='chunk_size, files, contents, piece_index, exp_result',
    argvalues=(
        # 11 + 12 + 6 = 29 bytes -> pieces 0..4 of 6 bytes each.
        # t/a covers pieces 0-1, t/b covers pieces 1-3, t/c covers pieces 3-4.
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/a': b'x' * 1}, 0, Exception('t/a')),
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/a': b'x' * 2}, 1, Exception('t/a')),
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/a': b'x' * 3}, 2, bytes),
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/a': b'x' * 4}, 3, bytes),
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/a': b'x' * 5}, 4, bytes),

        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/b': b'x' * 6}, 0, bytes),
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/b': b'x' * 7}, 1, Exception('t/b')),
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/b': b'x' * 8}, 2, Exception('t/b')),
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/b': b'x' * 9}, 3, Exception('t/b')),
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/b': b'x' * 10}, 4, bytes),

        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/c': b'x' * 11}, 0, bytes),
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/c': b'x' * 12}, 1, bytes),
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/c': b'x' * 13}, 2, bytes),
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/c': b'x' * 14}, 3, Exception('t/c')),
        (6, [File('t/a', 11), File('t/b', 12), File('t/c', 6)], {'t/c': b'x' * 15}, 4, Exception('t/c')),
    ),
    ids=lambda v: str(v),
)
def test_get_piece_with_wrong_file_size(chunk_size, files, contents, piece_index, exp_result, tmp_path):
    """
    ``get_piece()`` raises ``VerifyFileSizeError`` for a piece that touches a wrongly sized file.

    ``contents`` maps a relative file path to the bytes written instead of the
    file's proper content.  ``exp_result`` is either an exception whose message
    names the offending file, or ``bytes`` when the piece must still be readable.
    """
    for file in files:
        target = tmp_path / file
        target.parent.mkdir(parents=True, exist_ok=True)
        written = contents.get(str(file), file.content)
        print(f'{target}: {bytes(file.content)}, {len(file.content)} bytes')
        if written != file.content:
            print(f' wrong file size: {bytes(written)}, {len(written)} bytes')
        target.write_bytes(written)

    torrent = Torrent(piece_size=chunk_size, files=files)

    if not isinstance(exp_result, BaseException):
        # The piece does not touch the manipulated file: expect the plain slice.
        stream = b''.join(f.content for f in files)
        print('concatenated stream:', stream)
        start = piece_index * chunk_size
        stop = min(start + chunk_size, len(stream))
        exp_piece = stream[start:stop]
        print('exp_piece:', f'[{start}:{stop}]:', exp_piece)
        exp_piece_length = stop - start
        assert len(exp_piece) == exp_piece_length
        with TorrentFileStream(torrent) as tfs:
            assert tfs.get_piece(piece_index, content_path=tmp_path / 't') == exp_piece
        return

    # The exception's message is the relative path of the wrongly sized file.
    exp_filepath_rel = str(exp_result)
    exp_filepath = str(tmp_path / exp_filepath_rel)
    sizes_by_path = {str(f): f.size for f in files}
    exp_filesize = sizes_by_path[exp_filepath_rel]
    actual_file_size = os.path.getsize(tmp_path / exp_filepath_rel)
    exp_exception = VerifyFileSizeError(exp_filepath, actual_file_size, exp_filesize)
    with TorrentFileStream(torrent) as tfs:
        with pytest.raises(type(exp_exception), match=rf'^{re.escape(str(exp_exception))}$'):
            tfs.get_piece(piece_index, content_path=tmp_path / 't')
def test_get_file_size_from_fs_returns_file_size(mocker):
    """An existing file's size is whatever ``os.path.getsize()`` reports."""
    stream = TorrentFileStream(
        Torrent(piece_size=123, files=(File('a', 1), File('b', 2), File('c', 3)))
    )
    exists_mock = mocker.patch('os.path.exists', return_value=True)
    getsize_mock = mocker.patch('os.path.getsize', return_value=123456)

    size = stream._get_file_size_from_fs('path/to/b')

    assert size == 123456
    assert exists_mock.call_args_list == [call('path/to/b')]
    assert getsize_mock.call_args_list == [call('path/to/b')]


def test_get_file_size_from_fs_gets_nonexisting_file(mocker):
    """A missing file yields ``None`` and ``os.path.getsize()`` is never called."""
    stream = TorrentFileStream(
        Torrent(piece_size=123, files=(File('a', 1), File('b', 2), File('c', 3)))
    )
    exists_mock = mocker.patch('os.path.exists', return_value=False)
    getsize_mock = mocker.patch('os.path.getsize', return_value=123456)

    size = stream._get_file_size_from_fs('path/to/b')

    assert size is None
    assert exists_mock.call_args_list == [call('path/to/b')]
    assert getsize_mock.call_args_list == []


def test_get_file_size_from_fs_gets_private_file(mocker):
    """A ``PermissionError`` from ``os.path.getsize()`` is reported as ``None``."""
    stream = TorrentFileStream(
        Torrent(piece_size=123, files=(File('a', 1), File('b', 2), File('c', 3)))
    )
    exists_mock = mocker.patch('os.path.exists', return_value=True)
    getsize_mock = mocker.patch('os.path.getsize', side_effect=PermissionError('Size is secret'))

    size = stream._get_file_size_from_fs('path/to/b')

    assert size is None
    assert exists_mock.call_args_list == [call('path/to/b')]
    assert getsize_mock.call_args_list == [call('path/to/b')]
def test_get_open_file_gets_nonexisting_file(mocker):
    """Asking for a path that does not exist raises ``ReadError`` with ``ENOENT``."""
    # NOTE(review): this patches `open` on `__main__`, not on `builtins` like
    # the sibling tests do.  Presumably the code under test never looks `open`
    # up there, in which case the final assertion cannot fail -- confirm.
    open_mock = mocker.patch('__main__.open')
    stream = TorrentFileStream(
        Torrent(piece_size=123, files=(File('a', 1), File('b', 2), File('c', 3)))
    )
    path = 'foo/path'
    exp_exception = ReadError(errno.ENOENT, path)
    exp_message = rf'^{re.escape(str(exp_exception))}$'
    with pytest.raises(type(exp_exception), match=exp_message):
        stream._get_open_file(path)
    assert open_mock.call_args_list == []


def test_get_open_file_fails_to_open_file(mocker):
    """An ``OSError`` raised by ``open()`` surfaces as ``ReadError`` for that path."""
    open_mock = mocker.patch('builtins.open', side_effect=OSError(2, 'nope'))
    stream = TorrentFileStream(
        Torrent(piece_size=123, files=(File('a', 1), File('b', 2), File('c', 3)))
    )
    with pytest.raises(ReadError, match=r'^foo/path/b: No such file or directory$'):
        stream._get_open_file('foo/path/b')
    assert open_mock.call_args_list == [call('foo/path/b', 'rb')]


def test_get_open_file_opens_file_only_once(mocker):
    """Repeated requests for one path return the first handle without reopening."""
    first_handle, second_handle = Mock(), Mock()
    open_mock = mocker.patch('builtins.open', side_effect=(first_handle, second_handle))
    stream = TorrentFileStream(
        Torrent(piece_size=123, files=(File('a', 1), File('b', 2), File('c', 3)))
    )
    for _ in range(5):
        assert stream._get_open_file('foo/path/b') == first_handle
    assert open_mock.call_args_list == [call('foo/path/b', 'rb')]


def test_get_open_file_respects_max_open_files(mocker):
    """Opening a file beyond ``max_open_files`` closes and forgets the oldest handle."""
    max_open_files = 3
    # One handle more than allowed is already open before the call.
    open_files = {}
    for i in range(max_open_files + 1):
        open_files[f'path/to/file{i}'] = Mock(name=f'mock file object {i}')

    stream = TorrentFileStream(
        Torrent(piece_size=123, files=(File('a', 1), File('b', 2), File('c', 3)))
    )
    mocker.patch.object(stream, 'max_open_files', max_open_files)
    stream._open_files = open_files.copy()
    open_mock = mocker.patch('builtins.open', return_value=Mock(name='freshly opened file'))

    new_handle = stream._get_open_file('another/path')

    assert new_handle is open_mock.return_value
    assert open_mock.call_args_list == [call('another/path', 'rb')]
    print(open_files)
    print(stream._open_files)

    # Only the first (oldest) handle was closed.
    handles = list(open_files.values())
    assert handles[0].close.call_args_list == [call()]
    for handle in handles[1:]:
        assert handle.close.call_args_list == []

    exp_open_files = {
        path: open_files[path]
        for path in ('path/to/file1', 'path/to/file2', 'path/to/file3')
    }
    exp_open_files['another/path'] = open_mock.return_value
    assert stream._open_files == exp_open_files
open_mock.return_value assert open_mock.call_args_list == [call('another/path', 'rb')] print(open_files) print(tfs._open_files) assert open_files['path/to/file0'].close.call_args_list == [call()] for path, fh in tuple(open_files.items())[1:]: assert fh.close.call_args_list == [] exp_open_files = { 'path/to/file1': open_files['path/to/file1'], 'path/to/file2': open_files['path/to/file2'], 'path/to/file3': open_files['path/to/file3'], 'another/path': open_mock.return_value, } assert tfs._open_files == exp_open_files @pytest.mark.parametrize( argnames='chunk_size, files, exp_chunks', argvalues=( # 0 1 2 3 4 5 6 7 8 8 9 # ABC (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'c')], [ (b'abc', ('C', 1), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAAAAABC (8, [File('t/A', b'abcdef'), File('t/B', b'g'), File('t/C', b'h')], [ (b'abcdefgh', ('C', 1), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAAAAABCC (8, [File('t/A', b'abcdef'), File('t/B', b'g'), File('t/C', b'hi')], [ (b'abcdefgh', ('C', 2), ()), (b'i', ('C', 2), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAAAAABBC (8, [File('t/A', b'abcdef'), File('t/B', b'gh'), File('t/C', b'i')], [ (b'abcdefgh', ('B', 2), ()), (b'i', ('C', 1), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAAAAABBCC (8, [File('t/A', b'abcdef'), File('t/B', b'gh'), File('t/C', b'ij')], [ (b'abcdefgh', ('B', 2), ()), (b'ij', ('C', 2), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAAAAAAABBCC (8, [File('t/A', b'abcdefgh'), File('t/B', b'ij'), File('t/C', b'kl')], [ (b'abcdefgh', ('A', 8), ()), (b'ijkl', ('C', 2), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAAAAAAAABBCC (8, [File('t/A', b'abcdefghi'), File('t/B', b'jk'), File('t/C', b'lm')], [ (b'abcdefgh', ('A', 9), ()), (b'ijklm', ('C', 2), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAAAAAAAABBCCCCC (8, [File('t/A', b'abcdefghi'), File('t/B', b'jk'), File('t/C', b'lmnop')], [ (b'abcdefgh', ('A', 9), ()), (b'ijklmnop', ('C', 5), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAAAAAAAABBBCCCCC (8, [File('t/A', b'abcdefghi'), File('t/B', b'jkl'), File('t/C', b'mnopq')], 
[ (b'abcdefgh', ('A', 9), ()), (b'ijklmnop', ('C', 5), ()), (b'q', ('C', 5), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAAAAAAABBBBCCCCC (8, [File('t/A', b'abcdefgh'), File('t/B', b'ijkl'), File('t/C', b'mnopq')], [ (b'abcdefgh', ('A', 8), ()), (b'ijklmnop', ('C', 5), ()), (b'q', ('C', 5), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAAAAAABBBBCCCCCC (8, [File('t/A', b'abcdefg'), File('t/B', b'hijk'), File('t/C', b'lmnopq')], [ (b'abcdefgh', ('B', 4), ()), (b'ijklmnop', ('C', 6), ()), (b'q', ('C', 6), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # ABBBBCCCCC (8, [File('t/A', b'a'), File('t/B', b'bcde'), File('t/C', b'fghij')], [ (b'abcdefgh', ('C', 5), ()), (b'ij', ('C', 5), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # ABBBBCCCC (8, [File('t/A', b'a'), File('t/B', b'bcde'), File('t/C', b'fghi')], [ (b'abcdefgh', ('C', 4), ()), (b'i', ('C', 4), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # ABBBBC (8, [File('t/A', b'a'), File('t/B', b'bcde'), File('t/C', b'f')], [ (b'abcdef', ('C', 1), ()), ]), ), ids=lambda v: str(v), ) @pytest.mark.parametrize( argnames='torrent_content_path, stream_content_path, custom_content_path, exp_content_path', argvalues=( ('torrent/path', 'stream/path', 'custom/path', 'custom/path'), ('torrent/path', 'stream/path', None, 'stream/path'), ('torrent/path', None, None, 'torrent/path'), (None, None, None, None), ), ) def test_iter_pieces_without_missing_files( torrent_content_path, stream_content_path, custom_content_path, exp_content_path, chunk_size, files, exp_chunks, tmp_path, mocker, ): torrent_name = 'my_torrent' if torrent_content_path: torrent_content_path = (tmp_path / torrent_content_path).parent / torrent_name if stream_content_path: stream_content_path = (tmp_path / stream_content_path).parent / torrent_name if custom_content_path: custom_content_path = (tmp_path / custom_content_path).parent / torrent_name print('torrent_content_path:', torrent_content_path) print('stream_content_path:', stream_content_path) print('custom_content_path:', custom_content_path) if exp_content_path: 
exp_content_path = (tmp_path / exp_content_path).parent / torrent_name exp_content_path.mkdir(parents=True, exist_ok=True) for file in files: filepath = exp_content_path.joinpath(os.sep.join(file.parts[1:])) print(f'{filepath}: {file.size} bytes: {file.content}') filepath.write_bytes(file.content) exp_chunks_fixed = [] for chunk, (filepath_rel, filesize), exceptions in exp_chunks: if exp_content_path: filepath = File(exp_content_path / filepath_rel, filesize) else: filepath = File(filepath_rel, filesize) exp_chunks_fixed.append((chunk, filepath, exceptions)) torrent = Torrent(piece_size=chunk_size, files=files, path=torrent_content_path) with TorrentFileStream(torrent, content_path=stream_content_path) as tfs: if exp_content_path is None: with pytest.raises(ValueError, match=r'^Missing content_path argument and torrent has no path specified$'): list(tfs.iter_pieces(content_path=custom_content_path)) else: assert list(tfs.iter_pieces(content_path=custom_content_path)) == exp_chunks_fixed @pytest.mark.parametrize( argnames='chunk_size, files, missing_files, exp_chunks', argvalues=( # 0 1 2 3 4 5 6 7 8 8 9 # ABC (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'c')], ['t/A'], [ (None, ('t/A', 1), ('t/A',)), ]), (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'c')], ['t/B'], [ (None, ('t/B', 1), ('t/B',)), ]), (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'c')], ['t/C'], [ (None, ('t/C', 1), ('t/C',)), ]), (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'c')], ['t/A', 't/B'], [ (None, ('t/A', 1), ('t/A', 't/B')), ]), (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'c')], ['t/B', 't/C'], [ (None, ('t/B', 1), ('t/B', 't/C')), ]), (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'c')], ['t/A', 't/C'], [ (None, ('t/A', 1), ('t/A', 't/C')), ]), (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'c')], ['t/A', 't/B', 't/C'], [ (None, ('t/A', 1), ('t/A', 't/B', 't/C')), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAABBBCC (8, 
[File('t/A', b'abc'), File('t/B', b'def'), File('t/C', b'gh')], ['t/A'], [ (None, ('t/A', 3), ('t/A',)), ]), (8, [File('t/A', b'abc'), File('t/B', b'def'), File('t/C', b'gh')], ['t/B'], [ (None, ('t/B', 3), ('t/B',)), ]), (8, [File('t/A', b'abc'), File('t/B', b'def'), File('t/C', b'gh')], ['t/C'], [ (None, ('t/C', 2), ('t/C',)), ]), (8, [File('t/A', b'abc'), File('t/B', b'def'), File('t/C', b'gh')], ['t/A', 't/B'], [ (None, ('t/A', 3), ('t/A', 't/B')), ]), (8, [File('t/A', b'abc'), File('t/B', b'def'), File('t/C', b'gh')], ['t/B', 't/C'], [ (None, ('t/B', 3), ('t/B', 't/C')), ]), (8, [File('t/A', b'abc'), File('t/B', b'def'), File('t/C', b'gh')], ['t/A', 't/C'], [ (None, ('t/A', 3), ('t/A', 't/C')), ]), (8, [File('t/A', b'abc'), File('t/B', b'def'), File('t/C', b'gh')], ['t/A', 't/B', 't/C'], [ (None, ('t/A', 3), ('t/A', 't/B', 't/C')), ]), # 0 1 2 3 4 5 6 7 8 8 9 # ABCCCCCCC (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'cdefghi')], ['t/A'], [ (None, ('t/A', 1), ('t/A',)), (b'i', ('t/C', 7), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'cdefghi')], ['t/B'], [ (None, ('t/B', 1), ('t/B',)), (b'i', ('t/C', 7), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'cdefghi')], ['t/C'], [ (None, ('t/C', 7), ('t/C',)), (None, ('t/C', 7), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'cdefghi')], ['t/A', 't/B'], [ (None, ('t/A', 1), ('t/A', 't/B')), (b'i', ('t/C', 7), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'cdefghi')], ['t/B', 't/C'], [ (None, ('t/B', 1), ('t/B',)), (None, ('t/C', 7), ('t/C',)), ]), (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'cdefghi')], ['t/A', 't/C'], [ (None, ('t/A', 1), ('t/A',)), (None, ('t/C', 7), ('t/C',)), ]), (8, [File('t/A', b'a'), File('t/B', b'b'), File('t/C', b'cdefghi')], ['t/A', 't/B', 't/C'], [ (None, ('t/A', 1), ('t/A', 't/B')), (None, ('t/C', 7), ('t/C',)), ]), # 0 1 2 3 4 5 6 7 8 8 9 # ABBBBBBCC (8, [File('t/A', b'a'), File('t/B', 
b'bcdefg'), File('t/C', b'hi')], ['t/A'], [ (None, ('t/A', 1), ('t/A',)), (b'i', ('t/C', 2), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), File('t/C', b'hi')], ['t/B'], [ (None, ('t/B', 6), ('t/B',)), (b'i', ('t/C', 2), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), File('t/C', b'hi')], ['t/C'], [ (None, ('t/C', 2), ('t/C',)), (None, ('t/C', 2), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), File('t/C', b'hi')], ['t/A', 't/B'], [ (None, ('t/A', 1), ('t/A', 't/B')), (b'i', ('t/C', 2), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), File('t/C', b'hi')], ['t/B', 't/C'], [ (None, ('t/B', 6), ('t/B',)), (None, ('t/C', 2), ('t/C',)), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), File('t/C', b'hi')], ['t/A', 't/C'], [ (None, ('t/A', 1), ('t/A',)), (None, ('t/C', 2), ('t/C',)), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), File('t/C', b'hi')], ['t/A', 't/B', 't/C'], [ (None, ('t/A', 1), ('t/A', 't/B')), (None, ('t/C', 2), ('t/C',)), ]), # 0 1 2 3 4 5 6 7 8 8 9 # ABBBBBBCCCCCCCCCC (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), File('t/C', b'hijklmnopq')], ['t/A'], [ (None, ('t/A', 1), ('t/A',)), (b'ijklmnop', ('t/C', 10), ()), (b'q', ('t/C', 10), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), File('t/C', b'hijklmnopq')], ['t/B'], [ (None, ('t/B', 6), ('t/B',)), (b'ijklmnop', ('t/C', 10), ()), (b'q', ('t/C', 10), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), File('t/C', b'hijklmnopq')], ['t/C'], [ (None, ('t/C', 10), ('t/C',)), (None, ('t/C', 10), ()), (None, ('t/C', 10), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), File('t/C', b'hijklmnopq')], ['t/A', 't/B'], [ (None, ('t/A', 1), ('t/A', 't/B')), (b'ijklmnop', ('t/C', 10), ()), (b'q', ('t/C', 10), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), File('t/C', b'hijklmnopq')], ['t/B', 't/C'], [ (None, ('t/B', 6), ('t/B',)), (None, ('t/C', 10), ('t/C',)), (None, ('t/C', 10), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), 
File('t/C', b'hijklmnopq')], ['t/A', 't/C'], [ (None, ('t/A', 1), ('t/A',)), (None, ('t/C', 10), ('t/C',)), (None, ('t/C', 10), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefg'), File('t/C', b'hijklmnopq')], ['t/A', 't/B', 't/C'], [ (None, ('t/A', 1), ('t/A', 't/B')), (None, ('t/C', 10), ('t/C',)), (None, ('t/C', 10), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # ABBBBBBBCCCCCCCCCC (8, [File('t/A', b'a'), File('t/B', b'bcdefgh'), File('t/C', b'ijklmnopqr')], ['t/A'], [ (None, ('t/A', 1), ('t/A',)), (b'ijklmnop', ('t/C', 10), ()), (b'qr', ('t/C', 10), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefgh'), File('t/C', b'ijklmnopqr')], ['t/B'], [ (None, ('t/B', 7), ('t/B',)), (b'ijklmnop', ('t/C', 10), ()), (b'qr', ('t/C', 10), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefgh'), File('t/C', b'ijklmnopqr')], ['t/C'], [ (b'abcdefgh', ('t/B', 7), ()), (None, ('t/C', 10), ('t/C',)), (None, ('t/C', 10), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefgh'), File('t/C', b'ijklmnopqr')], ['t/A', 't/B'], [ (None, ('t/A', 1), ('t/A', 't/B')), (b'ijklmnop', ('t/C', 10), ()), (b'qr', ('t/C', 10), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefgh'), File('t/C', b'ijklmnopqr')], ['t/B', 't/C'], [ (None, ('t/B', 7), ('t/B',)), (None, ('t/C', 10), ('t/C',)), (None, ('t/C', 10), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefgh'), File('t/C', b'ijklmnopqr')], ['t/A', 't/C'], [ (None, ('t/A', 1), ('t/A',)), (None, ('t/C', 10), ('t/C',)), (None, ('t/C', 10), ()), ]), (8, [File('t/A', b'a'), File('t/B', b'bcdefgh'), File('t/C', b'ijklmnopqr')], ['t/A', 't/B', 't/C'], [ (None, ('t/A', 1), ('t/A', 't/B')), (None, ('t/C', 10), ('t/C',)), (None, ('t/C', 10), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAAAABBBCCCCCC (4, [File('t/A', b'abcde'), File('t/B', b'fgh'), File('t/C', b'ijklmn')], ['t/A'], [ (None, ('t/A', 5), ('t/A',)), (None, ('t/A', 5), ()), (b'ijkl', ('t/C', 6), ()), (b'mn', ('t/C', 6), ()), ]), (4, [File('t/A', b'abcde'), File('t/B', b'fgh'), File('t/C', b'ijklmn')], 
['t/B'], [ (b'abcd', ('t/A', 5), ()), (None, ('t/B', 3), ('t/B',)), (b'ijkl', ('t/C', 6), ()), (b'mn', ('t/C', 6), ()), ]), (4, [File('t/A', b'abcde'), File('t/B', b'fgh'), File('t/C', b'ijklmn')], ['t/C'], [ (b'abcd', ('t/A', 5), ()), (b'efgh', ('t/B', 3), ()), (None, ('t/C', 6), ('t/C',)), (None, ('t/C', 6), ()), ]), (4, [File('t/A', b'abcde'), File('t/B', b'fgh'), File('t/C', b'ijklmn')], ['t/A', 't/B'], [ (None, ('t/A', 5), ('t/A',)), (None, ('t/A', 5), ('t/B',)), (b'ijkl', ('t/C', 6), ()), (b'mn', ('t/C', 6), ()), ]), (4, [File('t/A', b'abcde'), File('t/B', b'fgh'), File('t/C', b'ijklmn')], ['t/B', 't/C'], [ (b'abcd', ('t/A', 5), ()), (None, ('t/B', 3), ('t/B',)), (None, ('t/C', 6), ('t/C',)), (None, ('t/C', 6), ()), ]), (4, [File('t/A', b'abcde'), File('t/B', b'fgh'), File('t/C', b'ijklmn')], ['t/A', 't/C'], [ (None, ('t/A', 5), ('t/A',)), (None, ('t/A', 5), ()), (None, ('t/C', 6), ('t/C',)), (None, ('t/C', 6), ()), ]), (4, [File('t/A', b'abcde'), File('t/B', b'fgh'), File('t/C', b'ijklmn')], ['t/A', 't/B', 't/C'], [ (None, ('t/A', 5), ('t/A',)), (None, ('t/A', 5), ('t/B',)), (None, ('t/C', 6), ('t/C',)), (None, ('t/C', 6), ()), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAAAABBBBCCCCC (4, [File('t/A', b'abcde'), File('t/B', b'fghi'), File('t/C', b'jklmn')], ['t/A'], [ (None, ('t/A', 5), ('t/A',)), (None, ('t/A', 5), ()), (b'ijkl', ('t/C', 5), ()), (b'mn', ('t/C', 5), ()), ]), (4, [File('t/A', b'abcde'), File('t/B', b'fghi'), File('t/C', b'jklmn')], ['t/B'], [ (b'abcd', ('t/A', 5), ()), (None, ('t/B', 4), ('t/B',)), (None, ('t/B', 4), ()), (b'mn', ('t/C', 5), ()), ]), (4, [File('t/A', b'abcde'), File('t/B', b'fghi'), File('t/C', b'jklmn')], ['t/C'], [ (b'abcd', ('t/A', 5), ()), (b'efgh', ('t/B', 4), ()), (None, ('t/C', 5), ('t/C',)), (None, ('t/C', 5), ()), ]), (4, [File('t/A', b'abcde'), File('t/B', b'fghi'), File('t/C', b'jklmn')], ['t/A', 't/B'], [ (None, ('t/A', 5), ('t/A',)), (None, ('t/A', 5), ()), (None, ('t/B', 4), ('t/B',)), (b'mn', ('t/C', 5), ()), ]), (4, 
[File('t/A', b'abcde'), File('t/B', b'fghi'), File('t/C', b'jklmn')], ['t/B', 't/C'], [ (b'abcd', ('t/A', 5), ()), (None, ('t/B', 4), ('t/B',)), (None, ('t/B', 4), ()), (None, ('t/C', 5), ('t/C',)), ]), (4, [File('t/A', b'abcde'), File('t/B', b'fghi'), File('t/C', b'jklmn')], ['t/A', 't/C'], [ (None, ('t/A', 5), ('t/A',)), (None, ('t/A', 5), ()), (None, ('t/C', 5), ('t/C',)), (None, ('t/C', 5), ()), ]), (4, [File('t/A', b'abcde'), File('t/B', b'fghi'), File('t/C', b'jklmn')], ['t/A', 't/B', 't/C'], [ (None, ('t/A', 5), ('t/A',)), (None, ('t/A', 5), ()), (None, ('t/B', 4), ('t/B',)), (None, ('t/C', 5), ('t/C',)), ]), # 0 1 2 3 4 5 6 7 8 8 9 # AAABBBBBCCCCCC (4, [File('t/A', b'abc'), File('t/B', b'defgh'), File('t/C', b'ijklmn')], ['t/A'], [ (None, ('t/A', 3), ('t/A',)), (b'efgh', ('t/B', 5), ()), (b'ijkl', ('t/C', 6), ()), (b'mn', ('t/C', 6), ()), ]), (4, [File('t/A', b'abc'), File('t/B', b'defgh'), File('t/C', b'ijklmn')], ['t/B'], [ (None, ('t/B', 5), ('t/B',)), (None, ('t/B', 5), ()), (b'ijkl', ('t/C', 6), ()), (b'mn', ('t/C', 6), ()), ]), (4, [File('t/A', b'abc'), File('t/B', b'defgh'), File('t/C', b'ijklmn')], ['t/C'], [ (b'abcd', ('t/B', 5), ()), (b'efgh', ('t/B', 5), ()), (None, ('t/C', 6), ('t/C',)), (None, ('t/C', 6), ()), ]), (4, [File('t/A', b'abc'), File('t/B', b'defgh'), File('t/C', b'ijklmn')], ['t/A', 't/B'], [ (None, ('t/A', 3), ('t/A',)), (None, ('t/B', 5), ('t/B',)), (b'ijkl', ('t/C', 6), ()), (b'mn', ('t/C', 6), ()), ]), (4, [File('t/A', b'abc'), File('t/B', b'defgh'), File('t/C', b'ijklmn')], ['t/B', 't/C'], [ (None, ('t/B', 5), ('t/B',)), (None, ('t/B', 5), ()), (None, ('t/C', 6), ('t/C',)), (None, ('t/C', 6), ()), ]), (4, [File('t/A', b'abc'), File('t/B', b'defgh'), File('t/C', b'ijklmn')], ['t/A', 't/C'], [ (None, ('t/A', 3), ('t/A',)), (b'efgh', ('t/B', 5), ()), (None, ('t/C', 6), ('t/C',)), (None, ('t/C', 6), ()), ]), (4, [File('t/A', b'abc'), File('t/B', b'defgh'), File('t/C', b'ijklmn')], ['t/A', 't/B', 't/C'], [ (None, ('t/A', 3), 
('t/A',)), (None, ('t/B', 5), ('t/B',)), (None, ('t/C', 6), ('t/C',)), (None, ('t/C', 6), ()), ]), ), ids=lambda v: str(v), ) def test_iter_pieces_with_missing_files(chunk_size, files, missing_files, exp_chunks, tmp_path): torrent_name = files[0].parts[0] content_path = tmp_path / torrent_name content_path.mkdir(parents=True, exist_ok=True) for f in files: if str(f) not in missing_files: filepath = tmp_path / f print(f'writing {filepath}: {f.size} bytes: {f.content}') filepath.write_bytes(f.content) else: print(f'not writing {f}: {f.size} bytes: {f.content}') exp_chunks_fixed = [] for chunk, (filepath_rel, filesize), exceptions in exp_chunks: filepath = File(tmp_path / filepath_rel, filesize) exceptions = tuple(ComparableException(ReadError(errno.ENOENT, str(tmp_path / f))) for f in exceptions) exp_chunks_fixed.append((chunk, filepath, exceptions)) torrent = Torrent(piece_size=chunk_size, files=files) tfs = TorrentFileStream(torrent) chunks = list(tfs.iter_pieces(content_path=content_path)) def compare(x, y): if chunks[x][y] != exp_chunks_fixed[x][y]: print(f'{i}: {chunks[x][y]!r}\n {exp_chunks_fixed[x][y]!r}') for i in range(len(chunks)): compare(i, 0) compare(i, 1) compare(i, 2) assert chunks == exp_chunks_fixed class OOMCallback: def __init__(self, attempts): self._attempts = int(attempts) def __call__(self, exception): try: if self._attempts <= 0: print('Raising', repr(exception)) raise exception else: print('Ignoring', repr(exception)) finally: self._attempts -= 1 @pytest.mark.parametrize( argnames='oom_callback_kwargs, read_results, exp_result, exp_oom_callback_calls', argvalues=( ( None, [b'abc', b'def', b'ghi'], b'abc', [], ), ( None, [MemoryError('one'), MemoryError('two'), b'ghi'], MemoryError('Out of memory while reading from path/to/file at position 1'), [], ), ( {'attempts': 0}, [MemoryError('one'), MemoryError('two'), b'ghi'], MemoryError('Out of memory while reading from path/to/file at position 1'), [call(MemoryError('Out of memory while reading 
from path/to/file at position 1')),], ), ( {'attempts': 1}, [MemoryError('one'), MemoryError('two'), b'ghi'], MemoryError('Out of memory while reading from path/to/file at position 2'), [ call(MemoryError('Out of memory while reading from path/to/file at position 1')), call(MemoryError('Out of memory while reading from path/to/file at position 2')), ], ), ( {'attempts': 3}, [MemoryError('one'), MemoryError('two'), b'ghi'], b'ghi', [ call(MemoryError('Out of memory while reading from path/to/file at position 1')), call(MemoryError('Out of memory while reading from path/to/file at position 2')), ], ), ), ids=lambda v: repr(v), ) def test_read_from_fh(oom_callback_kwargs, read_results, exp_result, exp_oom_callback_calls, mocker): files = [ File('A', b'abc'), File('A', b'def'), File('A', b'ghi'), ] size = 123 fh = Mock(read=Mock(side_effect=read_results)) fh.tell.side_effect = [int(n) for n in '1234567890'] fh.configure_mock(name='path/to/file') torrent = Torrent(piece_size=size, files=files) tfs = TorrentFileStream(torrent) if oom_callback_kwargs is None: oom_callback = None else: oom_callback = OOMCallback(**oom_callback_kwargs) if isinstance(exp_result, Exception): with pytest.raises(type(exp_result), match=rf'^{re.escape(str(exp_result))}$'): print(tfs._read_from_fh(fh, size, oom_callback)) else: return_value = tfs._read_from_fh(fh, size, oom_callback) assert return_value is exp_result def test_get_piece_hash_from_readable_piece(mocker): torrent = Torrent(piece_size=123, files=(File('a', 1), File('b', 2), File('c', 3))) tfs = TorrentFileStream(torrent) get_piece_mock = mocker.patch.object(tfs, 'get_piece', return_value=b'mock piece') sha1_mock = mocker.patch('hashlib.sha1', return_value=Mock(digest=Mock(return_value=b'mock hash'))) assert tfs.get_piece_hash(123, content_path='foo/path') == b'mock hash' assert get_piece_mock.call_args_list == [call(123, content_path='foo/path')] assert sha1_mock.call_args_list == [call(b'mock piece')] def 
test_get_piece_hash_from_piece_from_missing_file(mocker): torrent = Torrent(piece_size=123, files=(File('a', 1), File('b', 2), File('c', 3))) tfs = TorrentFileStream(torrent) get_piece_mock = mocker.patch.object(tfs, 'get_piece', side_effect=ReadError(errno.ENOENT, 'foo/path')) sha1_mock = mocker.patch('hashlib.sha1', return_value=Mock(digest=Mock(return_value=b'mock hash'))) assert tfs.get_piece_hash(123, content_path='foo/path') is None assert get_piece_mock.call_args_list == [call(123, content_path='foo/path')] assert sha1_mock.call_args_list == [] def test_get_piece_hash_from_piece_from_existing_unreadable_file(mocker): torrent = Torrent(piece_size=123, files=(File('a', 1), File('b', 2), File('c', 3))) tfs = TorrentFileStream(torrent) exception = ReadError(errno.EACCES, 'foo/path') get_piece_mock = mocker.patch.object(tfs, 'get_piece', side_effect=exception) sha1_mock = mocker.patch('hashlib.sha1', return_value=Mock(digest=Mock(return_value=b'mock hash'))) with pytest.raises(type(exception), match=rf'^{re.escape(str(exception))}$'): tfs.get_piece_hash(123, content_path='foo/path') assert get_piece_mock.call_args_list == [call(123, content_path='foo/path')] assert sha1_mock.call_args_list == [] def test_verify_piece_verifies_piece_hash(mocker): torrent = Torrent(piece_size=123, files=(File('a', 1), File('b', 2), File('c', 3))) torrent.hashes = (b'd34d', b'b33f', b'b00b5') tfs = TorrentFileStream(torrent) mocker.patch.object(tfs, 'get_piece_hash', return_value=b'b33f') mocker.patch.object(type(tfs), 'max_piece_index', PropertyMock(return_value=2)) assert tfs.verify_piece(0, content_path='foo/path') is False assert tfs.verify_piece(1, content_path='foo/path') is True assert tfs.verify_piece(2, content_path='foo/path') is False with pytest.raises(ValueError, match=r'^piece_index must be in range 0 - 2: 3$'): tfs.verify_piece(3, content_path='foo/path') def test_verify_piece_gets_handles_no_piece_hash(mocker): torrent = Torrent(piece_size=123, files=(File('a', 1), 
File('b', 2), File('c', 3))) torrent.hashes = (b'd34d', b'b33f', b'b00b5') tfs = TorrentFileStream(torrent) mocker.patch.object(tfs, 'get_piece_hash', return_value=None) mocker.patch.object(type(tfs), 'max_piece_index', PropertyMock(return_value=2)) assert tfs.verify_piece(0, content_path='foo/path') is None assert tfs.verify_piece(1, content_path='foo/path') is None assert tfs.verify_piece(2, content_path='foo/path') is None with pytest.raises(ValueError, match=r'^piece_index must be in range 0 - 2: 3$'): tfs.verify_piece(3, content_path='foo/path') rndusr-torf-547b989/tests/test_utils.py000066400000000000000000000744441513142010300202120ustar00rootroot00000000000000import os import pickle import re from collections import OrderedDict from pathlib import Path from unittest import mock import pytest import torf from torf import _errors as errors from torf import _utils as utils @pytest.mark.parametrize( argnames='num, exp_return_value', argvalues=( (-16 * 3 * 1024 + 1, False), (-16 * 3 * 1024 + 0, False), (-16 * 3 * 1024 - 1, False), (-16 * 1 * 1024 + 1, False), (-16 * 1 * 1024 + 0, False), (-16 * 1 * 1024 - 1, False), (-1, False), (0, False), (1, False), (16 * 1 * 1024 + 1, False), (16 * 1 * 1024 + 0, True), (16 * 1 * 1024 - 1, False), (16 * 3 * 1024 + 1, False), (16 * 3 * 1024 + 0, True), (16 * 3 * 1024 - 1, False), ), ) def test_is_divisible_by_16_kib(num, exp_return_value): assert utils.is_divisible_by_16_kib(num) is exp_return_value def test_iterable_startswith(): a = ['a', 'b', 'c', 'd'] b = ['a', 'b', 'c'] assert utils.iterable_startswith(a, b) assert not utils.iterable_startswith(b, a) a = ['a', 'b', 'c'] b = ['a', 'b', 'c'] assert utils.iterable_startswith(a, b) assert utils.iterable_startswith(b, a) a = ['a', 'b', 'c'] b = [] assert utils.iterable_startswith(a, b) assert not utils.iterable_startswith(b, a) a = [] b = [] assert utils.iterable_startswith(a, b) assert utils.iterable_startswith(b, a) def test_URL__max_port_number(): 
utils.URL(f'http://foohost:{2**16 - 1}') with pytest.raises(torf.URLError): utils.URL(f'http://foohost:{2**16}') def test_URL__min_port_number(): utils.URL('http://foohost:0') with pytest.raises(torf.URLError): utils.URL('http://foohost:-1') def test_real_size_of_directory(tmp_path): dir = tmp_path / 'dir' ; dir.mkdir() # noqa: E702 subdir = dir / 'subdir' ; subdir.mkdir() # noqa: E702 (dir / 'file1').write_bytes(b'\x00' * 100) (dir / 'file2').write_bytes(b'\x00' * 200) (subdir / 'file3').write_bytes(b'\x00' * 300) (subdir / 'file4').write_bytes(b'\x00' * 400) assert utils.real_size(dir) == 1000 def test_real_size_of_directory_with_unreadable_file(tmp_path): dir = tmp_path / 'dir' ; dir.mkdir() # noqa: E702 subdir = dir / 'subdir' ; subdir.mkdir() # noqa: E702 (dir / 'file1').write_bytes(b'\x00' * 100) (subdir / 'file2').write_bytes(b'\x00' * 200) subdir_mode = os.stat(subdir).st_mode os.chmod(subdir, mode=0o222) try: with pytest.raises(errors.ReadError) as exc_info: utils.real_size(dir) assert str(exc_info.value) == f'{subdir}: Permission denied' finally: os.chmod(subdir, mode=subdir_mode) def test_real_size_of_file(tmp_path): (tmp_path / 'file').write_bytes(b'\x00' * 123) assert utils.real_size(tmp_path / 'file') == 123 def test_real_size_of_nonexising_path(): with pytest.raises(errors.ReadError) as exc_info: utils.real_size('path/doesnt/exist') assert str(exc_info.value) == 'path/doesnt/exist: No such file or directory' def test_real_size_of_symbolic_link_to_existing_file(tmp_path): original_path = tmp_path / 'source' original_path.write_bytes(b'source content') symlink_path = tmp_path / 'link' os.symlink(original_path, symlink_path) size = utils.real_size(str(symlink_path)) assert size == len(b'source_content') def test_real_size_of_symbolic_link_to_nonexisting_file(tmp_path): symlink_path = tmp_path / 'link' os.symlink(tmp_path / 'does/not/exist', symlink_path) with pytest.raises(errors.ReadError) as exc_info: utils.real_size(str(symlink_path)) assert 
str(exc_info.value) == f'{symlink_path}: No such file or directory' @pytest.fixture def testdir(tmp_path): base = tmp_path / 'base' base.mkdir() foo = base / 'foo' foo.mkdir() bar = base / '.bar' bar.mkdir() baz = bar / 'baz' baz.mkdir() for path in (foo, bar, baz): (path / 'empty').write_text('') (path / '.empty').write_text('') (path / 'not_empty').write_text('dummy content') (path / '.not_empty').write_text('more dummy content') os.symlink(base / 'foo', base / 'symlink') return base def test_list_files_with_file(testdir): files = [Path(filepath).relative_to(testdir.parent) for filepath in utils.list_files(testdir / 'foo/empty')] exp = ['base/foo/empty'] assert files == [Path(p) for p in exp] def test_list_files_with_directory(testdir): files = [Path(filepath).relative_to(testdir.parent) for filepath in utils.list_files(testdir)] exp = sorted(['base/foo/.empty', 'base/foo/.not_empty', 'base/foo/empty', 'base/foo/not_empty', 'base/.bar/.empty', 'base/.bar/.not_empty', 'base/.bar/empty', 'base/.bar/not_empty', 'base/.bar/baz/.empty', 'base/.bar/baz/.not_empty', 'base/.bar/baz/empty', 'base/.bar/baz/not_empty', 'base/symlink/.empty', 'base/symlink/.not_empty', 'base/symlink/empty', 'base/symlink/not_empty']) assert files == [Path(p) for p in exp] def test_list_files_with_unreadable_file(tmp_path): file = tmp_path / 'foo.jpg' file.write_text('asdf') file_mode = os.stat(file).st_mode os.chmod(file, mode=0o222) try: with pytest.raises(errors.ReadError) as exc_info: utils.list_files(file) assert str(exc_info.value) == f'{file}: Permission denied' finally: os.chmod(file, mode=file_mode) def test_list_files_with_unreadable_directory(tmp_path): dir = tmp_path / 'dir' dir.mkdir() file = dir / 'foo.jpg' file.write_text('asdf') dir_mode = os.stat(dir).st_mode os.chmod(dir, mode=0o222) try: for path in (dir, file): with pytest.raises(errors.ReadError) as exc_info: utils.list_files(path) assert str(exc_info.value) == f'{path}: Permission denied' finally: os.chmod(dir, 
mode=dir_mode) def test_list_files_with_unreadable_file_in_directory(tmp_path): dir = tmp_path / 'dir' dir.mkdir() file = dir / 'foo.jpg' file.write_text('asdf') file_mode = os.stat(file).st_mode os.chmod(file, mode=0o222) try: with pytest.raises(errors.ReadError) as exc_info: utils.list_files(dir) assert str(exc_info.value) == f'{file}: Permission denied' finally: os.chmod(file, mode=file_mode) def test_filter_files_with_default_arguments(): filelist = ['base/foo/.hidden', 'base/foo/not_hidden', 'base/.hidden/.hidden', 'base/.hiddendir/not_hidden', 'base/.hidden/not_hidden/.hidden', 'base/.hidden/not_hidden/not_hidden'] assert utils.filter_files(filelist) == filelist def test_filter_files_without_hidden_files_or_directories(): filelist = ['base/foo/.hidden', 'base/foo/not_hidden', 'base/.hidden/.hidden', 'base/.hiddendir/not_hidden', 'base/.hidden/not_hidden/.hidden', 'base/.hidden/not_hidden/not_hidden'] assert utils.filter_files(filelist, hidden=False) == ['base/foo/not_hidden'] def test_filter_files_ignores_hidden_parent_directories(): filelist = ['.base/foo/.hidden', '.base/foo/not_hidden', '.base/.hidden/.hidden', '.base/.hiddendir/not_hidden', '.base/.hidden/not_hidden/.hidden', '.base/.hidden/not_hidden/not_hidden'] assert utils.filter_files(filelist, hidden=False) == ['.base/foo/not_hidden'] filelist = ['path/to/.hidden/base/foo/.hidden', 'path/to/.hidden/base/foo/not_hidden', 'path/to/.hidden/base/.hidden/.hidden', 'path/to/.hidden/base/.hiddendir/not_hidden', 'path/to/.hidden/base/.hidden/not_hidden/.hidden', 'path/to/.hidden/base/.hidden/not_hidden/not_hidden'] assert utils.filter_files(filelist, hidden=False) == ['path/to/.hidden/base/foo/not_hidden'] def test_filter_files_without_empty_files(testdir): filelist = [str(Path(filepath).relative_to(testdir.parent)) for filepath in utils.list_files(testdir)] cwd = os.getcwd() try: os.chdir(testdir.parent) assert utils.filter_files(filelist, empty=False) == sorted(['base/foo/.not_empty', 
'base/foo/not_empty', 'base/.bar/.not_empty', 'base/.bar/not_empty', 'base/.bar/baz/.not_empty', 'base/.bar/baz/not_empty', 'base/symlink/.not_empty', 'base/symlink/not_empty']) finally: os.chdir(cwd) def test_filter_files_exclude_argument(testdir): filelist = ['base/foo/bar/baz', 'base/foo/two/three', 'base/one/two/foo'] assert utils.filter_files(filelist, exclude=(re.compile(r'two'),)) == ['base/foo/bar/baz'] assert utils.filter_files(filelist, exclude=(re.compile(r'foo$'),)) == ['base/foo/bar/baz', 'base/foo/two/three'] assert utils.filter_files(filelist, exclude=('base/foo/*',)) == ['base/one/two/foo'] assert utils.filter_files(filelist, exclude=(re.compile(r'foo/bar'), '*/one/*')) == ['base/foo/two/three'] def test_filter_files_with_no_common_path(testdir): filelist = ['foo/bar/baz', 'bar/two/three', 'one/two/foo'] assert utils.filter_files(filelist) == filelist assert utils.filter_files(filelist, exclude=(re.compile(r'bar'),)) == ['one/two/foo'] def test_filter_files_with_absolute_and_relative_paths(testdir): filelist = ['foo/bar/one', 'foo/bar/two', '/some/where/foo/bar/three', '/some/where/foo/bar/four'] assert utils.filter_files(filelist) == filelist def test_filter_files_with_getter_argument(testdir): items = [(123, 'foo/bar/baz', 456), (123, 'bar/two/three', 456), (123, 'one/two/foo', 456)] assert utils.filter_files(items, getter=lambda i: i[1], exclude=(re.compile(r'foo'),)) == [(123, 'bar/two/three', 456)] def test_decoding(): encoded = { b'one': b'foo', b'two': 17, b'three': [1, b'twelve', [b'x', {b'boo': 800}]], b'something': { b'four': b'baz', b'five': [{b'a': [1, 2, 3], b'b': 4}], } } decoded = { 'one': 'foo', 'two': 17, 'three': [1, 'twelve', ['x', {'boo': 800}]], 'something': { 'four': 'baz', 'five': [{'a': [1, 2, 3], 'b': 4}], } } assert utils.decode_dict(encoded) == decoded def test_decoding_invalid_unicode(): assert utils.decode_value(b'\xed') == b'\xed' def test_encoding(): class SillyStr(str): def __str__(self): return f'This is silly: 
{super().__str__()}' __repr__ = __str__ decoded = { 'one': SillyStr('foo'), 'two': 17.3, 'three': (1, 'twelve', ['x', OrderedDict([('boo', range(3))])]), 'something': { 'four': 'baz', 'five': [{'a': (1, 2, 3), 'b': -4}], } } encoded = { b'one': b'This is silly: foo', b'two': 17, b'three': [1, b'twelve', [b'x', {b'boo': [0, 1, 2]}]], b'something': { b'four': b'baz', b'five': [{b'a': [1, 2, 3], b'b': -4}], } } assert utils.encode_dict(decoded) == encoded def test_File_is_picklable(): file_original = utils.File('the/path/of/mine', 123456) file_pickled = pickle.dumps(file_original) file_unpickled = pickle.loads(file_pickled) assert file_unpickled == file_original def test_Filepath_is_equal_to_absolute_path(): assert utils.Filepath('/some/path/to/a/file') == utils.Filepath('/some/path/to/a/file') assert utils.Filepath('/some/path/to/a/file') == '/some/path/to/a/file' assert '/some/path/to/a/file' == utils.Filepath('/some/path/to/a/file') def test_Filepath_is_equal_to_relative_path(tmp_path): orig_cwd = os.getcwd() os.chdir(tmp_path.parent) abspath = str(tmp_path / 'foo') relpath = f'{tmp_path.parts[-1]}/foo' Path(abspath).write_text('bar') try: assert utils.Filepath(abspath) == utils.Filepath(relpath) assert utils.Filepath(abspath) == relpath assert relpath == utils.Filepath(abspath) assert utils.Filepath(relpath) == utils.Filepath(abspath) assert utils.Filepath(relpath) == abspath assert abspath == utils.Filepath(relpath) assert relpath == utils.Filepath(relpath) assert utils.Filepath(relpath) == relpath finally: os.chdir(orig_cwd) assert utils.Filepath(abspath) != utils.Filepath(relpath) assert utils.Filepath(abspath) != relpath assert relpath != utils.Filepath(abspath) assert utils.Filepath(relpath) != utils.Filepath(abspath) assert utils.Filepath(relpath) != abspath assert abspath != utils.Filepath(relpath) assert utils.Filepath(relpath) == relpath assert relpath == utils.Filepath(relpath) def test_Filepath_is_equal_to_symlink(tmp_path): path = tmp_path / 'foo' 
Path(path).write_text('bar') abspath = str(tmp_path / 'foo.link') relpath = './foo.link' Path(abspath).symlink_to('foo') assert utils.Filepath(abspath) == utils.Filepath(path) assert utils.Filepath(abspath) == path assert path == utils.Filepath(abspath) assert utils.Filepath(path) == abspath orig_cwd = os.getcwd() os.chdir(tmp_path) try: assert utils.Filepath(relpath) == utils.Filepath(path) assert utils.Filepath(relpath) == path assert path == utils.Filepath(relpath) assert utils.Filepath(path) == relpath finally: os.chdir(orig_cwd) def test_Filepaths_accepts_string_or_iterable(): assert utils.Filepaths('path/to/foo.jpg') == [Path('path/to/foo.jpg')] assert utils.Filepaths(('path/to/foo.jpg',)) == [Path('path/to/foo.jpg')] assert utils.Filepaths(['path/to/foo.jpg']) == [Path('path/to/foo.jpg')] def test_Filepaths_deduplicates_when_initializing(): fps = utils.Filepaths(('path/to/foo.jpg', 'path/to/bar.jpg', 'path/to/foo.jpg')) assert fps == (Path('path/to/foo.jpg'), Path('path/to/bar.jpg')) def test_Filepaths_deduplicates_when_setting(): fps = utils.Filepaths(('path/to/foo.jpg', 'path/to/bar.jpg')) fps.append('path/to/foo.jpg') fps.extend(('path/to/bar.jpg',)) assert fps == (Path('path/to/foo.jpg'), Path('path/to/bar.jpg')) def test_Filepaths_deduplicates_when_inserting(): fps = utils.Filepaths(('path/to/foo.jpg', 'path/to/bar.jpg')) fps.insert(0, 'path/to/bar.jpg') assert fps == (Path('path/to/foo.jpg'), Path('path/to/bar.jpg')) def test_Filepaths_treats_relative_paths_as_equal_to_their_absolute_versions(tmp_path): (tmp_path / 'cwd').mkdir() cwd = os.getcwd() try: os.chdir(tmp_path / 'cwd') fps = utils.Filepaths((Path('foo'),)) assert fps == ('foo',) fps.append(tmp_path / 'cwd' / 'foo') assert fps == ('foo',) fps.append(tmp_path / 'cwd' / 'bar') fps.append('bar') assert fps == ('foo', tmp_path / 'cwd' / 'bar') finally: os.chdir(cwd) def test_Filepaths_handles_directories(tmp_path): # Create directory with 2 files content = tmp_path / 'content' ; content.mkdir() # 
noqa: E702 for f in ('a', 'b'): (content / f).write_text('') # noqa: E701 fps = utils.Filepaths((content,)) assert fps == (content / 'a', content / 'b') # Replace one file with multilevel subdirectory subdir = content / 'b' ; subdir.unlink() ; subdir.mkdir() # noqa: E702 for f in ('c', 'd'): (subdir / f).write_text('') # noqa: E701 subsubdir = subdir / 'subsubdir' ; subsubdir.mkdir() # noqa: E702 for f in ('e', 'f'): (subsubdir / f).write_text('') # noqa: E701 fps[1] = content / 'b' assert fps == (content / 'a', subdir / 'c', subdir / 'd', subsubdir / 'e', subsubdir / 'f') # Replace subdirectory with file again for f in (subdir / 'c', subdir / 'd', subsubdir / 'e', subsubdir / 'f'): f.unlink() subsubdir.rmdir() subdir.rmdir() (content / 'b').write_text('I AM BACK') fps[1] = content / 'b' assert fps == (content / 'a', content / 'b') def test_Filepaths_calls_callback_after_appending(): cb = mock.MagicMock() fps = utils.Filepaths(('path/to/foo.jpg',), callback=cb) fps.append('path/to/baz.jpg') cb.assert_called_once_with(fps) def test_Filepaths_calls_callback_after_removing(): cb = mock.MagicMock() fps = utils.Filepaths(('path/to/foo.jpg',), callback=cb) del fps[0] cb.assert_called_once_with(fps) def test_Filepaths_calls_callback_after_inserting(): cb = mock.MagicMock() fps = utils.Filepaths(('path/to/foo.jpg',), callback=cb) fps.insert(0, 'path/to/baz.jpg') cb.assert_called_once_with(fps) def test_Filepaths_calls_callback_after_clearing(): cb = mock.MagicMock() fps = utils.Filepaths(('path/to/foo.jpg',), callback=cb) fps.clear() cb.assert_called_once_with(fps) def test_URLs_accepts_string_or_iterable(): urls = utils.URLs('http://foo:123') assert urls == utils.URLs(('http://foo:123',)) assert urls == utils.URLs(['http://foo:123']) def test_URLs_interprets_empty_string_as_empty_list(): urls = utils.URLs('') assert urls == () def test_URLs_deduplicates_when_initializing(): urls = utils.URLs(('http://foo:123', 'http://bar:456', 'http://foo:123')) assert urls == 
['http://foo:123', 'http://bar:456'] def test_URLs_deduplicates_when_setting(): urls = utils.URLs(('http://foo:123', 'http://bar:456')) urls.append('http://foo:123') urls.append('http://bar:456') urls.extend(('http://foo:123', 'http://bar:456')) assert urls == ['http://foo:123', 'http://bar:456'] def test_URLs_deduplicates_when_inserting(): urls = utils.URLs(('http://foo:123', 'http://bar:456')) urls.insert(1, 'http://foo:123') urls.insert(0, 'http://bar:456') urls.insert(0, 'http://foo:123') urls.insert(1, 'http://bar:456') assert urls == ['http://foo:123', 'http://bar:456'] def test_URLs_validates_initial_urls(): with pytest.raises(errors.URLError) as e: utils.URLs(('http://foo:123', 'http://bar:456:789')) assert str(e.value) == 'http://bar:456:789: Invalid URL' def test_URLs_validates_appended_urls(): urls = utils.URLs('http://foo:123') with pytest.raises(errors.URLError) as e: urls.append('http://bar:456:789') assert str(e.value) == 'http://bar:456:789: Invalid URL' assert urls == ('http://foo:123',) def test_URLs_validates_changed_urls(): urls = utils.URLs('http://foo:123') with pytest.raises(errors.URLError) as e: urls[0] = 'http://bar:456:789' assert str(e.value) == 'http://bar:456:789: Invalid URL' assert urls == ('http://foo:123',) def test_URLs_validates_inserted_urls(): urls = utils.URLs(('http://foo:123', 'http://bar:456')) with pytest.raises(errors.URLError) as e: urls.insert(1, 'http://baz:789:abc') assert str(e.value) == 'http://baz:789:abc: Invalid URL' assert urls == ('http://foo:123', 'http://bar:456') def test_URLs_does_not_empty_when_replacing_with_invalid_URLs(): urls = utils.URLs(('http://foo:123', 'http://bar:456')) with pytest.raises(errors.URLError): urls.replace(('http://baz:789:abc',)) assert urls == ('http://foo:123', 'http://bar:456') def test_URLs_is_equal_to_URLs_instances(): t1 = utils.URLs(('http://foo:123', 'http://bar:456')) t2 = utils.URLs(('http://foo:123', 'http://bar:456')) assert t1 == t2 t2 = utils.URLs(('http://foo:123', 
'http://baz:789')) assert t1 != t2 def test_URLs_is_equal_to_iterables(): urls = utils.URLs(('http://foo:123', 'http://bar:456')) assert urls == ['http://foo:123', 'http://bar:456'] assert urls == ('http://foo:123', 'http://bar:456') def test_URLs_is_equal_to_any_combination_of_the_same_urls(): urls = utils.URLs(('http://foo:123', 'http://bar:456', 'http://baz:789')) assert urls == ('http://foo:123', 'http://bar:456', 'http://baz:789') assert urls == ('http://bar:456', 'http://foo:123', 'http://baz:789') assert urls == ('http://bar:456', 'http://foo:123', 'http://baz:789') assert urls == ('http://foo:123', 'http://baz:789', 'http://bar:456') def test_URLs_calls_callback_after_appending(): cb = mock.MagicMock() urls = utils.URLs(('http://foo:123', 'http://bar:456'), callback=cb) urls.append('http://baz:789') cb.assert_called_once_with(urls) def test_URLs_calls_callback_after_removing(): cb = mock.MagicMock() urls = utils.URLs(('http://foo:123', 'http://bar:456'), callback=cb) urls.remove('http://bar:456') cb.assert_called_once_with(urls) def test_URLs_calls_callback_after_inserting(): cb = mock.MagicMock() urls = utils.URLs(('http://foo:123', 'http://bar:456'), callback=cb) urls.insert(0, 'http://baz:789') cb.assert_called_once_with(urls) def test_URLs_calls_callback_after_clearing(): cb = mock.MagicMock() urls = utils.URLs(('http://foo:123', 'http://bar:456'), callback=cb) urls.clear() cb.assert_called_once_with(urls) def test_URLs_equality(): urls = utils.URLs(('http://foo:123', 'http://bar:456')) assert urls == ('http://foo:123', 'http://bar:456') assert urls == ['http://foo:123', 'http://bar:456'] assert urls != ['http://foo:124', 'http://bar:456'] assert urls != 'http://bar:456' assert urls != 5 assert urls is not None def test_URLs_can_be_added(): urls1 = utils.URLs(('http://foo:123', 'http://bar:456')) urls2 = utils.URLs(('http://bar', 'http://baz')) assert urls1 + urls2 == ('http://foo:123', 'http://bar:456', 'http://bar', 'http://baz') assert urls1 + 
('http://bar',) == ('http://foo:123', 'http://bar:456', 'http://bar') assert urls1 + 'http://baz' == ('http://foo:123', 'http://bar:456', 'http://baz') def test_URLs_replace(): cb = mock.MagicMock() urls = utils.URLs(('http://foo:123', 'http://bar:456'), callback=cb) urls.replace(['http://asdf', 'http://quux']) assert urls == ['http://asdf', 'http://quux'] assert cb.call_args_list == [mock.call(urls)] def test_Trackers_ensures_tiers_when_initializing(): for args in (('http://foo:123', 'http://bar:456'), (['http://foo:123'], 'http://bar:456'), ('http://foo:123', ['http://bar:456']), (['http://foo:123'], ['http://bar:456'])): tiers = utils.Trackers(args) for tier in tiers: assert isinstance(tier, utils.URLs) assert tiers == [['http://foo:123'], ['http://bar:456']] def test_Trackers_ensures_tiers_when_setting(): for args in (('http://foo:123', 'http://bar:456'), (['http://foo:123'], 'http://bar:456'), ('http://foo:123', ['http://bar:456']), (['http://foo:123'], ['http://bar:456'])): tiers = utils.Trackers('http://quux') tiers.extend(args) for tier in tiers: assert isinstance(tier, utils.URLs) assert tiers == [['http://quux'], ['http://foo:123'], ['http://bar:456']] tiers = utils.Trackers('http://quux') tiers.append(args) for tier in tiers: assert isinstance(tier, utils.URLs) assert tiers == [['http://quux'], ['http://foo:123', 'http://bar:456']] def test_Trackers_ensures_tiers_when_inserting(): for args in (('http://foo:123', 'http://bar:456'), (['http://foo:123'], 'http://bar:456'), ('http://foo:123', ['http://bar:456']), (['http://foo:123'], ['http://bar:456'])): tiers = utils.Trackers('http://quux') tiers.insert(1, args) for tier in tiers: assert isinstance(tier, utils.URLs) assert tiers == [['http://quux'], ['http://foo:123', 'http://bar:456']] def test_Trackers_equality(): urls = utils.Trackers(('http://foo:123', 'http://bar:456')) assert urls == utils.Trackers(('http://foo:123', 'http://bar:456')) assert urls != utils.Trackers(('http://foo:123', 
'http://bar:4567')) assert urls == utils.Trackers(('http://foo:123', 'http://bar:456'), callback=lambda _: None) assert urls == [['http://foo:123'], ('http://bar:456',)] assert urls != [['http://foo:123'], 'http://bar:456'] assert urls == (('http://foo:123',), ['http://bar:456']) assert urls != (('http://foo:123',), [['http://bar:456']]) urls_ = utils.Trackers('http://foo:123') assert urls != urls_ urls_.append('http://bar:456') assert urls == urls_ def test_Trackers_can_be_added(): urls1 = utils.Trackers((('http://foo', 'http://bar'), 'http://baz')) urls2 = utils.Trackers(('http://a', ('http://b', 'http://c'), 'http://d')) assert urls1 + urls2 == (('http://foo', 'http://bar', 'http://a'), ('http://baz', 'http://b', 'http://c'), ('http://d',)) assert urls1 + ('http://x',) == (('http://foo', 'http://bar', 'http://x'), ('http://baz',)) assert urls2 + ('http://x',) == (('http://a','http://x'), ('http://b', 'http://c'), ('http://d',)) assert urls1 + (('http://x', 'http://y'), 'http://z') == (('http://foo', 'http://bar', 'http://x', 'http://y'), ('http://baz', 'http://z')) assert urls2 + (('http://x', 'http://y'), 'http://z') == (('http://a', 'http://x', 'http://y'), ('http://b', 'http://c', 'http://z'), ('http://d',)) assert urls1 + (('http://x',), 'http://z', ('http://123', 'http://456')) == (('http://foo', 'http://bar', 'http://x'), ('http://baz', 'http://z'), ('http://123', 'http://456')) def test_Trackers_callback(): def assert_type(arg): assert type(arg) is utils.Trackers cb = mock.MagicMock() cb.side_effect = assert_type tiers = utils.Trackers(('http://foo:123', 'http://bar:456'), callback=cb) assert cb.call_args_list == [] tiers.append('http://baz:789') assert cb.call_args_list == [mock.call(tiers)] del tiers[0] assert cb.call_args_list == [mock.call(tiers)] * 2 tiers.insert(0, ['http://quux']) assert cb.call_args_list == [mock.call(tiers)] * 3 tiers[0].append('http://asdf') assert cb.call_args_list == [mock.call(tiers)] * 4 tiers[2].remove('http://baz:789') 
assert cb.call_args_list == [mock.call(tiers)] * 5 tiers.clear() assert cb.call_args_list == [mock.call(tiers)] * 6 def test_Trackers_removes_empty_tier_automatically(): tiers = utils.Trackers(('http://foo:123', 'http://bar:456')) assert tiers == [['http://foo:123'], ['http://bar:456']] tiers[0].remove('http://foo:123') assert tiers == [['http://bar:456']] def test_Trackers_deduplicates_urls_automatically_when_initializing(): tiers = utils.Trackers((['http://foo:123', 'http://bar:456', 'http://baz:789'], ['http://quux', 'http://foo:123', 'http://asdf'], ['http://asdf', 'http://baz:789', 'http://flim'])) assert tiers == [['http://foo:123', 'http://bar:456', 'http://baz:789'], ['http://quux', 'http://asdf'], ['http://flim']] def test_Trackers_deduplicates_urls_automatically_when_setting(): tiers = utils.Trackers((['http://foo:123', 'http://bar:456', 'http://baz:789'],)) tiers.append(['http://quux', 'http://foo:123']) assert tiers == [['http://foo:123', 'http://bar:456', 'http://baz:789'], ['http://quux']] tiers.append(['http://foo:123', 'http://bar:456', 'http://baz:789']) tiers.append('http://quux') assert tiers == [['http://foo:123', 'http://bar:456', 'http://baz:789'], ['http://quux']] def test_Trackers_deduplicates_urls_automatically_when_inserting(): tiers = utils.Trackers((['http://foo:123', 'http://bar:456', 'http://baz:789'],)) tiers.insert(0, ['http://asdf', 'http://baz:789', 'http://quux', 'http://foo:123']) assert tiers == [['http://asdf', 'http://quux'], ['http://foo:123', 'http://bar:456', 'http://baz:789']] tiers = utils.Trackers((['http://foo:123', 'http://bar:456', 'http://baz:789'],)) tiers.insert(1, ['http://asdf', 'http://baz:789', 'http://quux', 'http://foo:123']) assert tiers == [['http://foo:123', 'http://bar:456', 'http://baz:789'], ['http://asdf', 'http://quux']] def test_Trackers_flat_property(): tiers = utils.Trackers((['http://foo:123'], ['http://bar:456'])) assert tiers.flat == ('http://foo:123', 'http://bar:456') def 
test_Trackers_replace(): cb = mock.MagicMock() tiers = utils.Trackers((['http://foo:123'], ['http://bar:456']), callback=cb) cb.reset_mock() tiers.replace(('http://asdf', ('http://qux', 'http://quux'), 'http://qaax')) assert tiers == (['http://asdf'], ['http://qux', 'http://quux'], ['http://qaax']) assert cb.call_args_list == [mock.call(tiers)] def test_download_from_invalid_url(): with pytest.raises(torf.URLError) as excinfo: utils.download('http://foo:bar') assert str(excinfo.value) == 'http://foo:bar: Invalid URL' def test_download_from_url_with_unsupported_protocol(): with pytest.raises(torf.ConnectionError) as excinfo: utils.download('asdf://foo:bar') assert str(excinfo.value) == 'asdf://foo:bar: Unsupported protocol' def test_download_from_unconnectable_url(free_port): with pytest.raises(torf.ConnectionError) as excinfo: utils.download(f'http://localhost:{free_port}') assert str(excinfo.value) == f'http://localhost:{free_port}: Connection refused' def test_download_from_connectable_url(httpserver): httpserver.expect_request('/foo').respond_with_data(b'bar') assert utils.download(httpserver.url_for('/foo')) == b'bar' def test_download_with_zero_timeout(httpserver): with pytest.raises(torf.ConnectionError) as excinfo: utils.download('some/url', timeout=0) assert str(excinfo.value) == 'some/url: Timed out' with pytest.raises(torf.ConnectionError) as excinfo: utils.download('some/url', timeout=-1) assert str(excinfo.value) == 'some/url: Timed out' rndusr-torf-547b989/tests/test_validate.py000066400000000000000000000405301513142010300206300ustar00rootroot00000000000000import os import pytest import torf def test_wrong_info_type(generated_singlefile_torrent): t = generated_singlefile_torrent for typ in (bytearray, list, tuple): t.metainfo['info'] = typ() with pytest.raises(torf.MetainfoError) as excinfo: t.validate() assert str(excinfo.value) == (f"Invalid metainfo: ['info'] " f"must be dict, not {typ.__qualname__}: {t.metainfo['info']}") def 
test_length_and_files_in_info(generated_multifile_torrent): t = generated_multifile_torrent t.metainfo['info']['length'] = 123 with pytest.raises(torf.MetainfoError) as excinfo: t.validate() assert str(excinfo.value) == "Invalid metainfo: ['info'] includes both 'length' and 'files'" def test_wrong_name_type(generated_singlefile_torrent): t = generated_singlefile_torrent t.metainfo['info']['name'] = 123 with pytest.raises(torf.MetainfoError) as excinfo: t.validate() assert str(excinfo.value) == ("Invalid metainfo: ['info']['name'] " "must be str or bytes, not int: 123") def test_wrong_piece_length_type(generated_singlefile_torrent): t = generated_singlefile_torrent t.metainfo['info']['piece length'] = [700] with pytest.raises(torf.MetainfoError) as excinfo: t.validate() assert str(excinfo.value) == ("Invalid metainfo: ['info']['piece length'] " "must be int, not list: [700]") @pytest.mark.parametrize( argnames='piece_length, exp_exception', argvalues=( (-1, torf.MetainfoError("['info']['piece length'] is invalid: -1")), (0, torf.MetainfoError("['info']['piece length'] is invalid: 0")), (16385, torf.MetainfoError("['info']['piece length'] is invalid: 16385")), ), ) def test_piece_length_not_divisible_by_16_kib(piece_length, exp_exception, generated_singlefile_torrent): t = generated_singlefile_torrent t.metainfo['info']['piece length'] = piece_length with pytest.raises(type(exp_exception)) as excinfo: t.validate() assert str(excinfo.value) == str(exp_exception) def test_wrong_pieces_type(generated_singlefile_torrent): t = generated_singlefile_torrent t.metainfo['info']['pieces'] = 'many' with pytest.raises(torf.MetainfoError) as excinfo: t.validate() assert str(excinfo.value) == ("Invalid metainfo: ['info']['pieces'] " "must be bytes, not str: 'many'") def test_pieces_is_empty(generated_singlefile_torrent): t = generated_singlefile_torrent t.metainfo['info']['pieces'] = bytes() with pytest.raises(torf.MetainfoError) as excinfo: t.validate() assert str(excinfo.value) 
def test_invalid_number_of_bytes_in_pieces(generated_singlefile_torrent):
    """``pieces`` must hold a multiple of 20 bytes; one byte more or less fails."""
    torrent = generated_singlefile_torrent
    torrent.path = None
    info = torrent.metainfo['info']
    info['piece length'] = 512 * 1024
    expected = "Invalid metainfo: length of ['info']['pieces'] is not divisible by 20"
    for piece_count in range(1, 10):
        hashes_size = piece_count * 20
        torrent.metainfo['info']['length'] = piece_count * torrent.metainfo['info']['piece length']
        torrent.metainfo['info']['pieces'] = bytes(os.urandom(hashes_size))
        # Exactly 20 bytes per piece is valid
        torrent.validate()

        for bad_size in (hashes_size + 1, hashes_size - 1):
            torrent.metainfo['info']['pieces'] = bytes(os.urandom(bad_size))
            with pytest.raises(torf.MetainfoError) as exc_info:
                torrent.validate()
            assert str(exc_info.value) == expected


def test_wrong_creation_date_type(generated_singlefile_torrent):
    """validate() must reject a str as ``['creation date']``."""
    torrent = generated_singlefile_torrent
    torrent.metainfo['creation date'] = 'hello'
    expected = "Invalid metainfo: ['creation date'] must be int or datetime, not str: 'hello'"
    with pytest.raises(torf.MetainfoError) as exc_info:
        torrent.validate()
    assert str(exc_info.value) == expected


def test_singlefile__unexpected_number_of_bytes_in_pieces(generated_singlefile_torrent):
    """Too many or too few piece hashes for the declared length are reported."""
    torrent = generated_singlefile_torrent
    torrent.path = None  # Don't complain about wrong file size
    torrent.metainfo['info']['length'] = 1024 * 1024
    torrent.metainfo['info']['piece length'] = int(1024 * 1024 / 8)

    # 8 pieces are expected; try one too many, then one too few
    for actual_pieces in (9, 7):
        torrent.metainfo['info']['pieces'] = os.urandom(20 * actual_pieces)
        with pytest.raises(torf.MetainfoError) as exc_info:
            torrent.validate()
        assert str(exc_info.value) == f'Invalid metainfo: Expected 8 pieces but there are {actual_pieces}'


def test_multifile__unexpected_number_of_bytes_in_pieces(generated_multifile_torrent):
    """Same as the single-file variant, with file lengths rewritten first."""
    import math

    torrent = generated_multifile_torrent
    torrent.path = None  # Don't complain about wrong file size

    total_size = 0
    for number, file in enumerate(torrent.metainfo['info']['files'], start=1):
        file['length'] = 1024 * 1024 * number + 123
        total_size += file['length']

    torrent.metainfo['info']['piece length'] = int(1024 * 1024 / 8)
    piece_count = math.ceil(total_size / torrent.metainfo['info']['piece length'])

    # The expected messages hard-code 49 pieces, i.e. they rely on the
    # fixture's file count matching that total.
    for hash_count, reported in ((piece_count + 1, 50), (piece_count - 1, 48)):
        torrent.metainfo['info']['pieces'] = os.urandom(20 * hash_count)
        with pytest.raises(torf.MetainfoError) as exc_info:
            torrent.validate()
        assert str(exc_info.value) == f'Invalid metainfo: Expected 49 pieces but there are {reported}'
def test_no_announce_is_ok(generated_singlefile_torrent):
    """A torrent without ``announce`` validates."""
    torrent = generated_singlefile_torrent
    if 'announce' in torrent.metainfo:
        del torrent.metainfo['announce']
    torrent.validate()


def test_wrong_announce_type(generated_singlefile_torrent):
    """``announce`` must be a str; empty bytearray, list and tuple are rejected."""
    torrent = generated_singlefile_torrent
    for bad_type in (bytearray, list, tuple):
        torrent.metainfo['announce'] = bad_type()
        with pytest.raises(torf.MetainfoError) as exc_info:
            torrent.validate()
        expected = (f"Invalid metainfo: ['announce'] must be str, "
                    f"not {bad_type.__qualname__}: {torrent.metainfo['announce']}")
        assert str(exc_info.value) == expected


def test_invalid_announce_url(generated_singlefile_torrent):
    """Strings that are not valid URLs are rejected as ``announce``."""
    torrent = generated_singlefile_torrent
    for bad_url in ('123', 'http://123:xxx/announce'):
        torrent.metainfo['announce'] = bad_url
        with pytest.raises(torf.MetainfoError) as exc_info:
            torrent.validate()
        assert str(exc_info.value) == f"Invalid metainfo: ['announce'] is invalid: {bad_url!r}"


def test_no_announce_list_is_ok(generated_singlefile_torrent):
    """A torrent without ``announce-list`` validates."""
    torrent = generated_singlefile_torrent
    if 'announce-list' in torrent.metainfo:
        del torrent.metainfo['announce-list']
    torrent.validate()


def test_wrong_announce_list_type(generated_singlefile_torrent):
    """Check type validation at all three nesting levels of ``announce-list``."""
    torrent = generated_singlefile_torrent

    def placements(tier):
        # The same tier, alone and surrounded by empty tiers
        return ([tier], [tier, []], [[], tier], [[], tier, []])

    # announce-list must be a list
    for bad_value in (3, 'foo', None, lambda: None):
        torrent.metainfo['announce-list'] = bad_value
        with pytest.raises(torf.MetainfoError) as exc_info:
            torrent.validate()
        expected = (f"Invalid metainfo: ['announce-list'] "
                    f"must be Iterable, not {type(bad_value).__qualname__}: "
                    f"{torrent.metainfo['announce-list']!r}")
        assert str(exc_info.value) == expected

    # Each item in announce-list must be a list
    for bad_tier in (3, 'foo', None, lambda: None):
        for announce_list in placements(bad_tier):
            torrent.metainfo['announce-list'] = announce_list
            with pytest.raises(torf.MetainfoError) as exc_info:
                torrent.validate()
            tier_index = announce_list.index(bad_tier)
            expected = (f"Invalid metainfo: ['announce-list'][{tier_index}] "
                        f"must be Iterable, not {type(bad_tier).__qualname__}: {bad_tier!r}")
            assert str(exc_info.value) == expected

    # Each item in each list in announce-list must be a string
    for bad_type in (bytearray, set):
        url = bad_type()
        tiers = (
            [url],
            ['http://localhost:123/', url],
            [url, 'http://localhost:123/'],
            ['http://localhost:123/', url, 'http://localhost:456/'],
        )
        for tier in tiers:
            url_index = tier.index(url)
            for announce_list in placements(tier):
                tier_index = announce_list.index(tier)
                torrent.metainfo['announce-list'] = announce_list
                with pytest.raises(torf.MetainfoError) as exc_info:
                    torrent.validate()
                expected = (f"Invalid metainfo: ['announce-list'][{tier_index}][{url_index}] "
                            f"must be str, not {bad_type.__qualname__}: {url!r}")
                assert str(exc_info.value) == expected
def test_invalid_url_in_announce_list(generated_singlefile_torrent):
    """Invalid URLs are reported with their tier and URL index."""
    torrent = generated_singlefile_torrent

    def placements(tier):
        # The same tier, alone and surrounded by empty tiers
        return ([tier], [tier, []], [[], tier], [[], tier, []])

    for bad_url in ('123', 'http://123:xxx/announce'):
        tiers = (
            [bad_url],
            ['http://localhost:123/', bad_url],
            [bad_url, 'http://localhost:123/'],
            ['http://localhost:123/', bad_url, 'http://localhost:456/'],
        )
        for tier in tiers:
            url_index = tier.index(bad_url)
            for announce_list in placements(tier):
                tier_index = announce_list.index(tier)
                torrent.metainfo['announce-list'] = announce_list
                with pytest.raises(torf.MetainfoError) as exc_info:
                    torrent.validate()
                expected = (f"Invalid metainfo: ['announce-list'][{tier_index}][{url_index}] "
                            f"is invalid: {bad_url!r}")
                assert str(exc_info.value) == expected


def test_no_announce_and_no_announce_list_when_torrent_is_private(generated_singlefile_torrent):
    """A private torrent without any announce URLs validates and generates."""
    torrent = generated_singlefile_torrent
    torrent.metainfo['info']['private'] = True
    for key in ('announce', 'announce-list'):
        if key in torrent.metainfo:
            del torrent.metainfo[key]
    torrent.validate()
    assert torrent.generate() is True
    assert torrent.is_ready is True


def test_singlefile_wrong_length_type(generated_singlefile_torrent):
    """validate() must reject a str as ``['info']['length']``."""
    torrent = generated_singlefile_torrent
    torrent.metainfo['info']['length'] = 'foo'
    expected = "Invalid metainfo: ['info']['length'] must be int or float, not str: 'foo'"
    with pytest.raises(torf.MetainfoError) as exc_info:
        torrent.validate()
    assert str(exc_info.value) == expected
def test_singlefile_wrong_md5sum_type(generated_singlefile_torrent):
    """``md5sum`` must be a str and must look like a hex digest."""
    torrent = generated_singlefile_torrent

    torrent.metainfo['info']['md5sum'] = 0
    with pytest.raises(torf.MetainfoError) as exc_info:
        torrent.validate()
    assert str(exc_info.value) == "Invalid metainfo: ['info']['md5sum'] must be str, not int: 0"

    # Right type and length, but 'Z' is not a hexadecimal digit
    torrent.metainfo['info']['md5sum'] = 'Z8b329da9893e34099c7d8ad5cb9c940'
    with pytest.raises(torf.MetainfoError) as exc_info:
        torrent.validate()
    expected = "Invalid metainfo: ['info']['md5sum'] is invalid: 'Z8b329da9893e34099c7d8ad5cb9c940'"
    assert str(exc_info.value) == expected


def test_multifile_wrong_files_type(generated_multifile_torrent):
    """validate() must reject a str as ``['info']['files']``."""
    torrent = generated_multifile_torrent
    torrent._path = None
    torrent.metainfo['info']['files'] = 'foo'
    expected = "Invalid metainfo: ['info']['files'] must be Iterable, not str: 'foo'"
    with pytest.raises(torf.MetainfoError) as exc_info:
        torrent.validate()
    assert str(exc_info.value) == expected


def test_multifile_wrong_path_type(generated_multifile_torrent):
    """A file's ``path`` must be a sequence of segments, not a single str."""
    torrent = generated_multifile_torrent
    torrent._path = None
    torrent.metainfo['info']['files'][0]['path'] = 'foo/bar/baz'
    expected = ("Invalid metainfo: ['info']['files'][0]['path'] "
                "must be Iterable, not str: 'foo/bar/baz'")
    with pytest.raises(torf.MetainfoError) as exc_info:
        torrent.validate()
    assert str(exc_info.value) == expected


def test_multifile_wrong_path_item_type(generated_multifile_torrent):
    """Every segment of a file's ``path`` must be str or bytes."""
    torrent = generated_multifile_torrent
    torrent._path = None
    torrent.metainfo['info']['files'][1]['path'][0] = 17
    expected = ("Invalid metainfo: ['info']['files'][1]['path'][0] "
                "must be str or bytes, not int: 17")
    with pytest.raises(torf.MetainfoError) as exc_info:
        torrent.validate()
    assert str(exc_info.value) == expected


def test_multifile_wrong_length_type(generated_multifile_torrent):
    """validate() must reject a list as a file's ``length``."""
    torrent = generated_multifile_torrent
    torrent._path = None
    torrent.metainfo['info']['files'][2]['length'] = ['this', 'is', 'not', 'a', 'length']
    expected = ("Invalid metainfo: ['info']['files'][2]['length'] "
                "must be int or float, not list: ['this', 'is', 'not', 'a', 'length']")
    with pytest.raises(torf.MetainfoError) as exc_info:
        torrent.validate()
    assert str(exc_info.value) == expected
def test_multifile_wrong_md5sum_type(generated_multifile_torrent):
    """A file's ``md5sum`` must be a str and must look like a hex digest."""
    torrent = generated_multifile_torrent
    first_file = torrent.metainfo['info']['files'][0]

    first_file['md5sum'] = 0
    with pytest.raises(torf.MetainfoError) as exc_info:
        torrent.validate()
    expected = "Invalid metainfo: ['info']['files'][0]['md5sum'] must be str, not int: 0"
    assert str(exc_info.value) == expected

    # Right type and length, but 'Z' is not a hexadecimal digit
    torrent.metainfo['info']['files'][0]['md5sum'] = 'Z8b329da9893e34099c7d8ad5cb9c940'
    with pytest.raises(torf.MetainfoError) as exc_info:
        torrent.validate()
    expected = ("Invalid metainfo: ['info']['files'][0]['md5sum'] is invalid: "
                "'Z8b329da9893e34099c7d8ad5cb9c940'")
    assert str(exc_info.value) == expected


def assert_missing_metainfo(torrent, *keys):
    """
    Delete the metainfo entry addressed by `keys` and expect validate() to
    complain about the missing key

    `keys` is a path into ``torrent.metainfo``; the last item is the key that
    is removed from the container reached via the preceding items.
    """
    parent_keys = keys[:-1]
    missing_key = keys[-1]
    container = torrent.metainfo
    for parent_key in parent_keys:
        container = container[parent_key]
    del container[missing_key]

    with pytest.raises(torf.MetainfoError) as exc_info:
        torrent.validate()
    # Only the beginning of the message is matched, so deeper paths like
    # ['info']['files'][0] are accepted as well.
    assert exc_info.match(rf"Invalid metainfo: Missing {keys[-1]!r} in \['info'\]")


def test_singlefile_missing_info_path(generated_singlefile_torrent):
    """Missing ``['info']['name']`` in a single-file torrent."""
    assert_missing_metainfo(generated_singlefile_torrent, 'info', 'name')


def test_singlefile_missing_info_piece_length(generated_singlefile_torrent):
    """Missing ``['info']['piece length']`` in a single-file torrent."""
    assert_missing_metainfo(generated_singlefile_torrent, 'info', 'piece length')


def test_singlefile_missing_info_pieces(generated_singlefile_torrent):
    """Missing ``['info']['pieces']`` in a single-file torrent."""
    assert_missing_metainfo(generated_singlefile_torrent, 'info', 'pieces')


def test_multifile_missing_info_path(generated_multifile_torrent):
    """Missing ``['info']['name']`` in a multi-file torrent."""
    assert_missing_metainfo(generated_multifile_torrent, 'info', 'name')


def test_multifile_missing_info_piece_length(generated_multifile_torrent):
    """Missing ``['info']['piece length']`` in a multi-file torrent."""
    assert_missing_metainfo(generated_multifile_torrent, 'info', 'piece length')


def test_multifile_missing_info_pieces(generated_multifile_torrent):
    """Missing ``['info']['pieces']`` in a multi-file torrent."""
    assert_missing_metainfo(generated_multifile_torrent, 'info', 'pieces')
def test_multifile_missing_info_files_0_length(generated_multifile_torrent):
    """Missing ``length`` of the first file."""
    assert_missing_metainfo(generated_multifile_torrent, 'info', 'files', 0, 'length')


def test_multifile_missing_info_files_1_length(generated_multifile_torrent):
    """Missing ``length`` of the second file."""
    assert_missing_metainfo(generated_multifile_torrent, 'info', 'files', 1, 'length')


def test_multifile_missing_info_files_1_path(generated_multifile_torrent):
    """Missing ``path`` of the second file."""
    assert_missing_metainfo(generated_multifile_torrent, 'info', 'files', 1, 'path')


def test_multifile_missing_info_files_2_path(generated_multifile_torrent):
    """Missing ``path`` of the third file."""
    assert_missing_metainfo(generated_multifile_torrent, 'info', 'files', 2, 'path')


def assert_mismatching_filesizes(torrent):
    """
    Grow every file of `torrent` by three bytes, one file at a time, and
    expect validate() to report the size mismatch

    Each file is restored before the next one is changed.
    """
    torrent.validate()  # Should validate

    appendix = b'foo'
    for torrent_path, fs_path in zip(torrent.files, torrent.filepaths):
        # Remember file content
        with open(fs_path, 'rb') as fh:
            original_content = fh.read()

        # Change file size
        with open(fs_path, 'ab') as fh:
            fh.write(appendix)

        # Expect validation error
        mi_size = torrent.partial_size(torrent_path)
        fs_size = os.path.getsize(fs_path)
        assert fs_size == mi_size + len('foo')
        expected = (f'Invalid metainfo: Mismatching file sizes in metainfo ({mi_size}) '
                    f'and file system ({fs_size}): {fs_path}')
        with pytest.raises(torf.MetainfoError) as exc_info:
            torrent.validate()
        assert str(exc_info.value) == expected

        # Restore original file content
        with open(fs_path, 'wb') as fh:
            fh.write(original_content)
        torrent.validate()  # Should validate again


def test_singlefile_mismatching_filesize(generated_singlefile_torrent):
    """File size mismatch in a single-file torrent."""
    assert_mismatching_filesizes(generated_singlefile_torrent)


def test_multifile_mismatching_filesize(generated_multifile_torrent):
    """File size mismatch in a multi-file torrent."""
    assert_mismatching_filesizes(generated_multifile_torrent)


# ---- archive member boundary: tests/test_verify_content.py starts here ----
import collections
import errno
import itertools
import os
import random
from unittest import mock

import pytest
import torf

from . import (
    ComparableException,
    calc_corruptions,
    calc_good_pieces,
    calc_piece_indexes,
    calc_pieces_done,
    change_file_size,
    display_filespecs,
    file_piece_indexes,
    file_range,
    fuzzylist,
    pos2file,
    pos2files,
    random_positions,
    round_down_to_multiple,
)

import logging  # isort:skip
debug = logging.getLogger('test').debug


# Allow arbitrary small piece sizes to make debugging easier.
@pytest.fixture(autouse=True)
def ignore_piece_size_check(mocker):
    """Make ``torf._utils.is_divisible_by_16_kib`` always return True."""
    mocker.patch('torf._utils.is_divisible_by_16_kib', return_value=True)


class CollectingCallback():
    """Collect call arguments and make basic assertions"""

    def __init__(self, torrent):
        super().__init__()
        self.torrent = torrent
        self.seen_pieces_done = []
        # Piece indexes are collected per file name
        self._seen_piece_indexes = collections.defaultdict(lambda: fuzzylist())
        self._seen_good_pieces = collections.defaultdict(lambda: fuzzylist())
        self._seen_skipped_pieces = collections.defaultdict(lambda: fuzzylist())
        self.seen_exceptions = fuzzylist()

    def __call__(self, t, path, pieces_done, pieces_total, piece_index, piece_hash, exc):
        """Record one callback invocation after sanity-checking its arguments."""
        assert t is self.torrent
        assert pieces_total == t.pieces
        assert 1 <= pieces_done <= pieces_total
        self.seen_pieces_done.append(pieces_done)
        self._seen_piece_indexes[path.name].append(piece_index)

        if exc is not None:
            # Only content errors come with the hash of the offending piece
            if isinstance(exc, torf.VerifyContentError):
                assert type(piece_hash) is bytes and len(piece_hash) == 20  # noqa: E721
            else:
                assert piece_hash is None
            self.seen_exceptions.append(ComparableException(exc))
            debug(f'--- Seeing exception: {self.seen_exceptions[-1]}')
            return

        if piece_hash is None:
            # No error and no hash: the piece was skipped
            assert exc is None
            self._seen_skipped_pieces[path.name].append(piece_index)
            debug(f'--- Seeing skipped piece of {path.name}: {piece_index}')
            return

        assert exc is None
        assert type(piece_hash) is bytes and len(piece_hash) == 20  # noqa: E721
        debug(f'--- Seeing good piece of {path.name}: {piece_index}')
        self._seen_good_pieces[path.name].append(piece_index)

    @property
    def seen_piece_indexes(self):
        """All reported piece indexes as a plain dict keyed by file name."""
        return dict(self._seen_piece_indexes)

    @property
    def seen_good_pieces(self):
        """Indexes of good pieces as a plain dict keyed by file name."""
        return dict(self._seen_good_pieces)

    @property
    def seen_skipped_pieces(self):
        """Indexes of skipped pieces as a plain dict keyed by file name."""
        return dict(self._seen_skipped_pieces)
class _TestCaseBase():
    """
    This class runs most of the tests while the test_*() functions mostly
    collect parametrized test values

    Subclasses provide setup(), the error-introducing methods and the
    attributes read here (stream_original, stream_corrupt, filespecs,
    filespecs_abspath, piece_size, content_path, torrent).
    """

    # Names of the lazily computed expectations that reset() discards
    _CACHED_EXPECTATIONS = (
        '_exp_exceptions',
        '_exp_pieces_done',
        '_exp_piece_indexes',
        '_exp_good_pieces',
        '_exp_exc_corruptions',
        '_exp_exc_files_missing',
        '_exp_exc_files_missized',
    )

    def __init__(self, create_dir, create_file, create_torrent_file, forced_piece_size):
        self.create_dir = create_dir
        self.create_file = create_file
        self.create_torrent_file = create_torrent_file
        self.forced_piece_size = forced_piece_size
        self.reset()

    def reset(self):
        """Forget all introduced errors and all cached expectations."""
        self.corruption_positions = set()
        self.files_corrupt = []
        self.files_missing = []
        self.files_missized = []
        for attr in self._CACHED_EXPECTATIONS:
            if hasattr(self, attr):
                delattr(self, attr)

    def run(self, *_, with_callback, exp_return_value=None, skip_on_error=False):
        """
        Call Torrent.verify() with or without callback and check the outcome
        against the expectations calculated from the introduced errors
        """
        corrupt_piece_indexes = sorted({corrpos // self.piece_size
                                        for corrpos in self.corruption_positions})
        debug(f'Original stream: {self.stream_original.hex()}')
        debug(f' Corrupt stream: {self.stream_corrupt.hex()}')
        debug(f'Corruption positions: {sorted(self.corruption_positions)}')
        debug(f'Corrupt piece indexes: {corrupt_piece_indexes}')

        self.skip_on_error = skip_on_error
        kwargs = {
            # 'skip_on_error': skip_on_error,
            'exp_return_value': exp_return_value,
        }

        if with_callback:
            return self._run_with_callback(**kwargs)

        exp_exceptions = self.exp_exceptions
        if not exp_exceptions:
            debug('Expecting no exceptions')
            self._run_without_callback(**kwargs)
            return

        debug(f'Expected exceptions: {exp_exceptions}')
        exp_exception_types = tuple({type(exc) for exc in exp_exceptions})
        debug(f'Expected exception types: {exp_exception_types}')
        with pytest.raises(exp_exception_types) as e:
            self._run_without_callback(**kwargs)
        # Usually the first error in the stream is reported, but not
        # always, so we expect one of the possible exceptions to be
        # raised.
        assert e.value in exp_exceptions

    def _run_without_callback(self, exp_return_value, **kwargs):
        """Call verify() without callback; errors are raised by verify()."""
        debug(f'################ VERIFY WITHOUT CALLBACK: kwargs={kwargs}')
        if exp_return_value is None:
            self.torrent.verify(self.content_path, **kwargs)
        else:
            assert self.torrent.verify(self.content_path, **kwargs) is exp_return_value

    def _run_with_callback(self, exp_return_value, **kwargs):
        """Call verify() with a CollectingCallback and compare what it saw."""
        debug(f'################ VERIFY WITH CALLBACK: kwargs={kwargs}')
        cb = CollectingCallback(self.torrent)
        kwargs['callback'] = cb
        kwargs['interval'] = 0

        if exp_return_value is None:
            self.torrent.verify(self.content_path, **kwargs)
        else:
            return_value = self.torrent.verify(self.content_path, **kwargs)
            assert return_value == exp_return_value

        debug(f'seen_exceptions: {cb.seen_exceptions}')
        assert cb.seen_exceptions == self.exp_exceptions

        debug(f'seen_piece_indexes: {cb.seen_piece_indexes}')
        assert cb.seen_piece_indexes == self.exp_piece_indexes

        debug(f'seen_pieces_done: {cb.seen_pieces_done}')
        assert cb.seen_pieces_done == self.exp_pieces_done

        # Last pieces_done value must be the total number of pieces so progress
        # is finalized correctly, e.g. progress bar ends at 100%
        assert cb.seen_pieces_done[-1] == self.torrent.pieces

        debug(f'seen_good_pieces: {cb.seen_good_pieces}')
        assert cb.seen_good_pieces == self.exp_good_pieces

    @property
    def exp_pieces_done(self):
        """Expected sequence of ``pieces_done`` callback arguments (cached)."""
        if hasattr(self, '_exp_pieces_done'):
            return self._exp_pieces_done
        self._exp_pieces_done = calc_pieces_done(
            self.filespecs_abspath, self.piece_size,
            self.files_missing, self.files_missized)
        debug(f'Expected pieces done: {self._exp_pieces_done}')
        return self._exp_pieces_done

    @property
    def exp_piece_indexes(self):
        """Expected piece indexes per file (cached)."""
        if hasattr(self, '_exp_piece_indexes'):
            return self._exp_piece_indexes
        self._exp_piece_indexes = calc_piece_indexes(
            self.filespecs, self.piece_size,
            self.files_missing, self.files_missized)
        debug(f'Expected piece indexes: {dict(self._exp_piece_indexes)}')
        return self._exp_piece_indexes

    @property
    def exp_good_pieces(self):
        """Expected indexes of good pieces per file (cached)."""
        if hasattr(self, '_exp_good_pieces'):
            return self._exp_good_pieces
        self._exp_good_pieces = calc_good_pieces(
            self.filespecs, self.piece_size, self.files_missing,
            self.corruption_positions, self.files_missized)
        # This is disabled because the skip_on_error option for
        # Torrent.verify() was removed. Feel free to re-implement and
        # re-enable.
        # if self.skip_on_error:
        #     self._exp_good_pieces = skip_good_pieces(self._exp_good_pieces,
        #                                              self.filespecs,
        #                                              self.piece_size,
        #                                              self.corruption_positions)
        debug(f'Expected good pieces: {self._exp_good_pieces}')
        return self._exp_good_pieces

    @property
    def exp_exc_corruptions(self):
        """Expected corruption exceptions (cached)."""
        if hasattr(self, '_exp_exc_corruptions'):
            return self._exp_exc_corruptions
        self._exp_exc_corruptions = calc_corruptions(
            self.filespecs_abspath, self.piece_size, self.corruption_positions)
        # This is disabled because the skip_on_error option for
        # Torrent.verify() was removed. Feel free to re-implement and
        # re-enable.
        # if self.skip_on_error:
        #     self._exp_exc_corruptions = skip_corruptions(self._exp_exc_corruptions, self.filespecs_abspath,
        #                                                  self.piece_size, self.corruption_positions,
        #                                                  self.files_missing, self.files_missized)
        debug('Expected corruptions:')
        for exc in self._exp_exc_corruptions:
            debug(f' {exc}')
        return self._exp_exc_corruptions

    @property
    def exp_exc_files_missing(self):
        """Expected "No such file" exceptions, one per missing file (cached)."""
        if hasattr(self, '_exp_exc_files_missing'):
            return self._exp_exc_files_missing
        excs = (ComparableException(torf.ReadError(errno.ENOENT, filepath))
                for filepath in self.files_missing)
        self._exp_exc_files_missing = fuzzylist(*excs)
        debug(f'Expected files missing: {self._exp_exc_files_missing}')
        return self._exp_exc_files_missing

    @property
    def exp_exc_files_missized(self):
        """Expected file size exceptions, one per missized file (cached)."""
        if hasattr(self, '_exp_exc_files_missized'):
            return self._exp_exc_files_missized

        def make_exception(filepath):
            fsize_orig = self.get_original_filesize(filepath)
            fsize_actual = self.get_actual_filesize(filepath)
            return ComparableException(torf.VerifyFileSizeError(
                filepath, actual_size=fsize_actual, expected_size=fsize_orig))

        excs = [make_exception(filepath) for filepath in self.files_missized]
        self._exp_exc_files_missized = fuzzylist(*excs)
        debug(f'Expected files missized: {self._exp_exc_files_missized}')
        return self._exp_exc_files_missized

    @property
    def exp_exceptions(self):
        """
        All expected exceptions as a fuzzylist (cached)

        Mandatory exceptions are the list items; exceptions that are merely
        tolerated are passed as ``maybe``.
        """
        if hasattr(self, '_exp_exceptions'):
            return self._exp_exceptions

        debug('Calculating expected exceptions:')

        # Exceptions that must be reported
        mandatory = set(self.exp_exc_files_missing)
        maybe = set()

        # Files with wrong size must be reported if they are not also missing
        mandatory_files = {exc.path for exc in mandatory}
        for exc in self.exp_exc_files_missized:
            if exc.filepath not in mandatory_files:
                mandatory.add(exc)

        # If there are no missing or missized files, corruptions are mandatory
        if not mandatory:
            debug('all corruption exceptions are mandatory')
            mandatory.update(self.exp_exc_corruptions)
            maybe.update(self.exp_exc_corruptions.maybe)
        else:
            debug('not all corruption exceptions are mandatory')
            # Corrupt files are only reported if their piece_indexes aren't
            # already covered by missing or missized files
            missing_missized_pis = set()
            for filepath in itertools.chain(self.files_missing, self.files_missized):
                filename = os.path.basename(filepath)
                file_pis = file_piece_indexes(filename, self.filespecs, self.piece_size, exclusive=False)
                missing_missized_pis.update(file_pis)

            for exc in self.exp_exc_corruptions:
                if exc.piece_index not in missing_missized_pis:
                    debug(f' expecting non-missing/missized: {str(exc)}')
                    mandatory.add(exc)
                else:
                    debug(f' not expecting missing/missized: {str(exc)}')

            # Also allow corruptions that are already classified as optional.
            for exc in self.exp_exc_corruptions.maybe:
                debug(f' also allowing {str(exc)}')
                maybe.add(exc)

        self._exp_exceptions = fuzzylist(*mandatory, maybe=maybe)
        debug('Expected exceptions:')
        for e in self._exp_exceptions:
            debug(repr(e))
        debug('Tolerated exceptions:')
        for e in self._exp_exceptions.maybe:
            debug(repr(e))
        return self._exp_exceptions
class _TestCaseSinglefile(_TestCaseBase):
    """Test case for a torrent that contains exactly one file."""

    @property
    def filespecs_abspath(self):
        """One-item tuple of ``(absolute path, file size)``."""
        spec = (str(self.content_path), self.filesize)
        return (spec,)

    def setup(self, filespecs, piece_size):
        """Create the zero-filled file and read a torrent created from it."""
        self.filespecs = filespecs
        self.piece_size = piece_size
        self.filename = filespecs[0][0]
        self.filesize = filespecs[0][1]
        debug(f'Filename: {self.filename}, size: {self.filesize}, piece size: {piece_size}')
        self.stream_original = b'\x00' * self.filesize
        self.stream_corrupt = bytearray(self.stream_original)
        self.content_path = self.create_file(self.filename, self.stream_original)
        with self.forced_piece_size(piece_size):
            with self.create_torrent_file(path=self.content_path) as torrent_filepath:
                self.torrent = torf.Torrent.read(torrent_filepath)

    def corrupt_stream(self, *positions):
        """Change the bytes at `positions` (random positions if none given)."""
        # Check if this file already has other errors
        if self.files_missing or self.files_missized:
            return
        # Introduce random number of corruptions without changing stream length
        if positions:
            corruption_positions = set(positions)
        else:
            corruption_positions = set(random_positions(self.stream_corrupt))
        for corrpos in corruption_positions:
            debug(f'* Introducing corruption at index {corrpos}')
            self.stream_corrupt[corrpos] = (self.stream_corrupt[corrpos] + 1) % 256
            self.content_path.write_bytes(self.stream_corrupt)
        self.corruption_positions.update(corruption_positions)

    def delete_file(self, index=None):
        """Rename the file so it is missing; `index` is accepted but unused."""
        # Check if this file already has other errors
        if self.corruption_positions or self.files_missized:
            return
        debug(f'* Removing file from file system: {os.path.basename(self.content_path)}')
        os.rename(self.content_path, str(self.content_path) + '.deleted')
        self.files_missing = [self.content_path]
        self.stream_corrupt = b'\xCC' * self.torrent.size
        # No need to update self.corruption_positions. A missing single file
        # does not produce any corruption errors because the "No such file"
        # error is enough.

    def change_file_size(self, index=None):
        """Change the size of the file; `index` is accepted but unused."""
        # Check if this file already has other errors
        if self.corruption_positions or self.files_missing:
            return
        debug(f'* Changing file size in file system: {os.path.basename(self.content_path)}')
        # Inside the method body this name resolves to the imported
        # module-level helper, not to this method.
        self.stream_corrupt = change_file_size(self.content_path, self.torrent.size)
        self.files_missized.append(self.content_path)
        debug(f' Corruption positions after changing file size: {self.corruption_positions}')

    def get_original_filesize(self, filepath):
        """Size of the original stream; `filepath` is ignored."""
        return len(self.stream_original)

    def get_actual_filesize(self, filepath):
        """Size of the possibly modified stream; `filepath` is ignored."""
        return len(self.stream_corrupt)
class _TestCaseMultifile(_TestCaseBase):
    """Test case for a torrent that contains a directory with multiple files."""

    @property
    def filespecs_abspath(self):
        """Tuple of ``(absolute path, file size)`` for every file."""
        return tuple((str(self.content_path / filename), filesize)
                     for filename,filesize in self.filespecs)

    def setup(self, filespecs, piece_size):
        """Create zero-filled files and read a torrent created from them."""
        debug(f'File sizes: {", ".join(f"{n}={s}" for n,s in filespecs)}')
        debug(f'Stream size: {sum(s for _,s in filespecs)}')
        debug(f'Piece size: {piece_size}')
        self.filespecs = filespecs
        self.piece_size = piece_size
        self.content_original = {}
        self.content_corrupt = {}
        create_dir_args = []
        for filename,filesize in filespecs:
            data = b'\x00' * filesize
            self.content_original[filename] = data
            self.content_corrupt[filename] = bytearray(data)
            create_dir_args.append((filename, data))
        self.content_path = self.create_dir('content', *create_dir_args)
        debug(f'Content: {self.content_original}')
        with self.forced_piece_size(piece_size):
            with self.create_torrent_file(path=self.content_path) as torrent_filepath:
                self.torrent = torf.Torrent.read(torrent_filepath)

    @property
    def stream_original(self):
        """Concatenated original content of all files."""
        return b''.join(self.content_original.values())

    @property
    def stream_corrupt(self):
        """Concatenated current content of all files."""
        return b''.join(self.content_corrupt.values())

    def corrupt_stream(self, *positions):
        """Change the bytes at stream `positions` (random if none given)."""
        # Introduce corruptions without changing stream length
        error_files = {os.path.basename(f)
                       for f in itertools.chain(self.files_missing, self.files_missized)}
        if positions:
            corruption_positions = set(positions)
        else:
            corruption_positions = set(random_positions(self.stream_original))
        for corrpos_in_stream in corruption_positions:
            filename,corrpos_in_file = pos2file(corrpos_in_stream, self.filespecs, self.piece_size)
            # Files that are already missing or missized are left alone
            if filename in error_files:
                continue
            debug(f'* Introducing corruption in {filename} at index {corrpos_in_stream} in stream, '
                  f'{corrpos_in_file} in file {filename}')
            self.corruption_positions.add(corrpos_in_stream)
            data = self.content_corrupt[filename]
            data[corrpos_in_file] = (data[corrpos_in_file] + 1) % 256
            (self.content_path / filename).write_bytes(data)
            self.files_corrupt.append(str(self.content_path / filename))
        debug(f' Corruption positions after corrupting stream: {self.corruption_positions}')

    def delete_file(self, index=None):
        """Rename the file at `index` in filespecs (random if None) so it is missing."""
        if index is None:
            index = random.choice(range(len(self.filespecs)))
        # Remove file at `index` in filespecs from file system
        filename,filesize = self.filespecs[index]

        # Don't delete corrupt/missized file
        error_files = {os.path.basename(f)
                       for f in itertools.chain(self.files_corrupt, self.files_missized)}
        if filename in error_files:
            return

        debug(f'* Removing file from file system: {os.path.basename(filename)}')
        filepath = self.content_path / filename
        os.rename(filepath, str(filepath) + '.deleted')
        self.files_missing.append(filepath)
        self.content_corrupt[os.path.basename(filename)] = b'\xCC' * filesize

        # Re-calculate corruptions for adjacent files of all missing files
        corruption_positions = set()
        for removed_filepath in self.files_missing:
            # Find the first byte of the first affected piece and the first byte
            # of the last affected piece and mark them as corrupt
            removed_filename = os.path.basename(removed_filepath)
            file_beg,file_end = file_range(removed_filename, self.filespecs)
            debug(f' {removed_filename} starts at {file_beg} and ends at {file_end} in stream')
            first_affected_piece_pos = round_down_to_multiple(file_beg, self.piece_size)
            last_affected_piece_pos = round_down_to_multiple(file_end, self.piece_size)
            debug(f' First affected piece starts at {first_affected_piece_pos} '
                  f'and last affected piece starts at {last_affected_piece_pos}')
            corruption_positions.update((first_affected_piece_pos, last_affected_piece_pos))

        self.corruption_positions.update(corruption_positions)
        self._remove_skipped_corruptions()
        debug(f' Corruption positions after removing file: {self.corruption_positions}')

    def _remove_skipped_corruptions(self):
        """Drop corruption positions that only touch missing/missized files."""
        # Finally, remove corruptions that exclusively belong to
        # missing/missized files because they are always skipped
        skipped_files = {str(filepath)
                         for filepath in itertools.chain(self.files_missing, self.files_missized)}
        debug(f' skipped_files: {skipped_files}')
        # Iterate over a snapshot because the set is modified in the loop
        for corrpos in tuple(self.corruption_positions):
            affected_files = pos2files(corrpos, self.filespecs_abspath, self.piece_size)
            if all(f in skipped_files for f in affected_files):
                debug(f' only skipped files are affected by corruption at position {corrpos}')
                self.corruption_positions.remove(corrpos)

    def change_file_size(self, index=None):
        """Change the size of the file at `index` (random file if None)."""
        # Pick random file
        filenames = tuple(self.content_original)
        if index is None:
            filename = random.choice(filenames)
        else:
            filename = filenames[index]
        filepath = self.content_path / filename

        # Don't change corrupt/missing file
        error_files = {os.path.basename(f)
                       for f in itertools.chain(self.files_missing, self.files_corrupt)}
        if filename in error_files:
            return

        debug(f'* Changing file size in file system: {filepath}')
        # Change file size
        # Inside the method body this name resolves to the imported
        # module-level helper, not to this method.
        self.content_corrupt[filename] = change_file_size(
            filepath, len(self.content_original[filename]))
        self.files_missized.append(filepath)

        # Check if the beginning of adjacent files will be corrupted
        file_beg,file_end = file_range(filename, self.filespecs)
        debug(f' Original file beginning and end in stream: {file_beg}, {file_end}')
        if file_beg % self.piece_size != 0:
            debug(f' Beginning corrupts previous file at piece_index {file_beg // self.piece_size}')
            self.corruption_positions.add(file_beg)

        # Check if the end of adjacent files will be corrupted
        if (file_end + 1) % self.piece_size != 0:
            # NOTE(review): this path comes from filespecs_abspath (str) while
            # files_missing/files_missized receive `content_path / filename`
            # objects, so the two membership tests below may never match -
            # confirm against pos2file() and the create_dir fixture.
            filepath,_ = pos2file(file_end, self.filespecs_abspath, self.piece_size)
            if (filepath not in self.files_missing
                    and filepath not in self.files_missized
                    and filepath != self.filespecs_abspath[-1][0]):
                debug(f' End corrupts next file at piece_index {(file_end + 1) // self.piece_size}')
                self.corruption_positions.add(file_end)

        self._remove_skipped_corruptions()
        debug(f' Corruption positions after changing file size: {self.corruption_positions}')

    def get_original_filesize(self, filepath):
        """Original size of the file with the same base name as `filepath`."""
        return len(self.content_original[os.path.basename(filepath)])

    def get_actual_filesize(self, filepath):
        """Current size of the file with the same base name as `filepath`."""
        return len(self.content_corrupt[os.path.basename(filepath)])
@pytest.fixture
def mktestcase(create_dir, create_file, forced_piece_size, create_torrent_file):
    """Return instance of _TestCaseMultifile or _TestCaseSinglefile"""
    def mktestcase_(filespecs, piece_size):
        # A single filespec means a single-file torrent
        testcls = _TestCaseSinglefile if len(filespecs) == 1 else _TestCaseMultifile
        testcase = testcls(create_dir, create_file, create_torrent_file, forced_piece_size)
        testcase.setup(filespecs, piece_size)
        debug(f'################ TEST TORRENT CREATED: {testcase.torrent}')
        return testcase
    return mktestcase_
def test_validate_is_called_first(monkeypatch):
    """verify() must propagate the error raised by validate()."""
    torrent = torf.Torrent()
    validate_mock = mock.Mock(side_effect=torf.MetainfoError('Mock error'))
    monkeypatch.setattr(torrent, 'validate', validate_mock)
    with pytest.raises(torf.MetainfoError) as exc_info:
        torrent.verify('some/path')
    assert str(exc_info.value) == 'Invalid metainfo: Mock error'
    validate_mock.assert_called_once_with()


def test_verify_singlefile_torrent_with_directory(generated_singlefile_torrent, create_dir):
    """Verifying a single-file torrent against a directory is an error."""
    torrent = generated_singlefile_torrent
    content_path = create_dir('multifile')
    exp_exception = torf.VerifyIsDirectoryError(content_path)

    # Without callback
    with pytest.raises(type(exp_exception)) as exc_info:
        torrent.verify(content_path)
    assert str(exc_info.value) == str(exp_exception)

    # With callback
    cb = mock.Mock()
    torrent.verify(content_path, callback=cb)
    exp_call = mock.call(
        torrent,                             # torrent
        content_path,                        # filepath
        0,                                   # pieces_done
        generated_singlefile_torrent.pieces,  # pieces_total
        0,                                   # piece_index
        None,                                # piece_hash
        ComparableException(exp_exception),
    )
    assert cb.call_args_list == [exp_call]


def test_verify_multifile_torrent_with_file(generated_multifile_torrent, tmp_path):
    """Verifying a multi-file torrent against a regular file is an error."""
    torrent = generated_multifile_torrent
    content_path = tmp_path / 'singlefile'
    content_path.write_text('some file data')
    exp_exception = torf.VerifyNotDirectoryError(content_path)

    # Without callback
    with pytest.raises(type(exp_exception)) as exc_info:
        torrent.verify(content_path)
    assert str(exc_info.value) == str(exp_exception)

    # With callback
    cb = mock.Mock()
    torrent.verify(content_path, callback=cb)
    exp_call = mock.call(
        torrent,                            # torrent
        content_path,                       # filepath
        0,                                  # pieces_done
        generated_multifile_torrent.pieces,  # pieces_total
        0,                                  # piece_index
        None,                               # piece_hash
        ComparableException(exp_exception),
    )
    assert cb.call_args_list == [exp_call]
exp_pieces_total = generated_multifile_torrent.pieces exp_piece_index = 0 exp_piece_hash = None assert cb.call_args_list == [mock.call( exp_torrent, exp_filepath, exp_pieces_done, exp_pieces_total, exp_piece_index, exp_piece_hash, ComparableException(exp_exception), )] def test_verify_content_successfully(mktestcase, piece_size, callback, filespecs): display_filespecs(filespecs, piece_size) # noqa: F405 tc = mktestcase(filespecs, piece_size) tc.run(with_callback=callback['enabled'], exp_return_value=True) def test_verify_content_with_random_corruptions_and_no_skipping(mktestcase, piece_size, callback, filespecs): display_filespecs(filespecs, piece_size) # noqa: F405 tc = mktestcase(filespecs, piece_size) tc.corrupt_stream() tc.run(with_callback=callback['enabled'], exp_return_value=False) # def test_verify_content_with_random_corruptions_and_skipping(mktestcase, piece_size, callback, filespecs): # display_filespecs(filespecs, piece_size) # noqa: F405 # tc = mktestcase(filespecs, piece_size) # tc.corrupt_stream() # tc.run(with_callback=callback['enabled'], # skip_on_error=True, # exp_return_value=False) def test_verify_content_with_missing_files_and_no_skipping(mktestcase, piece_size, callback, filespecs, filespec_indexes): display_filespecs(filespecs, piece_size) # noqa: F405 tc = mktestcase(filespecs, piece_size) for index in filespec_indexes: tc.delete_file(index) tc.run(with_callback=callback['enabled'], exp_return_value=False) # def test_verify_content_with_missing_files_and_skipping(mktestcase, piece_size, callback, filespecs, filespec_indexes): # display_filespecs(filespecs, piece_size) # noqa: F405 # tc = mktestcase(filespecs, piece_size) # for index in filespec_indexes: # tc.delete_file(index) # tc.run(with_callback=callback['enabled'], # skip_on_error=True, # exp_return_value=False) def test_verify_content_with_changed_file_size_and_no_skipping(mktestcase, piece_size, callback, filespecs): display_filespecs(filespecs, piece_size) # noqa: F405 tc = 
"""Tests for ``Torrent.verify_filesize()`` with and without a callback"""

import os
import shutil
from pathlib import Path
from unittest import mock

import pytest

import torf


def test_validate_is_called_first(monkeypatch):
    """verify_filesize() calls validate() once and lets its error propagate"""
    torrent = torf.Torrent()
    mock_validate = mock.MagicMock(side_effect=torf.MetainfoError('Mock error'))
    monkeypatch.setattr(torrent, 'validate', mock_validate)
    with pytest.raises(torf.MetainfoError) as excinfo:
        torrent.verify_filesize('some/path')
    assert excinfo.match('^Invalid metainfo: Mock error$')
    mock_validate.assert_called_once_with()


@pytest.mark.parametrize(
    argnames='callback_return_values, exp_calls, exp_success',
    argvalues=(
        ([None], 1, True),
        ([True], 1, False),
        ([False], 1, False),
        ([''], 1, False),
    ),
)
def test_success_with_singlefile_torrent(callback_return_values, exp_calls, exp_success,
                                         create_file, create_torrent_file):
    """Intact single file: success unless the callback returns a non-None value"""
    # NOTE(review): The payload was empty in the extracted source; presumably
    # an angle-bracketed literal was stripped. Confirm against the repository.
    content_path = create_file('file.jpg', '<image data>')
    with create_torrent_file(path=content_path) as torrent_file:
        torrent = torf.Torrent.read(torrent_file)

        # Without callback
        return_value = torrent.verify_filesize(content_path)
        assert return_value is True

        # With callback
        def assert_call(t, fs_path, t_path, files_done, files_total, exc):
            assert t == torrent
            assert fs_path == content_path
            assert files_done == 1
            assert files_total == 1
            assert exc is None
            return callback_return_values.pop(0)

        cb = mock.MagicMock()
        cb.side_effect = assert_call
        assert torrent.verify_filesize(content_path, callback=cb) is exp_success
        assert cb.call_count == exp_calls


@pytest.mark.parametrize(
    argnames='callback_return_values, exp_calls, exp_success',
    argvalues=(
        ([None, None, None], 3, True),
        ([None, None, True], 3, False),
        ([None, True], 2, False),
        ([True], 1, False),
    ),
)
def test_success_with_multifile_torrent(callback_return_values, exp_calls, exp_success,
                                        create_dir, create_torrent_file):
    """Intact directory: one callback call per file until the callback cancels"""
    content_path = create_dir('content',
                              ('a.jpg', 'some data'),
                              ('b.jpg', 'some other data'),
                              ('c.jpg', 'some more data'))
    with create_torrent_file(path=content_path) as torrent_file:
        torrent = torf.Torrent.read(torrent_file)

        assert os.path.exists(content_path / 'a.jpg')
        assert os.path.exists(content_path / 'b.jpg')
        assert os.path.exists(content_path / 'c.jpg')

        # Without callback
        assert torrent.verify_filesize(content_path) is True

        # With callback
        def assert_call(t, fs_path, t_path, files_done, files_total, exc):
            assert t == torrent
            assert files_done == cb.call_count
            assert files_total == 3
            if cb.call_count == 1:
                assert fs_path == content_path / 'a.jpg'
                assert t_path == Path(*(content_path / 'a.jpg').parts[-2:])
                assert exc is None
            elif cb.call_count == 2:
                assert fs_path == content_path / 'b.jpg'
                assert t_path == Path(*(content_path / 'b.jpg').parts[-2:])
                assert exc is None
            elif cb.call_count == 3:
                assert fs_path == content_path / 'c.jpg'
                assert t_path == Path(*(content_path / 'c.jpg').parts[-2:])
                assert exc is None
            return callback_return_values.pop(0)

        cb = mock.MagicMock()
        cb.side_effect = assert_call
        assert torrent.verify_filesize(content_path, callback=cb) is exp_success
        assert cb.call_count == exp_calls


@pytest.mark.parametrize(
    argnames='callback_return_values, exp_calls',
    argvalues=(
        ([None], 1),
        ([True], 1),
        ([False], 1),
        ([''], 1),
    ),
)
def test_file_in_singlefile_torrent_doesnt_exist(callback_return_values, exp_calls,
                                                 create_file, create_torrent_file):
    """Nonexistent path for a single-file torrent is reported as ReadError"""
    # NOTE(review): The payload was empty in the extracted source; presumably
    # an angle-bracketed literal was stripped. Confirm against the repository.
    content_path = create_file('file.jpg', '<image data>')
    with create_torrent_file(path=content_path) as torrent_file:
        torrent = torf.Torrent.read(torrent_file)

        # Without callback
        with pytest.raises(torf.ReadError) as excinfo:
            torrent.verify_filesize('/some/nonexisting/path')
        assert excinfo.match('^/some/nonexisting/path: No such file or directory$')

        # With callback
        def assert_call(t, fs_path, t_path, files_done, files_total, exc):
            assert t == torrent
            assert fs_path == Path('/some/nonexisting/path')
            assert files_done == 1
            assert files_total == 1
            assert str(exc) == '/some/nonexisting/path: No such file or directory'
            return callback_return_values.pop(0)

        cb = mock.MagicMock()
        cb.side_effect = assert_call
        assert torrent.verify_filesize('/some/nonexisting/path', callback=cb) is False
        assert cb.call_count == exp_calls


@pytest.mark.parametrize(
    argnames='callback_return_values, exp_calls',
    argvalues=(
        ([None, None, None], 3),
        ([None, True], 2),
        ([True], 1),
    ),
)
def test_file_in_multifile_torrent_doesnt_exist(callback_return_values, exp_calls,
                                                create_dir, create_torrent_file):
    """Missing files in a directory are reported per file as ReadError"""
    content_path = create_dir('content',
                              ('a.jpg', 'some data'),
                              ('b.jpg', 'some other data'),
                              ('c.jpg', 'some more data'))
    with create_torrent_file(path=content_path) as torrent_file:
        torrent = torf.Torrent.read(torrent_file)

        os.remove(content_path / 'a.jpg')
        os.remove(content_path / 'c.jpg')
        assert not os.path.exists(content_path / 'a.jpg')
        assert os.path.exists(content_path / 'b.jpg')
        assert not os.path.exists(content_path / 'c.jpg')

        # Without callback
        with pytest.raises(torf.ReadError) as excinfo:
            torrent.verify_filesize(content_path)
        assert excinfo.match(f'^{content_path / "a.jpg"}: No such file or directory$')

        # With callback
        def assert_call(t, fs_path, t_path, files_done, files_total, exc):
            assert t == torrent
            assert files_done == cb.call_count
            assert files_total == 3
            if cb.call_count == 1:
                assert fs_path == content_path / 'a.jpg'
                assert t_path == Path(*(content_path / 'a.jpg').parts[-2:])
                assert str(exc) == f'{fs_path}: No such file or directory'
            elif cb.call_count == 2:
                assert fs_path == content_path / 'b.jpg'
                assert t_path == Path(*(content_path / 'b.jpg').parts[-2:])
                assert exc is None
            elif cb.call_count == 3:
                assert fs_path == content_path / 'c.jpg'
                assert t_path == Path(*(content_path / 'c.jpg').parts[-2:])
                assert str(exc) == f'{fs_path}: No such file or directory'
            return callback_return_values.pop(0)

        cb = mock.MagicMock()
        cb.side_effect = assert_call
        assert torrent.verify_filesize(content_path, callback=cb) is False
        assert cb.call_count == exp_calls


@pytest.mark.parametrize(
    argnames='callback_return_values, exp_calls',
    argvalues=(
        ([None], 1),
        ([True], 1),
        (['yes'], 1),
    ),
)
def test_file_in_singlefile_torrent_has_wrong_size(callback_return_values, exp_calls,
                                                   create_file, create_torrent_file):
    """A single file that grew after torrent creation yields VerifyFileSizeError"""
    # The assertions below expect the torrent to describe 12 bytes and the file
    # on disk to have 22 bytes, so the payloads must have exactly these sizes.
    # NOTE(review): Both payloads were empty in the extracted source (which made
    # the size assertions unsatisfiable); presumably angle-bracketed literals
    # were stripped. Confirm the exact strings against the repository.
    content_path = create_file('file.jpg', '<image data>')
    with create_torrent_file(path=content_path) as torrent_file:
        torrent = torf.Torrent.read(torrent_file)

        content_path.write_text('<different image data>')
        assert os.path.getsize(content_path) != torrent.size

        # Without callback
        with pytest.raises(torf.VerifyFileSizeError) as excinfo:
            torrent.verify_filesize(content_path)
        assert excinfo.match(f'^{content_path}: Too big: 22 instead of 12 bytes$')

        # With callback
        def assert_call(t, fs_path, t_path, files_done, files_total, exc):
            assert t == torrent
            assert fs_path == content_path
            assert t_path == Path(Path(content_path).name)
            assert files_done == cb.call_count
            assert files_total == 1
            assert str(exc) == f'{content_path}: Too big: 22 instead of 12 bytes'
            return callback_return_values.pop(0)

        cb = mock.MagicMock()
        cb.side_effect = assert_call
        assert torrent.verify_filesize(content_path, callback=cb) is False
        assert cb.call_count == exp_calls


@pytest.mark.parametrize(
    argnames='callback_return_values, exp_calls',
    argvalues=(
        ([None, None, True], 3),
        ([None, True, None], 2),
        (['yes', None, None], 1),
    ),
)
def test_file_in_multifile_torrent_has_wrong_size(callback_return_values, exp_calls,
                                                  create_dir, create_torrent_file):
    """Resized files in a directory are reported per file as too big/too small"""
    content_path = create_dir('content',
                              ('a.jpg', 100),
                              ('b.jpg', 200),
                              ('c.jpg', 300))
    with create_torrent_file(path=content_path) as torrent_file:
        torrent = torf.Torrent.read(torrent_file)

        (content_path / 'b.jpg').write_bytes(b'\x00' * 201)
        (content_path / 'c.jpg').write_bytes(b'\x00' * 299)
        assert len((content_path / 'b.jpg').read_bytes()) == 201
        assert len((content_path / 'c.jpg').read_bytes()) == 299

        # Without callback
        with pytest.raises(torf.VerifyFileSizeError) as excinfo:
            torrent.verify_filesize(content_path)
        assert excinfo.match(f'^{content_path / "b.jpg"}: Too big: 201 instead of 200 bytes$')

        # With callback
        def assert_call(t, fs_path, t_path, files_done, files_total, exc):
            assert t == torrent
            assert files_done == cb.call_count
            assert files_total == 3
            if cb.call_count == 1:
                assert fs_path == content_path / 'a.jpg'
                assert t_path == Path(content_path.name, 'a.jpg')
                assert exc is None
            elif cb.call_count == 2:
                assert fs_path == content_path / 'b.jpg'
                assert t_path == Path(content_path.name, 'b.jpg')
                assert str(exc) == f'{fs_path}: Too big: 201 instead of 200 bytes'
            elif cb.call_count == 3:
                assert fs_path == content_path / 'c.jpg'
                assert t_path == Path(content_path.name, 'c.jpg')
                assert str(exc) == f'{fs_path}: Too small: 299 instead of 300 bytes'
            return callback_return_values.pop(0)

        cb = mock.MagicMock()
        cb.side_effect = assert_call
        assert torrent.verify_filesize(content_path, callback=cb) is False
        assert cb.call_count == exp_calls


@pytest.mark.parametrize(
    argnames='callback_return_values, exp_calls',
    argvalues=(
        ([None, None, True], 1),
        ([True, None, None], 1),
        ([None], 1),
    ),
)
def test_path_is_directory_and_torrent_contains_single_file(callback_return_values, exp_calls,
                                                            create_file, create_dir, create_torrent_file):
    """A directory in place of a single-file torrent's file yields VerifyIsDirectoryError"""
    content_data = b'\x00' * 1001
    content_path = create_file('content', content_data)
    with create_torrent_file(path=content_path) as torrent_file:
        torrent = torf.Torrent.read(torrent_file)

        # Replace the file with a directory of the same name
        os.remove(content_path)
        content_path = create_dir('content', ('content', content_data))
        assert os.path.isdir(content_path)

        # Without callback
        with pytest.raises(torf.VerifyIsDirectoryError) as excinfo:
            torrent.verify_filesize(content_path)
        assert excinfo.match(f'^{content_path}: Is a directory$')

        # With callback
        def assert_call(t, fs_path, t_path, files_done, files_total, exc):
            assert t == torrent
            assert files_done == 1
            assert files_total == 1
            assert fs_path == Path(content_path)
            assert t_path == Path(Path(content_path).name)
            assert str(exc) == f'{content_path}: Is a directory'
            return callback_return_values.pop(0)

        cb = mock.MagicMock()
        cb.side_effect = assert_call
        assert torrent.verify_filesize(content_path, callback=cb) is False
        assert cb.call_count == exp_calls


@pytest.mark.parametrize(
    argnames='callback_return_values, exp_calls',
    argvalues=(
        ([None, None, None], 2),
        ([None, True], 2),
        ([False], 1),
    ),
)
def test_path_is_file_and_torrent_contains_directory(callback_return_values, exp_calls,
                                                     create_file, create_dir, create_torrent_file):
    """A file in place of a multi-file torrent's directory yields ReadError per file"""
    content_path = create_dir('content',
                              ('a.jpg', b'\x00' * 1234),
                              ('b.jpg', b'\x00' * 234))
    with create_torrent_file(path=content_path) as torrent_file:
        torrent = torf.Torrent.read(torrent_file)

        # Replace the directory with a file of the same name
        shutil.rmtree(content_path)
        assert not os.path.exists(content_path)
        create_file('content', 'some data')
        assert os.path.isfile(content_path)

        # Without callback
        with pytest.raises(torf.ReadError) as excinfo:
            torrent.verify_filesize(content_path)
        assert excinfo.match(f'^{content_path / "a.jpg"}: No such file or directory$')

        # With callback
        def assert_call(t, fs_path, t_path, files_done, files_total, exc):
            assert t == torrent
            assert files_done == cb.call_count
            assert files_total == 2
            if cb.call_count == 1:
                assert fs_path == content_path / 'a.jpg'
                assert t_path == Path(content_path.name, 'a.jpg')
                assert str(exc) == f'{fs_path}: No such file or directory'
            elif cb.call_count == 2:
                assert fs_path == content_path / 'b.jpg'
                assert t_path == Path(content_path.name, 'b.jpg')
                assert str(exc) == f'{fs_path}: No such file or directory'
            return callback_return_values.pop(0)

        cb = mock.MagicMock()
        cb.side_effect = assert_call
        assert torrent.verify_filesize(content_path, callback=cb) is False
        assert cb.call_count == exp_calls


@pytest.mark.parametrize(
    argnames='callback_return_values, exp_calls',
    argvalues=(
        ([None, None, None], 3),
        ([None, None, 'cancel'], 3),
        ([None, ()], 2),
        ([0], 1),
    ),
)
def test_parent_path_of_multifile_torrent_is_unreadable(callback_return_values, exp_calls,
                                                        create_dir, create_torrent_file):
    """Files below unreadable directories are reported as ReadError"""
    content_path = create_dir('content',
                              ('unreadable1/b/c/a.jpg', 'a data'),
                              ('unreadable2/b/c/b.jpg', 'b data'),
                              ('readable/b/c/c.jpg', 'c data'))
    with create_torrent_file(path=content_path) as torrent_file:
        torrent = torf.Torrent.read(torrent_file)
        unreadable_path1_mode = os.stat(content_path / 'unreadable1').st_mode
        unreadable_path2_mode = os.stat(content_path / 'unreadable2').st_mode
        try:
            os.chmod((content_path / 'unreadable1'), mode=0o222)
            os.chmod((content_path / 'unreadable2'), mode=0o222)

            # NOTE: We would expect "Permission denied" here, but
            #       os.path.exists() can't look inside .../content/unreadable1/
            #       and thus raises "No such file or directory".

            # Without callback
            with pytest.raises(torf.ReadError) as excinfo:
                torrent.verify_filesize(content_path)
            assert excinfo.match(f'^{content_path / "unreadable1/b/c/a.jpg"}: No such file or directory$')

            # With callback
            def assert_call(t, fs_path, t_path, files_done, files_total, exc):
                assert t == torrent
                assert files_done == cb.call_count
                assert files_total == 3
                if cb.call_count == 1:
                    assert fs_path == content_path / 'readable/b/c/c.jpg'
                    assert t_path == Path(content_path.name, 'readable/b/c/c.jpg')
                    assert exc is None
                elif cb.call_count == 2:
                    assert fs_path == content_path / 'unreadable1/b/c/a.jpg'
                    assert t_path == Path(content_path.name, 'unreadable1/b/c/a.jpg')
                    assert str(exc) == f'{fs_path}: No such file or directory'
                elif cb.call_count == 3:
                    assert fs_path == Path(content_path / 'unreadable2/b/c/b.jpg')
                    assert t_path == Path(content_path.name, 'unreadable2/b/c/b.jpg')
                    assert str(exc) == f'{fs_path}: No such file or directory'
                return callback_return_values.pop(0)

            cb = mock.MagicMock()
            cb.side_effect = assert_call
            assert torrent.verify_filesize(content_path, callback=cb) is False
            assert cb.call_count == exp_calls
        finally:
            # Restore permissions so the temporary directory can be removed
            os.chmod((content_path / 'unreadable1'), mode=unreadable_path1_mode)
            os.chmod((content_path / 'unreadable2'), mode=unreadable_path2_mode)


@pytest.mark.parametrize(
    argnames='callback_return_values, exp_calls',
    argvalues=(
        ([None, None, None], 1),
        (['abort', None, None], 1),
        ([range(123), None, None], 1),
        ([123, None, None], 1),
    ),
)
def test_parent_path_of_singlefile_torrent_is_unreadable(callback_return_values, exp_calls,
                                                         create_dir, create_torrent_file):
    """A file below an unreadable directory is reported as ReadError"""
    parent_path = create_dir('parent', ('file.jpg', b'\x00' * 123))
    content_file = str(parent_path / 'file.jpg')
    with create_torrent_file(path=content_file) as torrent_file:
        torrent = torf.Torrent.read(torrent_file)
        parent_path_mode = os.stat(parent_path).st_mode
        try:
            os.chmod(parent_path, mode=0o222)

            # NOTE: We would expect "Permission denied" here, but
            #       os.path.exists() can't look inside "parent" directory and
            #       thus raises "No such file or directory".

            # Without callback
            with pytest.raises(torf.ReadError) as excinfo:
                torrent.verify_filesize(content_file)
            assert excinfo.match(f'^{content_file}: No such file or directory$')

            # With callback
            def assert_call(t, fs_path, t_path, files_done, files_total, exc):
                assert t == torrent
                assert files_done == 1
                assert files_total == 1
                assert fs_path == Path(content_file)
                assert t_path == Path(Path(content_file).name)
                assert str(exc) == f'{content_file}: No such file or directory'
                return callback_return_values.pop(0)

            cb = mock.MagicMock()
            cb.side_effect = assert_call
            assert torrent.verify_filesize(content_file, callback=cb) is False
            assert cb.call_count == exp_calls
        finally:
            # Restore permissions so the temporary directory can be removed
            os.chmod(parent_path, mode=parent_path_mode)


def test_callback_raises_exception(create_dir, create_torrent_file):
    """An exception raised by the callback propagates and stops further calls"""
    content_path = create_dir('content',
                              ('a.jpg', b'\x00' * 123),
                              ('b.jpg', b'\x00' * 456),
                              ('c.jpg', b'\x00' * 789))
    with create_torrent_file(path=content_path) as torrent_file:
        def assert_call(t, fs_path, t_path, files_done, files_total, exc):
            assert t == torrent
            assert files_done == cb.call_count
            assert files_total == 3
            if cb.call_count == 1:
                assert fs_path == content_path / 'a.jpg'
                assert t_path == Path(content_path.name, 'a.jpg')
                assert exc is None
            elif cb.call_count == 2:
                raise RuntimeError("I'm off")
            elif cb.call_count == 3:
                # Not expected to be reached; the final call count is 2
                assert fs_path == content_path / 'c.jpg'
                assert t_path == Path(content_path.name, 'c.jpg')
                assert exc is None
            return None

        torrent = torf.Torrent.read(torrent_file)
        cb = mock.MagicMock()
        cb.side_effect = assert_call
        with pytest.raises(RuntimeError) as excinfo:
            torrent.verify_filesize(content_path, callback=cb)
        assert excinfo.match("^I'm off$")
        assert cb.call_count == 2
def test_successful_write(generated_singlefile_torrent, tmp_path):
    """write() stores exactly the bytes that dump() returns"""
    f = tmp_path / 'a.torrent'
    generated_singlefile_torrent.write(f)
    # read_bytes() closes the file, unlike a bare open(...).read()
    bytes_written = f.read_bytes()
    bytes_expected = generated_singlefile_torrent.dump()
    assert bytes_written == bytes_expected


def test_write_with_creation_date(generated_singlefile_torrent, tmp_path):
    """The creation date set on the torrent ends up in the written metainfo"""
    f = tmp_path / 'a.torrent'
    now = int(time.time())
    generated_singlefile_torrent.creation_date = now
    generated_singlefile_torrent.write(f)
    metainfo = bencode.decode(f.read_bytes())
    assert metainfo[b'creation date'] == now


def test_write_to_file_without_permission(generated_singlefile_torrent, tmp_path):
    """Writing into a read-only directory raises WriteError"""
    test_dir = tmp_path / 'test_dir'
    test_dir.mkdir()
    original_mode = test_dir.stat().st_mode
    test_dir.chmod(0o444)
    try:
        with pytest.raises(torf.WriteError) as excinfo:
            generated_singlefile_torrent.write(test_dir / 'a.torrent')
        assert excinfo.match(f'^{test_dir / "a.torrent"}: Permission denied$')
    finally:
        # Restore the mode so the temporary directory can be cleaned up
        test_dir.chmod(original_mode)


def test_write_to_existing_file(generated_singlefile_torrent, tmp_path):
    """Existing files are only replaced when overwrite=True is given"""
    f = tmp_path / 'a.torrent'
    f.write_text('something')

    with pytest.raises(torf.WriteError) as excinfo:
        generated_singlefile_torrent.write(f)
    assert excinfo.match(f'^{tmp_path / "a.torrent"}: File exists$')

    generated_singlefile_torrent.write(f, overwrite=True)
    bytes_written = f.read_bytes()
    bytes_expected = generated_singlefile_torrent.dump()
    assert bytes_written == bytes_expected


def test_existing_file_is_unharmed_if_dump_fails(generated_singlefile_torrent, tmp_path):
    """A failing dump must not touch the file that was to be overwritten"""
    f = tmp_path / 'a.torrent'
    f.write_text('something')
    # Removing "length" makes the metainfo invalid
    del generated_singlefile_torrent.metainfo['info']['length']
    with pytest.raises(torf.MetainfoError):
        generated_singlefile_torrent.write(f, overwrite=True)
    old_content = f.read_text()
    assert old_content == 'something'


def test_new_file_is_not_created_if_dump_fails(generated_singlefile_torrent, tmp_path):
    """A failing dump must not leave a new file behind"""
    f = tmp_path / 'a.torrent'
    # Removing "length" makes the metainfo invalid
    del generated_singlefile_torrent.metainfo['info']['length']
    with pytest.raises(torf.MetainfoError):
        generated_singlefile_torrent.write(f)
    assert not os.path.exists(f)


def test_overwriting_larger_torrent_file_truncates_first(generated_singlefile_torrent, tmp_path):
    """Overwriting a larger file leaves no trailing bytes of the old content"""
    f = (tmp_path / 'large.file')
    f.write_text('x' * 1000000)
    assert os.path.getsize(f) == 1e6
    generated_singlefile_torrent.write(str(f), overwrite=True)
    assert os.path.exists(f)
    assert os.path.getsize(f) < 1e6
    # The file must be readable as a torrent again
    assert torf.Torrent.read(str(f)).name == os.path.basename(generated_singlefile_torrent.path)
# # torf is free software: you can redistribute it and/or modify it under the # terms of the GNU General Public License as published by the Free Software # Foundation, either version 3 of the License, or (at your option) any later # version. # # torf is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with torf. If not, see . # flake8: noqa __version__: str = ... from ._errors import * from ._magnet import Magnet as Magnet from ._stream import TorrentFileStream as TorrentFileStream from ._torrent import Torrent as Torrent from ._utils import File as File from ._utils import Filepath as Filepath rndusr-torf-547b989/torf/_errors.py000066400000000000000000000233421513142010300172650ustar00rootroot00000000000000# This file is part of torf. # # torf is free software: you can redistribute it and/or modify it under the # terms of the GNU General Public License as published by the Free Software # Foundation, either version 3 of the License, or (at your option) any later # version. # # torf is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with torf. If not, see . 
import os


class TorfError(Exception):
    """
    Base exception for all exceptions raised by torf

    Besides the message, the positional and keyword arguments forwarded by
    subclasses are kept on the instance as ``posargs`` and ``kwargs``.
    """

    def __init__(self, msg, *posargs, **kwargs):
        super().__init__(msg)
        self.posargs = posargs
        self.kwargs = kwargs


class URLError(TorfError):
    """Invalid URL"""

    def __init__(self, url):
        self._url = url
        super().__init__(f'{url}: Invalid URL', url)

    @property
    def url(self):
        """The invalid URL"""
        return self._url


class PieceSizeError(TorfError):
    """Invalid piece size"""

    def __init__(self, size, min=None, max=None):
        self._size = size
        self._min = min
        self._max = max
        # The range message is only used if both limits are known
        if min is not None and max is not None:
            super().__init__(f'Piece size must be between {min} and {max}: {size}',
                             size, min=min, max=max)
        else:
            super().__init__(f'Piece size must be divisible by 16 KiB: {size}',
                             size)

    @property
    def size(self):
        """The invalid piece size"""
        return self._size

    @property
    def min(self):
        """Smallest allowed piece size or ``None``"""
        return self._min

    @property
    def max(self):
        """Largest allowed piece size or ``None``"""
        return self._max


class MetainfoError(TorfError):
    """Invalid torrent metainfo"""

    def __init__(self, msg):
        super().__init__(f'Invalid metainfo: {msg}', msg)


class BdecodeError(TorfError):
    """Failed to decode bencoded byte sequence"""

    def __init__(self, filepath=None):
        self._filepath = filepath
        if filepath is None:
            super().__init__('Invalid metainfo format')
        else:
            super().__init__(f'{filepath}: Invalid torrent file format', filepath)

    @property
    def filepath(self):
        """Path of the offending torrent file or ``None``"""
        return self._filepath


class MagnetError(TorfError):
    """Invalid magnet URI or value"""

    def __init__(self, uri, reason=None):
        self._uri = uri
        self._reason = reason
        if reason is not None:
            super().__init__(f'{uri}: {reason}', uri, reason=reason)
        else:
            super().__init__(f'{uri}: Invalid magnet URI', uri)

    @property
    def uri(self):
        """The invalid URI"""
        return self._uri

    @property
    def reason(self):
        """Why URI is invalid"""
        return self._reason


class PathError(TorfError):
    """General invalid or unexpected path"""

    def __init__(self, path, msg):
        self._path = path
        super().__init__(f'{path}: {msg}', path, msg)

    @property
    def path(self):
        """Path of the offending file or directory"""
        return self._path


class CommonPathError(TorfError):
    """Files don't share parent directory"""

    def __init__(self, filepaths):
        self._filepaths = filepaths
        filepaths_str = ', '.join(str(fp) for fp in filepaths)
        super().__init__(f'No common parent path: {filepaths_str}', filepaths)

    @property
    def filepaths(self):
        """Paths to offending files"""
        return self._filepaths


class VerifyIsDirectoryError(TorfError):
    """Expected file but found directory"""

    def __init__(self, path):
        self._path = path
        super().__init__(f'{path}: Is a directory', path)

    @property
    def path(self):
        """Path of the offending directory"""
        return self._path


class VerifyNotDirectoryError(TorfError):
    """Expected (link to) directory, but found something else"""

    def __init__(self, path):
        self._path = path
        super().__init__(f'{path}: Not a directory', path)

    @property
    def path(self):
        """Path of the offending non-directory"""
        return self._path


class VerifyFileSizeError(TorfError):
    """
    Unexpected file size

    Raises :class:`RuntimeError` on instantiation if `actual_size` equals
    `expected_size` because there is no error to report in that case.
    """

    def __init__(self, filepath, actual_size, expected_size):
        self._filepath = filepath
        self._actual_size = actual_size
        self._expected_size = expected_size
        if actual_size > expected_size:
            verdict = 'Too big'
        elif actual_size < expected_size:
            verdict = 'Too small'
        else:
            raise RuntimeError(f'Unjustified: actual_size={actual_size} == expected_size={expected_size}')
        super().__init__(f'{filepath}: {verdict}: {actual_size} instead of {expected_size} bytes',
                         filepath, actual_size=actual_size, expected_size=expected_size)

    @property
    def filepath(self):
        """Path of the offending file"""
        return self._filepath

    @property
    def actual_size(self):
        """Size as reported by the file system"""
        return self._actual_size

    @property
    def expected_size(self):
        """Size as specified in the metainfo"""
        return self._expected_size


class VerifyContentError(TorfError):
    """
    On-disk data does not match hashes in metainfo

    `file_sizes` is a sequence of ``(filepath, filesize)`` pairs in stream
    order. It is used to find the files that overlap with the corrupt piece.
    Raises :class:`RuntimeError` on instantiation if `file_sizes` is empty.
    """

    def __init__(self, filepath, piece_index, piece_size, file_sizes):
        self._filepath = filepath
        self._piece_index = piece_index
        self._piece_size = piece_size
        # Pieces are presented to the user with 1-based numbering
        msg = f'Corruption in piece {piece_index + 1}'

        if len(file_sizes) < 1:
            raise RuntimeError(f'file_sizes argument is empty: {file_sizes!r}')
        elif len(file_sizes) == 1:
            corrupt_files = (file_sizes[0][0],)
        else:
            corrupt_files = []

            # Find the slice in the whole stream of files that contains the
            # corruption (piece_index=0 is the first piece)
            err_i_beg = piece_index * piece_size
            err_i_end = err_i_beg + piece_size

            # Find the files that are covered by the corrupt piece
            # NOTE: The loop variables must not reuse the name `filepath`
            #       because the argument is passed on to TorfError below.
            cur_pos = 0
            for fpath, fsize in file_sizes:
                # `fpath` is possibly corrupt if:
                # 1. The corrupt piece STARTS between the beginning and the end
                #    of the file in the stream.
                # 2. The corrupt piece ENDS between the beginning and the end
                #    of the file in the stream.
                # 3. Both beginning and end of the file are between beginning
                #    and end of the corrupt piece (i.e. file fits in one piece).
                file_i_beg = cur_pos
                file_i_end = file_i_beg + fsize
                if (file_i_beg <= err_i_beg < file_i_end or
                    file_i_beg < err_i_end <= file_i_end or
                    (file_i_beg >= err_i_beg and file_i_end < err_i_end)):
                    corrupt_files.append(fpath)
                cur_pos += fsize

        if len(corrupt_files) == 1:
            msg += f' in {corrupt_files[0]}'
        else:
            msg += (', at least one of these files is corrupt: '
                    + ', '.join(str(f) for f in corrupt_files))

        self._files = tuple(corrupt_files)
        super().__init__(msg, filepath, piece_index, piece_size, file_sizes)

    @property
    def filepath(self):
        """Path to file that caused the piece corruption"""
        return self._filepath

    @property
    def piece_index(self):
        """Index of the corrupt piece in the stream of concatenated files"""
        return self._piece_index

    @property
    def piece_size(self):
        """Size of the corrupt piece in bytes"""
        return self._piece_size

    @property
    def files(self):
        """Potentially corrupt neighboring files"""
        return self._files


class ReadError(TorfError):
    """Unreadable file or stream"""

    def __init__(self, errno, path=None):
        self._errno = errno
        self._path = path
        # A falsy errno (0 or None) gets a generic message
        msg = os.strerror(errno) if errno else 'Unable to read'
        if path is None:
            super().__init__(f'{msg}', errno)
        else:
            super().__init__(f'{path}: {msg}', errno, path)

    @property
    def path(self):
        """Path of the offending file or ``None``"""
        return self._path

    @property
    def errno(self):
        """POSIX error number from errno.h"""
        return self._errno


class MemoryError(TorfError, MemoryError):
    """
    Out of memory

    See also :class:`MemoryError`.
    """


class WriteError(TorfError):
    """Unwritable file or stream"""

    def __init__(self, errno, path=None):
        self._errno = errno
        self._path = path
        # A falsy errno (0 or None) gets a generic message
        msg = os.strerror(errno) if errno else 'Unable to write'
        if path is None:
            # Forward `errno` (not `path`, which is None here) like ReadError does
            super().__init__(f'{msg}', errno)
        else:
            super().__init__(f'{path}: {msg}', errno, path)

    @property
    def path(self):
        """Path of the offending file or ``None``"""
        return self._path

    @property
    def errno(self):
        """POSIX error number from errno.h"""
        return self._errno


class ConnectionError(TorfError):
    """Failed connection to a URL"""

    def __init__(self, url, msg='Failed'):
        self._url = url
        self._msg = str(msg)
        super().__init__(f'{url}: {msg}', url, msg)

    @property
    def url(self):
        """URL that caused the exception"""
        return self._url
class VerifyContentError(TorfError):
    def __init__(
        self, filepath: StrPath, piece_index: int, piece_size: int, file_sizes: tuple[tuple[str, int], ...]
    ) -> None: ...
    @property
    def filepath(self) -> StrPath: ...
    @property
    def piece_index(self) -> int: ...
    @property
    def piece_size(self) -> int: ...
    # The implementation stores ``tuple(corrupt_files)``, i.e. only the paths
    # of the potentially corrupt files, not ``(path, size)`` pairs.
    # NOTE(review): element type assumed to be ``str`` because `file_sizes`
    # is annotated with ``str`` paths - confirm against callers.
    @property
    def files(self) -> tuple[str, ...]: ...
# ######################################################################################## # # The MIT License (MIT) # # Copyright (c) 2016, Antoine Catton # # Permission is hereby granted, free of charge, to any person obtaining a copy of # this software and associated documentation files (the "Software"), to deal in # the Software without restriction, including without limitation the rights to # use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies # of the Software, and to permit persons to whom the Software is furnished to do # so, subject to the following conditions: # # The above copyright notice and this permission notice shall be included in all # copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
import collections import io import itertools ONE_CHAR = 1 INTEGER_START = b'i' LIST_START = b'l' DICT_START = b'd' END = b'e' NEGATIVE_SIGN = b'-' STRING_LENGTH_SEPARATOR = b':' __all__ = ['decode', 'DecodingError', 'encode'] class DecodingError(ValueError): pass def byte_is_integer(b): return b'0' <= b <= b'9' def group_by(it, n): """ >>> list(group_by([1, 2, 3, 4], 2)) [(1, 2), (3, 4)] """ return zip(*[itertools.islice(it2, i, None, n) for i, it2 in enumerate(itertools.tee(it))]) def list_to_dict(l): if not all(isinstance(k, bytes) for k, v in group_by(reversed(l), 2)): raise DecodingError return collections.OrderedDict(group_by(reversed(l), 2)) def _read_integer(buf): c = buf.read(ONE_CHAR) if c == NEGATIVE_SIGN: negative = True c = buf.read(ONE_CHAR) else: negative = False acc = io.BytesIO() while c != END: if len(c) == 0: raise DecodingError if not byte_is_integer(c): raise DecodingError acc.write(c) c = buf.read(ONE_CHAR) n = acc.getvalue() if n.startswith(b'0') and len(n) > 1: # '03' is illegal raise DecodingError n = int(n) if n == 0 and negative: # '-0' is illegal raise DecodingError if negative: n = -n return n def _read_length(c, buf): acc = io.BytesIO() while c != STRING_LENGTH_SEPARATOR: if not byte_is_integer(c): raise DecodingError acc.write(c) c = buf.read(ONE_CHAR) return int(acc.getvalue()) def _read_string(firstchar, buf): length = _read_length(firstchar, buf) string = buf.read(length) if len(string) != length: raise DecodingError return string list_starter = object() dict_starter = object() def decode(s): buf = io.BufferedReader(io.BytesIO(s)) buf.seek(0) stack = [] while True: c = buf.read(ONE_CHAR) if not c: raise DecodingError if c == END: acc = [] while True: if not stack: raise DecodingError x = stack.pop() if x == list_starter: elem = list(reversed(acc)) break elif x == dict_starter: elem = list_to_dict(acc) break else: acc.append(x) elif c == INTEGER_START: elem = _read_integer(buf) elif c == DICT_START: stack.append(dict_starter) 
continue elif c == LIST_START: stack.append(list_starter) continue else: elem = _read_string(c, buf) if not stack: end_of_string = not buf.read(ONE_CHAR) if not end_of_string: raise DecodingError return elem else: stack.append(elem) def encode(obj): def generator(obj): if isinstance(obj, dict): if not all(isinstance(k, bytes) for k in obj.keys()): raise ValueError("Dictionary keys must be strings") yield DICT_START # Dictionary keys should be sorted according to the BEP-0003: # "Keys must be strings and appear in sorted order (sorted as # raw strings, not alphanumerics)." for k in sorted(obj.keys()): yield from generator(k) yield from generator(obj[k]) yield END elif isinstance(obj, list): yield LIST_START for elem in obj: yield from generator(elem) yield END elif isinstance(obj, bytes): yield str(len(obj)).encode('ascii') yield STRING_LENGTH_SEPARATOR yield obj elif isinstance(obj, int): yield INTEGER_START yield str(obj).encode('ascii') yield END else: raise ValueError("type {} not supported".format(type(obj))) return b''.join(generator(obj)) rndusr-torf-547b989/torf/_generate.py000066400000000000000000000502651513142010300175470ustar00rootroot00000000000000# This file is part of torf. # # torf is free software: you can redistribute it and/or modify it under the # terms of the GNU General Public License as published by the Free Software # Foundation, either version 3 of the License, or (at your option) any later # version. # # torf is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with torf. If not, see . import errno import logging import os import queue import threading from hashlib import sha1 from time import monotonic as time_monotonic from . 
class Worker:
    """
    Wrapper around a :class:`threading.Thread` that remembers any exception
    raised by the thread's target and re-raises it when joined
    """

    def __init__(self, name, worker, start=True, fail_ok=False):
        self._name = str(name)
        self._worker = worker
        self._exception = None
        self._thread = threading.Thread(
            target=self._run_and_catch_exceptions,
            name=self._name,
        )
        if start:
            self.start(fail_ok=fail_ok)

    @property
    def exception(self):
        """Exception raised by the worker callable or ``None``"""
        return self._exception

    @property
    def name(self):
        """Name of the worker and its thread"""
        return self._name

    @property
    def is_running(self):
        """Whether the underlying thread is alive"""
        return self._thread.is_alive()

    def _run_and_catch_exceptions(self):
        # Thread target: keep whatever the worker raises so that join() can
        # re-raise it in the joining thread.
        try:
            self._worker()
        except BaseException as caught:
            self._exception = caught

    def start(self, fail_ok=False):
        """
        Start the thread unless it is already alive

        A :class:`RuntimeError` from starting the thread is re-raised unless
        `fail_ok` is truthy.
        """
        if self._thread.is_alive():
            return

        try:
            self._thread.start()
        except RuntimeError as error:
            if not fail_ok:
                _debug(f'{self.name}: Failed to start thread: {error!r}')
                raise
            _debug(f"{self.name}: Failed to start thread: {error!r} - but that's ok")
            return

        _debug(f'{self.name}: Started')

    def join(self, *args, **kwargs):
        """
        Join the thread if it is running and re-raise its stored exception

        Arguments are passed on to :meth:`threading.Thread.join`.
        """
        if self.is_running:
            self._thread.join(*args, **kwargs)
        stored = self._exception
        if stored:
            raise stored
enumerate(iter_pieces): # _debug(f'{_thread_name()}: Read #{piece_index}') if self._stop: _debug(f'{_thread_name()}: Stopped reading') break elif exceptions: self._push_piece(piece_index=piece_index, filepath=filepath, exceptions=exceptions) elif piece: self._push_piece(piece_index=piece_index, filepath=filepath, piece=piece) else: # `piece` is None because of missing file, and the exception # was already sent for the first `piece_index` of that file self._push_piece(piece_index=piece_index, filepath=filepath) # _debug(f'{_thread_name()}: {self._piece_queue.qsize()} pieces queued') except BaseException as e: _debug(f'{_thread_name()}: Exception while reading: {e!r}') raise finally: self._piece_queue.put(QUEUE_CLOSED) _debug(f'{_thread_name()}: Piece queue is now exhausted') stream.close() def _push_piece(self, *, piece_index, filepath, piece=None, exceptions=()): # _debug(f'{_thread_name()}: Pushing #{piece_index}: {filepath}: {_pretty_bytes(piece)}, {exceptions!r}') self._piece_queue.put((piece_index, filepath, piece, exceptions)) def _handle_oom(self, exception): # Reduce piece_queue.maxsize by 1 every 100ms until the MemoryErrors stop now = time_monotonic() time_diff = now - self._memory_error_timestamp if time_diff >= 0.1: old_maxsize = self._piece_queue.maxsize new_maxsize = max(1, int(old_maxsize * 0.9)) if new_maxsize != old_maxsize: _debug(f'{_thread_name()}: Reducing piece_queue.maxsize to {new_maxsize}') self._piece_queue.maxsize = new_maxsize self._memory_error_timestamp = now else: raise errors.ReadError(errno.ENOMEM, exception) def stop(self): """Stop reading and close the piece queue""" if not self._stop: _debug(f'{_thread_name()}: {type(self).__name__}: Setting stop flag') self._stop = True @property def piece_queue(self): """ :class:`queue.Queue` instance that gets evenly sized pieces from the concatenated stream of files """ return self._piece_queue class HasherPool: """ Wrapper around one or more :class:`Worker` instances that each read a piece 
from :attr:`Reader.piece_queue`, feed it to :func:`~.hashlib.sha1`, and push the resulting hash to :attr:`hash_queue` """ def __init__(self, hasher_threads, piece_queue): self._piece_queue = piece_queue self._hash_queue = queue.Queue() self._finalize_event = threading.Event() # Janitor takes care of closing the hash queue, removing idle hashers, etc self._janitor = Worker( name='janitor', worker=self._janitor_thread, start=False, ) # Hashers read from piece_queue and push to hash_queue self._hashers = [ Worker( name='hasher1', # One hasher is vital an may not die from boredom worker=lambda: self._hasher_thread(is_vital=True), start=False, ), ] for i in range(2, hasher_threads + 1): self._hashers.append( Worker( name=f'hasher{i}', # All other hashers should die if they are bored worker=lambda: self._hasher_thread(is_vital=False), start=False, ) ) # Start threads manually after they were created to prevent race # conditions and make sure all required threads are running self._janitor.start(fail_ok=False) # Hashers are allowed to fail (e.g. because of OS limits), but we need # at least one to start successfully self._hashers[0].start(fail_ok=False) for hasher in self._hashers[1:]: hasher.start(fail_ok=True) def _hasher_thread(self, is_vital=True): piece_queue = self._piece_queue handle_piece = self._handle_piece while True: # _debug(f'{_thread_name()}: Waiting for next task') try: task = piece_queue.get(timeout=0.5) except queue.Empty: if not is_vital: _debug(f'{_thread_name()}: I am bored, byeee!') break else: _debug(f'{_thread_name()}: I am bored, but needed.') else: if task is QUEUE_CLOSED: _debug(f'{_thread_name()}: piece_queue is closed') # Repeat QUEUE_CLOSED to the next sibling. This ensures # there is always one more QUEUE_CLOSED queued than running # threads. Otherwise, one thread might consume multiple # QUEUE_CLOSED and leave other threads running forvever. 
piece_queue.put(QUEUE_CLOSED) # Signal janitor to initiate shutdown procedure self._finalize_event.set() break else: handle_piece(*task) def _handle_piece(self, piece_index, filepath, piece, exceptions): if exceptions: # _debug(f'{_thread_name()}: Forwarding exceptions for #{piece_index}: {exceptions!r}') self._hash_queue.put((piece_index, filepath, None, exceptions)) elif piece: piece_hash = sha1(piece).digest() # _debug(f'{_thread_name()}: Hashed #{piece_index}: ' # f'{_pretty_bytes(piece)} [{len(piece)} bytes] -> {piece_hash}') self._hash_queue.put((piece_index, filepath, piece_hash, ())) else: # _debug(f'{_thread_name()}: Nothing to hash for #{piece_index}: {piece!r}') self._hash_queue.put((piece_index, filepath, None, ())) def _janitor_thread(self): while True: _debug(f'{_thread_name()}: Waiting for finalize event') finalization_initiated = self._finalize_event.wait(timeout=1.0) if finalization_initiated: self._wait_for_hashers() _debug(f'{_thread_name()}: Closing hash queue') self._hash_queue.put(QUEUE_CLOSED) break else: # Remove terminated idle hashers for hasher in tuple(self._hashers): if not hasher.is_running: _debug(f'{_thread_name()}: Pruning {hasher.name}') self._hashers.remove(hasher) _debug(f'{_thread_name()}: Terminating') def _wait_for_hashers(self): while True: # _debug(f'{_thread_name()}: Hashers running: {[h.name for h in self._hashers if h.is_running]}') if all(not h.is_running for h in self._hashers): _debug(f'{_thread_name()}: All hashers terminated') break def join(self): """Block until all threads have terminated""" for hasher in self._hashers: _debug(f'{_thread_name()}: Joining {hasher.name}') hasher.join() _debug(f'{_thread_name()}: Joined all hashers') _debug(f'{_thread_name()}: Joining {self._janitor.name}') self._janitor.join() _debug(f'{_thread_name()}: Joined {self._janitor.name}') @property def hash_queue(self): """:class:`queue.Queue` instance that gets piece hashes""" return self._hash_queue class Collector: """ Consume items 
from :attr:`HasherPool.hash_queue` and ensure proper termination of all threads if anything goes wrong or the user cancels the operation """ def __init__(self, torrent, reader, hashers, callback=None): self._reader = reader self._hashers = hashers self._callback = callback self._hashes_unsorted = [] self._pieces_seen = [] self._pieces_total = torrent.pieces def collect(self): """ Read piece hashes from :attr:`HasherPool.hash_queue` When this method returns, :attr:`hashes` is an ordered sequence of collected piece hashes. Exceptions from :class:`Reader`, :class:`HasherPool` or the provided callback are raised after all threads are terminated and joined. :return: the same value as :attr:`hashes` """ try: hash_queue = self._hashers.hash_queue while True: # _debug(f'{_thread_name()}: Waiting for next piece hash') task = hash_queue.get() # _debug(f'{_thread_name()}: Got task: {task}') if task is QUEUE_CLOSED: break else: self._collect(*task) except BaseException as e: _debug(f'{_thread_name()}: Exception while dequeueing piece hashes: {e!r}') self._cancel() raise finally: self._finalize() return self.hashes def _collect(self, piece_index, filepath, piece_hash, exceptions): # _debug(f'{_thread_name()}: Collecting #{piece_index}: {_pretty_bytes(piece_hash)}, {exceptions}') # Remember which pieces where hashed to count them and for sanity checking assert piece_index not in self._pieces_seen self._pieces_seen.append(piece_index) # Collect piece if not exceptions and piece_hash: self._hashes_unsorted.append((piece_index, piece_hash)) # If there is no callback, raise first exception if exceptions and not self._callback: raise exceptions[0] # Report progress/exceptions and allow callback to cancel if self._callback: # _debug(f'{_thread_name()}: Collector callback: {self._callback}') maybe_cancel = self._callback( piece_index, len(self._pieces_seen), self._pieces_total, filepath, piece_hash, exceptions, ) # _debug(f'{_thread_name()}: Collector callback return value: 
{maybe_cancel}') if maybe_cancel is not None: self._cancel() def _cancel(self): # NOTE: We don't need to stop HasherPool or Collector.collect() because # they will stop when Reader._push_pieces() pushes QUEUE_CLOSED. # They will process the pieces in the queue, but that shouldn't # take long unless the Reader's queue size is too big. self._reader.stop() def _finalize(self): _debug(f'{_thread_name()}: Joining {self._reader}') self._reader.join() _debug(f'{_thread_name()}: Joining {self._hashers}') self._hashers.join() _debug(f'{_thread_name()}: hash_queue has {self._hashers.hash_queue.qsize()} items left') @property def hashes(self): """Ordered sequence of piece hashes""" return tuple(hash for index, hash in sorted(self._hashes_unsorted)) class _IntervaledCallback: """ Callable that calls `callback`, but only if at least `interval` seconds elapsed since the previous call """ def __init__(self, callback, interval=0): self._callback = callback self._interval = interval self._prev_call_time = -1 def __call__(self, *args, force=False): now = time_monotonic() diff = now - self._prev_call_time # _debug(f'{_thread_name()}: Callback? {force=} or {diff=} >= {self._interval=}') if force or diff >= self._interval: self._prev_call_time = now # _debug(f'{_thread_name()}: Callback! 
{args=}') return self._callback(*args) class _TranslatingCallback: def __init__(self, callback, interval, torrent): self._callback = callback self._torrent = torrent self._intervaled_callback = _IntervaledCallback( callback=self._call_callback, interval=interval, ) def __call__(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): force = self._force_callback(piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions) return self._intervaled_callback(piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions, force=force) def _force_callback(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): # Figure out if we must ignore the interval for this call. This method # is called for every hashed piece and should be as efficient as # possible. raise NotImplementedError('You must implement this method!') def _call_callback(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): # Translate arguments for the actual callback. This method is only # called at intervals (e.g. once per second). 
raise NotImplementedError('You must implement this method!') class GenerateCallback(_TranslatingCallback): """ Translate arguments from :class:`Collector` to what's specified by :meth:`~.Torrent.generate` """ def _force_callback(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): return exceptions or pieces_done >= pieces_total def _call_callback(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): if exceptions: # Torrent creation errors are always fatal and must be raised raise exceptions[0] elif self._callback: # Report progress and allow callback to cancel return self._callback(self._torrent, filepath, pieces_done, pieces_total) class VerifyCallback(_TranslatingCallback): """ Translate arguments from :class:`Collector` to what's specified by :meth:`~.Torrent.verify` """ def __init__(self, *args, path, **kwargs): super().__init__(*args, **kwargs) # Store piece hashes from the torrent for quick access self._exp_hashes = self._torrent.hashes # Map expected file system paths to expected file sizes # NOTE: The last segment in `path` is supposed to be the torrent name so # we must remove the name stored in the torrent file from each # `file`. This allows verification of any renamed file/directory # against a torrent. 
self._exp_file_sizes = tuple( ( os.sep.join((str(path), *file.parts[1:])), self._torrent.partial_size(file), ) for file in self._torrent.files ) def _force_callback(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): return ( # Always report exceptions exceptions # Always report completion or pieces_done >= pieces_total # Always report hash mismatch or piece_hash is not None and piece_hash != self._exp_hashes[piece_index] ) def _call_callback(self, piece_index, pieces_done, pieces_total, filepath, piece_hash, exceptions): if ( # Don't add verification error if there are other errors not exceptions # Piece hash was calculated and doesn't match and piece_hash is not None and piece_hash != self._exp_hashes[piece_index] ): exceptions = (errors.VerifyContentError( filepath, piece_index, self._torrent.piece_size, self._exp_file_sizes, ),) if self._callback: # Callback can raise exception or handle it otherwise def call_callback(fpath, exception): return self._callback( self._torrent, fpath, pieces_done, pieces_total, piece_index, piece_hash, exception, ) if exceptions: # Call callback for each exception until it indicates # cancellation by returning anything truthy for exception in exceptions: fpath = self._get_path_from_exception(exception) maybe_cancel = call_callback(fpath, exception) if maybe_cancel is not None: return maybe_cancel else: # Report progress and return cancellation indicator return call_callback(filepath, None) elif exceptions: # Default to raising first exception raise exceptions[0] @staticmethod def _get_path_from_exception(exception): for attr in ('filepath', 'path'): try: return getattr(exception, attr) except AttributeError: pass raise RuntimeError(f'Failed to get path from {exception!r}') rndusr-torf-547b989/torf/_magnet.py000066400000000000000000000332411513142010300172230ustar00rootroot00000000000000# This file is part of torf. 
# # torf is free software: you can redistribute it and/or modify it under the # terms of the GNU General Public License as published by the Free Software # Foundation, either version 3 of the License, or (at your option) any later # version. # # torf is distributed in the hope that it will be useful, but WITHOUT ANY # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR # A PARTICULAR PURPOSE. See the GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with torf. If not, see . import base64 import binascii import io import re import time import urllib from collections import abc, defaultdict from . import _errors as error from . import _utils as utils class Magnet(): """ BTIH Magnet URI :param str xt: eXact Topic: Info hash (Base 16 or 32) :param str dn: Display Name: Name of the torrent :param int xl: eXact Length: Size in bytes :param list tr: TRacker: Iterable of tracker URLs :param str xs: eXact Source: Torrent file URL :param str as\\_: Acceptable Source: Fallback torrent file URL :param list ws: WebSeeds: Iterable of webseed URLs (see BEP19) :param list kt: Keyword Topic: List of search keywords All keyword arguments that start with ``x_`` go into the :attr:`x` dictionary with the part after the underscore as the key. They appear as "x." in the rendered URI. 
References: | https://www.bittorrent.org/beps/bep_0009.html | https://en.wikipedia.org/wiki/Magnet_URL | http://magnet-uri.sourceforge.net/magnet-draft-overview.txt | https://wiki.theory.org/index.php/BitTorrent_Magnet-URI_Webseeding | http://shareaza.sourceforge.net/mediawiki/index.php/Magnet_URI_scheme """ _INFOHASH_REGEX = re.compile(r'^[0-9a-f]{40}|[a-z2-7]{32}$', flags=re.IGNORECASE) _XT_REGEX = re.compile(r'^urn:btih:([0-9a-f]{40}|[a-z2-7]{32})$', flags=re.IGNORECASE) def __init__(self, xt, *, dn=None, xl=None, tr=None, xs=None, as_=None, ws=None, kt=None, **kwargs): self._tr = utils.MonitoredList(type=utils.URL) self._ws = utils.MonitoredList(type=utils.URL) self.xt = xt self.dn = dn self.xl = xl self.tr = tr self.xs = xs self.as_ = as_ self.ws = ws self.kt = kt self._x = defaultdict(lambda: None) for key in tuple(kwargs): if key.startswith('x_'): self._x[key[2:]] = kwargs.pop(key) if kwargs: key, value = next(iter(kwargs.items())) raise TypeError(f'Unrecognized argument: {key}={value!r}') @property def dn(self): """Display Name: Name of the torrent or ``None``""" return self._dn @dn.setter def dn(self, value): self._dn = str(value).replace('\n', ' ') if value is not None else None @property def xt(self): """ eXact Topic: URN containing the info hash as base 16 or base 32 Example: urn:btih:3bb9561e35b06175bb6d2c2330578dc83846cc5d For convenience, this property may be set to the info hash without the ``urn:btih`` part. 
:raises MagnetError: if set to an invalid value """ return f'urn:btih:{self._infohash}' @xt.setter def xt(self, value): value = str(value) if self._INFOHASH_REGEX.match(value): self._infohash = value else: match = self._XT_REGEX.match(value) if match: self._infohash = match.group(1) if not hasattr(self, '_infohash'): raise error.MagnetError(value, 'Invalid exact topic ("xt")') @property def infohash(self): """ Info hash as base 16 or base 32 :raises MagnetError: if set to an invalid value """ return self._infohash @infohash.setter def infohash(self, value): value = str(value) match = self._INFOHASH_REGEX.match(value) if match: self._infohash = value else: raise error.MagnetError(value, 'Invalid info hash') @property def xl(self): """ eXact Length: Size in bytes or ``None`` :raises MagnetError: if set to an invalid value """ return self._xl @xl.setter def xl(self, value): if value is not None: try: value = int(value) except ValueError: raise error.MagnetError(value, 'Invalid exact length ("xl")') else: if value < 1: raise error.MagnetError(value, 'Must be 1 or larger') else: self._xl = value else: self._xl = None @property def tr(self): """ TRackers: List of tracker URLs, single tracker URL or ``None`` :raises URLError: if any of the URLs is invalid """ return self._tr @tr.setter def tr(self, value): if value is None: self._tr.clear() elif isinstance(value, str): self._tr.replace((value,)) else: self._tr.replace(value) @property def xs(self): """ eXact Source: Torrent file URL or ``None`` :raises URLError: if the URL is invalid """ return self._xs @xs.setter def xs(self, value): self._xs = utils.URL(value) if value is not None else None @property def as_(self): """ Acceptable Source: Fallback torrent file URL or ``None`` (The trailing underscore is needed because "as" is a keyword in Python.) 
:raises URLError: if the URL is invalid """ return self._as @as_.setter def as_(self, value): self._as = utils.URL(value) if value is not None else None @property def ws(self): """ WebSeeds: List of webseed URLs, single webseed URL or ``None`` See BEP19. :raises URLError: if any of the URLs is invalid """ return self._ws @ws.setter def ws(self, value): if value is None: self._ws.clear() elif isinstance(value, str): self._ws.replace((value,)) else: self._ws.replace(value) @property def kt(self): """Keyword Topic: List of search keywords or ``None``""" return self._kt @kt.setter def kt(self, value): if value is None: self._kt = [] elif isinstance(value, str): self._kt = [value] elif isinstance(value, abc.Iterable): self._kt = [str(v) for v in value] if value is not None else None else: raise error.MagnetError(value, 'Invalid keyword topic ("kt")') @property def x(self): """ Mapping of custom keys to their values For example, "x.pe" (a peer address) would be accessed as ``magnet.x['pe']``. """ return self._x def torrent(self): """:class:`Torrent` instance""" # Prevent circular import issues from ._torrent import Torrent torrent = Torrent() torrent.name = self.dn if self.tr: torrent.trackers = self.tr if self.ws: torrent.webseeds = self.ws if self.xl: torrent._metainfo['info']['length'] = self.xl if hasattr(self, '_info'): torrent.metainfo['info'] = self._info elif len(self.infohash) == 40: torrent._infohash = self.infohash else: # Convert base 32 to base 16 (SHA1) torrent._infohash = base64.b16encode( base64.b32decode(self.infohash)).decode('utf-8').lower() return torrent def get_info(self, validate=True, timeout=60, callback=None): """ Download the torrent's "info" section Try the following sources in this order: :attr:`xs`, :attr:`as`, :attr:`tr` :meth:`torrent` can only return a complete torrent if this method is called first. 
:param validate: Whether to ensure the downloaded "info" section is valid :param timeout: Give up after this many seconds :type timeout: int, float :param callback callable: Callable that is called with a :class:`TorfError` instance if a source is specified but fails :return: ``True`` if the "info" section was successfully downloaded, ``False`` otherwise """ def success(): return hasattr(self, '_info') torrent_urls = [] if self.xs: torrent_urls.append(self.xs) # noqa: E701 if self.as_: torrent_urls.append(self.as_) # noqa: E701 torrent_urls.extend((url.rstrip('/') + '.torrent' for url in self.ws)) # I couldn't find any documentation for the "/file?info_hash=..." GET request, but # it seems to work for HTTP trackers. # https://stackoverflow.com/a/1019588 for url in self.tr: if url.scheme in ('http', 'https'): infohash_enc = urllib.parse.quote_from_bytes(binascii.unhexlify(self.infohash)) torrent_urls.append(f'{url.scheme}://{url.netloc}/file?info_hash={infohash_enc}') start = time.monotonic() for url in torrent_urls: to = timeout - (time.monotonic() - start) try: torrent = utils.download(url, timeout=to) except error.ConnectionError as e: if callback: callback(e) else: self._set_info_from_torrent(torrent, validate, callback) if success() or to <= 0: break return success() def _set_info_from_torrent(self, torrent_data, validate=True, callback=False): """Extract "info" section from `torrent_data` for :meth:`torrent`""" # Prevent circular import issues from ._torrent import Torrent stream = io.BytesIO(torrent_data) try: torrent = Torrent.read_stream(stream, validate=validate) except error.TorfError as e: if callback: callback(e) else: if validate and self.infohash != torrent.infohash: raise error.MetainfoError(f'Mismatching info hashes: {self.infohash} != {torrent.infohash}') elif torrent.metainfo['info']: self._info = torrent.metainfo['info'] _KNOWN_PARAMETERS = ('xt', 'dn', 'xl', 'tr', 'xs', 'as', 'ws', 'kt') @classmethod def from_string(cls, uri): """ Create 
:class:`Magnet` URI from string :raises URLError: if `uri` contains an invalid URL (e.g. :attr:`tr`) :raises MagnetError: if `uri` is not a valid magnet URI """ info = urllib.parse.urlparse(uri.strip(), scheme='magnet', allow_fragments=False) if not info.scheme == 'magnet': raise error.MagnetError(uri, 'Not a magnet URI') else: query = urllib.parse.parse_qs(info.query) # Check for unknown parameters for key in query: if key not in cls._KNOWN_PARAMETERS and not key.startswith('x_'): raise error.MagnetError(uri, f'{key}: Unknown parameter') if 'xt' not in query: raise error.MagnetError(uri, 'Missing exact topic ("xt")') elif len(query['xt']) > 1: raise error.MagnetError(uri, 'Multiple exact topics ("xt")') else: self = cls(xt=query['xt'][0]) # Parameters that accept only one value for param,attr,name,parse in (('dn', 'dn', 'display name', lambda v: v), ('xl', 'xl', 'exact length', lambda v: v), ('xs', 'xs', 'exact source', lambda v: v), ('as', 'as_', 'acceptable source', lambda v: v), ('kt', 'kt', 'keyword topic', lambda v: v.split())): if param in query: if len(query[param]) > 1: raise error.MagnetError(uri, f'Multiple {name}s ("{param}")') else: setattr(self, attr, parse(query[param][0])) # Parameters that accept multiple values for param,name in (('tr', 'tracker'), ('ws', 'webseed')): if param in query: setattr(self, param, query[param]) return self def __str__(self): uri = [f'magnet:?xt={self.xt}'] for key in ('dn', 'xl', 'xs', 'as_'): value = getattr(self, f'{key}') if value is not None: if isinstance(value, str): uri.append(f'{key}={utils.urlquote(value)}') else: uri.append(f'{key}={value}') if self.kt: uri.append(f'kt={"+".join(utils.urlquote(k) for k in self.kt)}') for key in ('tr', 'ws'): seq = getattr(self, f'{key}') if seq is not None: for item in seq: uri.append(f'{key}={utils.urlquote(item)}') for key,value in self._x.items(): uri.append(f'x.{key}={utils.urlquote(value)}') return '&'.join(uri) def __repr__(self): clsname = type(self).__name__ kwargs = {} 
# Type stub for the Magnet class (magnet URI representation).
class Magnet:
    # Compiled patterns; values are defined by the implementation module.
    _INFOHASH_REGEX: Pattern[str] = ...
    _XT_REGEX: Pattern[str] = ...

    # `xt` (exact topic) is the only positional argument; everything after
    # the bare `*` is keyword-only. Extra keyword arguments are accepted via
    # **kwargs.
    def __init__(
        self,
        xt: str,
        *,
        dn: str | None = None,
        xl: int | None = None,
        tr: Iterable[str] | str | None = None,
        xs: str | None = None,
        as_: str | None = None,
        ws: Iterable[str] | str | None = None,
        kt: Iterable[str] | str | None = None,
        **kwargs: Any,
    ) -> None: ...

    # dn: display name
    @property
    def dn(self) -> str | None: ...
    @dn.setter
    def dn(self, value: str) -> None: ...

    # xt: exact topic
    @property
    def xt(self) -> str: ...
    @xt.setter
    def xt(self, value: str) -> None: ...

    @property
    def infohash(self) -> str: ...
    @infohash.setter
    def infohash(self, value: str) -> None: ...

    # xl: exact length
    @property
    def xl(self) -> int | None: ...
    @xl.setter
    def xl(self, value: int) -> None: ...

    # tr: trackers (multiple values allowed)
    @property
    def tr(self) -> MonitoredList[str]: ...
    @tr.setter
    def tr(self, value: Iterable[str] | str | None) -> None: ...

    # xs: exact source
    @property
    def xs(self) -> URL | None: ...
    @xs.setter
    def xs(self, value: str | None) -> None: ...

    # as_: acceptable source; corresponds to the "as" URI parameter
    @property
    def as_(self) -> URL | None: ...
    @as_.setter
    def as_(self, value: str | None) -> None: ...

    # ws: webseeds (multiple values allowed)
    @property
    def ws(self) -> MonitoredList[str]: ...
    @ws.setter
    def ws(self, value: Iterable[str] | str | None) -> None: ...

    # kt: keyword topic
    @property
    def kt(self) -> list[str] | None: ...
    @kt.setter
    def kt(self, value: Iterable[str] | str | None) -> None: ...

    # Read-only here: no setter is declared for `x`.
    @property
    def x(self) -> defaultdict[str, Any]: ...

    def torrent(self) -> Torrent: ...
    def get_info(
        self,
        validate: bool = True,
        timeout: int = 60,
        callback: Callable[[TorfError], None] | None = None
    ) -> bool: ...

    # Parameter names that from_string() accepts besides "x_"-prefixed ones.
    _KNOWN_PARAMETERS: tuple[str, ...] = ...

    @classmethod
    def from_string(cls, uri: str) -> Self: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...
def is_content_match(torrent, candidate):
    """
    Whether the content of `torrent` matches the piece hashes of `candidate`

    Both arguments are :class:`~.Torrent` objects.

    For every file in `torrent`, the first, the middle and the last piece
    that contain bytes of that file are read from `torrent`'s
    :attr:`~.Torrent.path` and compared to the corresponding hashes from
    `candidate`. This detects file name/size collisions.

    This is relatively slow and should only be used after
    :func:`is_file_match` returned `True`.

    :raise RuntimeError: if `torrent` has no file system path

    :return: `True` if all checked pieces match, `False` otherwise
    """
    if not torrent.path:
        raise RuntimeError(f'Torrent does not have a file system path: {torrent!r}')

    # Compare some piece hashes for each file
    with stream.TorrentFileStream(candidate, content_path=torrent.path) as tfs:
        check_piece_indexes = set()
        for file in torrent.files:
            all_file_piece_indexes = tfs.get_piece_indexes_of_file(file)
            # `middle` is a position in `all_file_piece_indexes`; it must be
            # translated into the piece index stored at that position.
            # Slicing (instead of indexing) keeps this safe if a file has no
            # pieces at all.
            middle = len(all_file_piece_indexes) // 2
            check_piece_indexes.update(all_file_piece_indexes[:1])
            check_piece_indexes.update(all_file_piece_indexes[middle:middle + 1])
            check_piece_indexes.update(all_file_piece_indexes[-1:])

        # A set is used above because neighbouring files share pieces;
        # every piece is verified only once, in stream order.
        for piece_index in sorted(check_piece_indexes):
            if not tfs.verify_piece(piece_index):
                return False
    return True
"Permission denied" exception # Call callback if we found a match of if we are verifying file contents or is_match in (True, None) # Call callback if this is the last torrent file or torrent_files_done >= self._torrent_files_total ) return super().__call__( self._torrent, torrent_filepath, torrent_files_done, self._torrent_files_total, is_match, exception, force=force, ) elif exception: raise exception rndusr-torf-547b989/torf/_stream.py000066400000000000000000000730461513142010300172520ustar00rootroot00000000000000import errno import hashlib import itertools import math import os from . import _errors as error class TorrentFileStream: """ Traverse concatenated files as they are described in a torrent :param torrent: :class:`~.torf.Torrent` object Files are opened on demand and kept open for re-use. It is recommended to make use of the context manager protocol to make sure they are properly closed when no longer needed. Example: >>> torrent = torf.Torrent(...) >>> with TorrentFileStream(torrent) as tfs: >>> # Get the 29th piece of the concatenated file stream >>> piece = tfs.get_piece(29) """ def __init__(self, torrent, content_path=None): self._torrent = torrent self._content_path = content_path self._open_files = {} def _get_content_path(self, content_path, none_ok=False, file=None): # Get content_path argument from class or method call or from # Torrent.path attribute if content_path is not None: content_path = content_path elif self._content_path is not None: content_path = self._content_path elif self._torrent.path is not None: content_path = self._torrent.path elif none_ok: content_path = None else: raise ValueError('Missing content_path argument and torrent has no path specified') if self._torrent.mode == 'singlefile': # Torrent contains no directory, just a file return content_path or file # Torrent contains directory with one or more files in it if file is None: return content_path else: # Append internal path from torrent file if content_path: # Use the 
def get_file_position(self, file):
    """
    Return the stream offset of the first byte of `file`

    The offset is the combined size of all files that precede `file` in
    the torrent's file list.

    :param file: :class:`~torf.File` object

    :raise ValueError: if `file` is not specified in the torrent
    """
    try:
        position_in_list = self._torrent.files.index(file)
    except ValueError:
        raise ValueError(f'File not specified: {file}')

    # Add up the sizes of everything in front of `file`
    offset = 0
    for preceding_file in self._torrent.files[:position_in_list]:
        offset += preceding_file.size
    return offset
def get_piece_indexes_of_file(self, file, exclusive=False):
    """
    Return the indexes of all pieces that hold at least one byte of `file`

    :param file: :class:`~torf.File` object
    :param bool exclusive: If true, the first and the last piece are dropped
        from the result when they also contain bytes of other files, so
        only pieces that belong to `file` alone remain
    """
    chunk_size = self._torrent.piece_size
    offset = self.get_file_position(file)
    head = math.floor(offset / chunk_size)
    tail = math.floor((offset + file.size - 1) / chunk_size)
    indexes = [*range(head, tail + 1)]

    if not exclusive:
        return indexes

    # Only the outermost pieces can be shared with neighbouring files
    if self.get_files_at_piece_index(head) != [file]:
        indexes.remove(head)

    tail_files = self.get_files_at_piece_index(tail)
    # `tail` may already be gone if the file covers a single piece
    if tail in indexes and tail_files != [file]:
        indexes.remove(tail)

    return indexes
def get_files_at_piece_index(self, piece_index, content_path=None):
    """
    Return the files that have one or more bytes in the piece at
    `piece_index`

    :param piece_index: Index of the piece; minimum is 0, maximum is the
        torrent's number of pieces minus 1
    :param content_path: Path to file or directory (defaults to class
        argument of the same name, :attr:`~.Torrent.path` or the file path
        from the torrent)

    :raise ValueError: if `piece_index` is negative or no file has bytes in
        that piece
    """
    matches = []
    if piece_index >= 0:
        span = self._torrent.piece_size
        first_byte = piece_index * span
        matches = self.get_files_at_byte_range(
            first_byte,
            first_byte + span - 1,
            content_path=content_path,
        )

    # No files means the piece lies outside of the stream
    if not matches:
        raise ValueError(f'piece_index is out of bounds (0 - {self.max_piece_index}): {piece_index}')
    return matches
def get_relative_piece_indexes(self, file, relative_piece_indexes):
    """
    Return list of validated relative piece indexes

    :param file: :class:`~torf.File` object
    :param relative_piece_indexes: Sequence of piece indexes within `file`;
        negative values address pieces at the end of `file`, e.g. [0, 12,
        -1, -2]

    Every index is converted to :class:`int`. Indexes that are out of bounds
    are clamped to the first or last piece of `file`. Duplicates are removed
    and the result is sorted.

    Example:

    >>> # Assume `file` is 100 pieces long. `1000` and `-1000` are out of
    >>> # bounds and therefore clamped to the last and first piece.
    >>> tfs.get_relative_piece_indexes(file, (0, 1, 70, 75, 1000, -1000, -3, -2, -1))
    [0, 1, 70, 75, 97, 98, 99]
    """
    validated_piece_indexes = set()
    min_piece_index = 0
    max_piece_index = math.floor((file.size - 1) / self._torrent.piece_size)
    for rpi in relative_piece_indexes:
        # Convert once so the sign check and the arithmetic below operate
        # on the same integer (as get_absolute_piece_indexes() does)
        rpi = int(rpi)
        if rpi < 0:
            # Count from the end: -1 is the last piece
            rpi = max_piece_index + rpi + 1
        validated_piece_indexes.add(max(min_piece_index, min(max_piece_index, rpi)))
    return sorted(validated_piece_indexes)
def _get_open_file(self, filepath):
    """
    Return a file handle for `filepath` that is opened in binary read mode

    Handles are cached in ``self._open_files`` and re-used. Before a new
    file is opened, the oldest cached handles are closed so that no more
    than :attr:`max_open_files` files are open at the same time.

    :raise ReadError: if `filepath` cannot be opened
    """
    if filepath not in self._open_files:
        # Prevent "Too many open files" (EMFILE). Make room *before*
        # opening the new file; ">=" (not ">") is required or we end up
        # with max_open_files + 1 handles. The emptiness check keeps the
        # loop finite for max_open_files <= 0.
        while self._open_files and len(self._open_files) >= self.max_open_files:
            # dicts preserve insertion order, so the first key is the oldest
            oldest_filepath = next(iter(self._open_files))
            self._open_files.pop(oldest_filepath).close()

        try:
            self._open_files[filepath] = open(filepath, 'rb')
        except OSError as e:
            raise error.ReadError(e.errno, filepath)

    return self._open_files.get(filepath, None)
filepath, (exception1, exception2, ...))` Each piece consists of :attr:`~.Torrent.piece_size` bytes, except for the final piece in the stream of concatenated files, which may be shorter. Filepaths are generated from `content_path` and the relative file paths from the torrent. Exceptions are :class:`~.TorfError` subclasses. If a file is not readable, pieces are `None` for each missing piece. This usually includes the last piece of the previous file and the first piece of the next file unless the unreadable file starts/ends right on a piece boundary. You can wrap this iterator in :func:`enumerate` to get the piece index for each piece: >>> for piece_index, (piece, filepath, exceptions) in stream.iter_pieces(): >>> ... :param content_path: Path to file or directory to read pieces from (defaults to class argument of the same name or :attr:`~.Torrent.path`) :param oom_callback: Callable that gets :class:`~.errors.MemoryError` instance Between calls to `oom_callback`, the piece that caused the exception is read again and again until it fits into memory. This callback offers a way to free more memory. If it fails, it is up to the callback to raise the exception or deal with it in some other way. If this is `None`, :class:`~.errors.MemoryError` is raised normally. 
def _iter_from_file_handle(self, fh, prepend, skip_bytes, oom_callback):
    # Return `(pieces, skip_bytes)` where `pieces` is a generator that
    # yields the pieces read from the file handle `fh`.
    #
    # `prepend` is the incomplete piece from the previous file, i.e. the
    # leading bytes of the next piece.
    #
    # `skip_bytes` is the number of bytes from `fh` to dump before
    # reading the next piece.
    #
    # `oom_callback` is passed through to `_read_from_fh()`.
    #
    # All yielded pieces are `piece_size` bytes long except possibly the
    # last one, which is shorter if `fh` reaches EOF within a piece.

    if skip_bytes:
        # seek() returns the new absolute position, which is subtracted
        # from `skip_bytes` to get the remainder that is returned to the
        # caller.
        # NOTE(review): presumably `fh` is always long enough so that the
        # remainder is 0 - confirm against iter_pieces().
        skipped = fh.seek(skip_bytes)
        skip_bytes -= skipped

    def iter_pieces(fh, prepend):
        piece_size = self._torrent.piece_size
        piece = b''
        # Iterate over pieces in `prepend`ed bytes, store incomplete piece
        # in `piece`
        for pos in range(0, len(prepend), piece_size):
            piece = prepend[pos:pos + piece_size]
            if len(piece) == piece_size:
                yield piece
                piece = b''

        try:
            # Fill incomplete piece with first bytes from `fh`
            if piece:
                piece += self._read_from_fh(
                    fh=fh,
                    size=piece_size - len(piece),
                    oom_callback=oom_callback,
                )
                # This may still be shorter than `piece_size` if `fh` is
                # smaller than the gap that had to be filled
                yield piece

            # Iterate over `piece_size`ed chunks from `fh`
            while True:
                piece = self._read_from_fh(
                    fh=fh,
                    size=piece_size,
                    oom_callback=oom_callback,
                )
                if piece:
                    yield piece
                else:
                    break  # EOF

        except OSError as e:
            # Report read failures with the path of the offending file
            raise error.ReadError(e.errno, fh.name)

    return iter_pieces(fh, prepend), skip_bytes
def __call__(self, file, content_path, reason):
    """
    Calculate what must be reported for the missing or unreadable `file`

    :param file: File object from the torrent that cannot be read
    :param content_path: Passed on to the stream's `_get_content_path()`
        to generate the reported file paths
    :param reason: Exception that explains why `file` cannot be read

    :return: 2-tuple of an iterator over `(None, filepath, exceptions)`
        tuples (one for each missing piece that wasn't reported yet) and
        the number of bytes that must be skipped at the start of the next
        file that is read
    """
    # Get the pieces covered by `file` minus all pieces we have already
    # reported due to overlaps. Build a new list instead of removing items
    # from the list we are iterating over, which skips the item that
    # follows each removed item.
    piece_indexes = [
        piece_index
        for piece_index in self._stream.get_piece_indexes_of_file(file)
        if piece_index not in self._piece_indexes_seen
    ]
    self._piece_indexes_seen.update(piece_indexes)

    # Figure out which subsequent files are affected by the missing last
    # piece of `file`
    affected_files = self._stream.get_files_at_piece_index(piece_indexes[-1], content_path='')
    affected_files.remove(file)
    # _debug(f'{affected_files=}')

    # Files that are processed as a side effect because they only exist in a
    # piece that also belongs to `file`
    bycatch_files = []

    # Unless `file` is the last file or it ends perfectly at a piece
    # boundary, we must calculate where the next piece starts in the next
    # file
    skip_bytes = 0
    if affected_files:
        # There are multiple files in the last piece of `file`

        # NOTE: `next_file` is the first file in the next piece, not the
        #       file after `file` in the stream (remember: each piece can
        #       fit lots and lots of files)
        next_file = affected_files[-1]
        next_file_start, next_file_end = self._stream.get_byte_range_of_file(next_file)

        # Stream index of the last byte of the last missing piece of `file`
        next_piece_boundary_index = (
            (piece_indexes[-1] * self._torrent.piece_size)
            + self._torrent.piece_size - 1
        )

        if next_file_end > next_piece_boundary_index:
            # The last file in this last missing piece continues in the next
            # piece. When we read from that file to create the next piece,
            # we must skip the first few bytes.
            skip_bytes = next_piece_boundary_index - next_file_start + 1

            # Mark all files between `file` and `next_file` as bycatch,
            # excluding `file` and `next_file`
            bycatch_files.extend(affected_files[:-1])
        else:
            # Include `next_file` in bycatch because it doesn't reach into
            # the next piece
            bycatch_files.extend(affected_files)

    self._bycatch_files.extend(bycatch_files)

    def iter_yields():
        # _debug(f'Calculated missing pieces: {piece_indexes}')
        # _debug(f'Calculated bycatch files: {bycatch_files}')
        # _debug(f'Skipping {skip_bytes} bytes at the start of next file')
        piece_count = len(piece_indexes)
        it = itertools.chain(
            self._first_yield(piece_count, file, content_path, bycatch_files, reason),
            self._middle_yields(piece_count, file, content_path),
            self._last_yield(piece_count, file, content_path, bycatch_files),
        )
        yield from it

    return iter_yields(), skip_bytes
def _middle_yields(self, piece_count, file, content_path):
    # Yield `(None, filepath, ())` for every missing piece between the
    # first and the last piece of `file`. Exceptions are not reported here
    # because _first_yield() and _last_yield() take care of that.
    inner_piece_count = piece_count - 2
    # _debug(f'Middle yields: {max(0, middle_piece_count)} middle pieces found')
    if inner_piece_count < 1:
        # Only a first and/or last piece, nothing in between
        return

    placeholder = (
        None,
        self._stream._get_content_path(content_path, none_ok=False, file=file),
        (),
    )
    yield from itertools.repeat(placeholder, inner_piece_count)
# Type stub for TorrentFileStream, which traverses the concatenated files
# described by a torrent.
class TorrentFileStream:
    def __init__(self, torrent: Torrent, content_path: StrPath | None = None) -> None: ...

    # Context manager protocol
    def __enter__(self) -> Self: ...
    def __exit__(
        self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: TracebackType | None
    ) -> None: ...
    def close(self) -> None: ...

    @property
    def max_piece_index(self) -> int: ...

    # Translation between files, byte positions and piece indexes
    def get_file_position(self, file: File) -> int: ...
    def get_file_at_position(self, position: int, content_path: StrPath | None = None) -> File: ...
    def get_piece_indexes_of_file(self, file: File, exclusive: bool = False) -> list[int]: ...
    def get_files_at_byte_range(
        self, first_byte_index: int, last_byte_index: int, content_path: StrPath | None = None
    ) -> list[File]: ...
    def get_byte_range_of_file(self, file: File) -> tuple[int, int]: ...
    def get_files_at_piece_index(self, piece_index: int, content_path: StrPath | None = None) -> list[File]: ...
    def get_absolute_piece_indexes(self, file: File, relative_piece_indexes: Iterable[int]) -> list[int]: ...
    def get_relative_piece_indexes(self, file: File, relative_piece_indexes: Iterable[int]) -> list[int]: ...

    # The docstring of get_piece() mentions `None` for nonexisting files,
    # but the implementation either returns `bytes(piece)` or raises, so
    # `bytes` is the declared return type.
    def get_piece(
        self, piece_index: int, content_path: StrPath | None = None
    ) -> bytes: ...

    # Class attribute read by the implementation when caching file handles
    max_open_files: int = 10

    # Yields `(piece, filepath, exceptions)`; `piece` may be `None`
    def iter_pieces(
        self, content_path: StrPath | None = None, oom_callback: Callable[[MemoryError], None] | None = None
    ) -> Iterator[tuple[bytes | None, File, tuple[TorfError, ...]]]: ...

    # `None` is returned if a file at `piece_index` does not exist
    def get_piece_hash(self, piece_index: int, content_path: StrPath | None = None) -> bytes | None: ...
    def verify_piece(self, piece_index: int, content_path: StrPath | None = None) -> bool | None: ...
comment='This is my first torrent. Be gentle.') Convenient access to metainfo via properties: >>> torrent.comment 'This is my first torrent. Be gentle.' >>> torrent.comment = "This is my first torrent. Let's rock!" >>> torrent.private = True Full control over unencoded metainfo: >>> torrent.metainfo['info']['private'] True >>> torrent.metainfo['more stuff'] = {'foo': 12, ... 'bar': ('x', 'y', 'z')} Hash pieces and update progress once per second: >>> def cb(torrent, filepath, pieces_done, pieces_total): ... print(f'{pieces_done/pieces_total*100:3.0f} % done') >>> success = torrent.generate(callback=cb, interval=1) 1 % done 2 % done [...] 100 % done Write torrent file: >>> torrent.write('my_torrent.torrent') Create magnet link: >>> torrent.magnet() 'magnet:?xt=urn:btih:e167b1fbb42ea72f051f4f50432703308efb8fd1&dn=My+Torrent&xl=142631&tr=https%3A%2F%2Flocalhost%3A123%2Fannounce' Read torrent from file: >>> t = Torrent.read('my_torrent.torrent') >>> t.comment "This is my first torrent. Let's rock!" 
def __init__(self, path=None, name=None,
             exclude_globs=(), exclude_regexs=(),
             include_globs=(), include_regexs=(),
             trackers=None, webseeds=None, httpseeds=None,
             private=None, comment=None, source=None, creation_date=None,
             created_by='%s %s' % (_PACKAGE_NAME, __version__),
             piece_size=None, piece_size_min=None, piece_size_max=None,
             randomize_infohash=False):
    """
    Create a torrent and populate its metainfo via the public properties

    Every argument is assigned to the property of the same name, so the
    validation and side effects of the property setters apply here as
    well.  `piece_size` and `name` are only assigned if they are not
    ``None``.
    """
    # Backing storage for the `path` and `metainfo` properties
    self._path = None
    self._metainfo = {}

    # Filter lists report to _filters_changed()
    # NOTE(review): presumably the callback fires on every modification of
    # the list; confirm in utils.MonitoredList
    self._exclude = {'globs'  : utils.MonitoredList(callback=self._filters_changed, type=str),
                     'regexs' : utils.MonitoredList(callback=self._filters_changed, type=re.compile)}
    self._include = {'globs'  : utils.MonitoredList(callback=self._filters_changed, type=str),
                     'regexs' : utils.MonitoredList(callback=self._filters_changed, type=re.compile)}

    # The piece size limits must exist before `path` is set: setting `path`
    # ends in _set_files(), which resets `piece_size`, and the `piece_size`
    # setter reads both limits.
    self.piece_size_min = piece_size_min
    self.piece_size_max = piece_size_max

    self.trackers = trackers
    self.webseeds = webseeds
    self.httpseeds = httpseeds
    self.private = private
    self.comment = comment
    self.creation_date = creation_date
    self.created_by = created_by
    self.source = source
    self.randomize_infohash = randomize_infohash

    # Filters are assigned before `path` because _set_files() applies them
    # to the files found beneath `path`
    self.exclude_globs = exclude_globs
    self.exclude_regexs = exclude_regexs
    self.include_globs = include_globs
    self.include_regexs = include_regexs
    self.path = path

    # Values that are implicitly changed by setting self.path
    if piece_size is not None:
        self.piece_size = piece_size
    if name is not None:
        self.name = name
""" if 'info' not in self._metainfo: self._metainfo['info'] = {} return self._metainfo @property def path(self): """ File system path to torrent content Files are filtered according to :attr:`exclude_globs`, :attr:`exclude_regexs`, :attr:`include_globs` and :attr:`include_regexs`. Setting or manipulating this property updates :attr:`metainfo`\\ ``['info']``: - ``name``, ``piece length`` and ``files`` or ``length`` are set. - ``pieces`` and ``md5sum`` are removed if they exist. :raises ReadError: if :attr:`path` or any path underneath it is not readable """ return self._path @path.setter def path(self, value): if value is None: # Keep info about name and files, but forget where they are stored self._path = None self.metainfo['info'].pop('pieces', None) else: basepath = pathlib.Path(str(value)) filepaths = tuple(utils.File(fp, size=utils.real_size(fp)) for fp in utils.list_files(basepath)) self._set_files(filepaths, basepath) @property def location(self): """ :attr:`path` without :attr:`name` or `None` if :attr:`path` is `None` """ if self.path: return self._path.parent @property def files(self): """ List of relative paths in this torrent Paths are :class:`File` objects and items are automatically deduplicated. Every path starts with :attr:`name`. Setting or manipulating this property updates :attr:`metainfo`\\ ``['info']``: - ``name``, ``piece length`` and ``files`` or ``length`` are set. - ``pieces`` and ``md5sum`` are removed if they exist. See :attr:`filepaths` for a list of file system paths. 
@files.setter
def files(self, files):
    """
    Replace the torrent's file list with `files`

    :param files: Iterable of relative :class:`File` objects that share a
        common parent directory

    :raises ValueError: if `files` is not iterable or contains anything that
        is not a :class:`File`
    :raises PathError: if any path is absolute
    :raises CommonPathError: if the files do not share a common parent
        directory
    """
    if not isinstance(files, utils.Iterable):
        raise ValueError(f'Not an Iterable: {files}')

    # Materialize once.  `files` is iterated by the validation loop below,
    # tested for emptiness and passed to os.path.commonpath(); a one-shot
    # iterable (e.g. a generator) would already be exhausted after the loop,
    # so the emptiness test would be wrong and commonpath() would see no
    # paths at all.
    files = tuple(files)

    for f in files:
        if not isinstance(f, utils.File):
            raise ValueError(f'Not a File object: {f}')
        elif f.is_absolute():
            raise error.PathError(f, msg='Not a relative path')

    if not files:
        self._set_files(files=())
        return

    # os.path.commonpath() returns '' if there is no common path and
    # raises ValueError if there are absolute and relative paths.
    try:
        basepath = os.path.commonpath(files)
    except ValueError:
        basepath = ''
    if basepath == '':
        raise error.CommonPathError(files)
    self._set_files(files, pathlib.Path(basepath))
@filepaths.setter
def filepaths(self, filepaths):
    # Replace the torrent's content with the given file system paths
    if not isinstance(filepaths, utils.Iterable):
        raise ValueError(f'Not an Iterable: {filepaths}')

    resolved = utils.Filepaths(filepaths)  # Resolve directories
    if not resolved:
        self._set_files(files=())
        return

    # The common path can only be determined if all paths are absolute.
    # Symlinks are deliberately left unresolved so that error messages
    # show the paths the user actually provided.
    cwd = pathlib.Path.cwd()
    absolute = []
    for fp in resolved:
        absolute.append(fp if fp.is_absolute() else cwd / fp)

    try:
        common = os.path.commonpath(tuple(absolute))
    except ValueError:
        raise error.CommonPathError(resolved)
    basepath = pathlib.Path(common)

    sized_files = tuple(
        utils.File(fp, size=utils.real_size(fp))
        for fp in resolved
    )
    self._set_files(sized_files, basepath)
with torrent name as first segment exclude_globs = tuple(str(g) for g in self._exclude['globs']) exclude_regexs = tuple(re.compile(r) for r in self._exclude['regexs']) exclude = tuple(itertools.chain(exclude_globs, exclude_regexs)) include_globs = tuple(str(g) for g in self._include['globs']) include_regexs = tuple(re.compile(r) for r in self._include['regexs']) include = tuple(itertools.chain(include_globs, include_regexs)) files = utils.filter_files(files, getter=relpath_with_parent, exclude=exclude, include=include, hidden=False, empty=False) info = self.metainfo['info'] if not files or all(f.size <= 0 for f in files): info.pop('files', None) info.pop('length', None) info.pop('pieces', None) info.pop('md5sum', None) elif len(files) == 1 and files[0] == basepath: # There is only one file and it is not in a directory. # NOTE: A directory with a single file in it is a multifile torrent. info['length'] = files[0].size info['name'] = files[0].name info.pop('files', None) info.pop('pieces', None) info.pop('md5sum', None) else: if str(basepath) == os.curdir: # Name of current working directory name = pathlib.Path.cwd().name elif str(basepath) == os.pardir: # Name of logical parent directory # NOTE: Path.resolve() returns the physical parent directory; if # the parent directory is a symlink, we get an unexpected name name = os.path.basename(os.path.dirname(os.getcwd())) elif str(basepath).endswith(os.curdir) or str(basepath).endswith(os.pardir): # Name of current/parent directory (logical parent, see NOTE above) name = pathlib.Path(os.path.normpath(basepath)).name else: name = basepath.name files_info = [] for f in sorted(files): files_info.append({'length': f.size, 'path' : list(relpath_without_parent(f).parts)}) info['name'] = name info['files'] = files_info info.pop('length', None) info.pop('pieces', None) info.pop('md5sum', None) # Set new path attribute if basepath exists if basepath is not None and os.path.exists(basepath): self._path = basepath else: self._path = 
@property
def exclude_globs(self):
    """
    List of case-insensitive wildcard patterns for files to leave out

    A file that matches an include pattern is kept even if it also matches
    an exclude pattern, i.e. include patterns take precedence.

    Patterns are matched against paths in :attr:`files`.

    ========== ================================
    Wildcard   Description
    ========== ================================
    ``*``      matches everything
    ``?``      matches any single character
    ``[SEQ]``  matches any character in ``SEQ``
    ``[!SEQ]`` matches any char not in ``SEQ``
    ========== ================================
    """
    return self._exclude['globs']

@exclude_globs.setter
def exclude_globs(self, value):
    if isinstance(value, utils.Iterable):
        # Replace the items in place so the monitored list object stays the
        # same
        self._exclude['globs'][:] = value
    else:
        raise ValueError(f'Must be Iterable, not {type(value).__name__}: {value}')
""" return self._include['regexs'] @include_regexs.setter def include_regexs(self, value): if not isinstance(value, utils.Iterable): raise ValueError(f'Must be Iterable, not {type(value).__name__}: {value}') self._include['regexs'][:] = value def _filters_changed(self, _): """Callback for MonitoredLists in Torrent._exclude""" # Apply filters if self.path is not None: # Read file list from disk again self.path = self.path else: # There are no existing files specified so we can just remove files self.files = self.files @property def filetree(self): """ :attr:`files` as a dictionary tree Parent nodes are dictionaries and leaf nodes are :class:`File` objects. The top node is always a dictionary with the single key :attr:`name`. Example: .. code:: python {'Torrent': {'bar': {'baz.mp3': File('Torrent/bar/baz.mp3', size=543210), 'baz.pdf': File('Torrent/bar/baz.pdf', size=999)}, 'foo.txt': File('Torrent/foo.txt', size=123456)}} """ tree = {} # Complete directory tree paths = (tuple(f.parts) for f in self.files) for path in paths: dirpath = path[:-1] # Path without filename filename = path[-1] subtree = tree for item in dirpath: if item not in subtree: subtree[item] = {} subtree = subtree[item] subtree[filename] = utils.File(path, size=self.partial_size(path)) return tree @property def name(self): """ Name of the torrent Default to last item in :attr:`path` or ``None`` if :attr:`path` is ``None``. If this property is set to ``None`` and :attr:`path` is not ``None``, it is set to the default. Setting this property sets or removes ``name`` in :attr:`metainfo`\\ ``['info']``. 
""" if 'name' not in self.metainfo['info'] and self.path is not None: self.metainfo['info']['name'] = self.path.name return utils.force_as_string( self.metainfo['info'].get('name', None) ) @name.setter def name(self, value): if value is None: self.metainfo['info'].pop('name', None) self.name # Set default name else: self.metainfo['info']['name'] = str(value) @property def mode(self): """ ``singlefile`` if this torrent contains one file that is not in a directory, ``multifile`` if it contains one or more files in a directory, or ``None`` if no content is specified (i.e. :attr:`files` is empty). """ if 'length' in self.metainfo['info']: return 'singlefile' elif 'files' in self.metainfo['info']: return 'multifile' @property def size(self): """Total size of content in bytes""" if self.mode == 'singlefile': return self.metainfo['info']['length'] elif self.mode == 'multifile': return sum(fileinfo['length'] for fileinfo in self.metainfo['info']['files']) else: return 0 def partial_size(self, path): """ Return size of one or more files as specified in :attr:`metainfo` :param path: Relative path within torrent, starting with :attr:`name`; may point to file or directory :type path: str, path-like or iterable :raises PathError: if `path` is not known """ if isinstance(path, str): path = tuple(path.split(os.sep)) elif isinstance(path, os.PathLike): path = tuple(path.parts) elif isinstance(path, abc.Iterable): path = tuple(str(part) for part in path) else: raise ValueError(f'Must be str, Path or Iterable, not {type(path).__name__}: {path}') if self.mode == 'singlefile' and path == (self.name,): return self.metainfo['info']['length'] elif self.mode == 'multifile': file_sizes = [] for info in self.metainfo['info']['files']: this_path = (self.name,) + tuple(c for c in info['path'] if c) if this_path == path: # path points to file return info['length'] elif utils.iterable_startswith(this_path, path): # path points to directory file_sizes.append(info['length']) if file_sizes: return 
@property
def piece_size(self):
    """
    Number of bytes in each piece

    Assigning ``None`` picks a value automatically: if :attr:`size` is
    larger than 0, the return value of :attr:`calculate_piece_size` is
    used; if :attr:`size` is smaller than 1,
    :attr:`metainfo`\\ ``['info']``\\ ``['piece length']`` is removed.

    Setting this property sets or removes ``piece length`` in
    :attr:`metainfo`\\ ``['info']``.
    """
    return self.metainfo['info'].get('piece length', 0)

@piece_size.setter
def piece_size(self, value):
    if value is None and self.size <= 0:
        # Nothing to split into pieces
        self.metainfo['info'].pop('piece length', None)
        return

    if value is None:
        value = self.calculate_piece_size(
            self.size,
            min_size=self.piece_size_min,
            max_size=self.piece_size_max,
        )

    try:
        new_size = int(value)
    except (TypeError, ValueError):
        raise ValueError(f'piece_size must be int, not {type(value).__name__}: {value!r}')

    if not utils.is_divisible_by_16_kib(new_size):
        raise error.PieceSizeError(new_size)
    if not self.piece_size_min <= new_size <= self.piece_size_max:
        raise error.PieceSizeError(new_size,
                                   min=self.piece_size_min,
                                   max=self.piece_size_max)
    self.metainfo['info']['piece length'] = new_size
""" return self._piece_size_min @piece_size_min.setter def piece_size_min(self, piece_size_min): if piece_size_min is None: self._piece_size_min = type(self).piece_size_min_default elif not utils.is_divisible_by_16_kib(piece_size_min): raise error.PieceSizeError(piece_size_min) else: self._piece_size_min = int(piece_size_min) # If a piece size is set, silently limit it to new minimum if self.piece_size: self.piece_size = max(self.piece_size_min, self.piece_size) @property def piece_size_max(self): """ Largest allowed piece size Setting this property also sets :attr:`piece_size` to the same value if it is bigger. """ return self._piece_size_max @piece_size_max.setter def piece_size_max(self, piece_size_max): if piece_size_max is None: self._piece_size_max = type(self).piece_size_max_default elif not utils.is_divisible_by_16_kib(piece_size_max): raise error.PieceSizeError(piece_size_max) else: self._piece_size_max = int(piece_size_max) # If a piece size is set, silently limit it to new maximum if self.piece_size: self.piece_size = min(self.piece_size_max, self.piece_size) piece_size_min_default = 16 * 1024 # 16 KiB """ Smallest allowed piece size (default value) Setting :attr:`piece_size` to a smaller value raises :class:`PieceSizeError`. """ piece_size_max_default = 16 * 1024 * 1024 # 16 MiB """ Greatest allowed piece size (default value) Setting :attr:`piece_size` to a greater value raises :class:`PieceSizeError`. """ @classmethod def calculate_piece_size(cls, size, min_size=None, max_size=None): """ Return the piece size for a total torrent size of ``size`` bytes It is safe to override this method to implement a custom algorithm. 
:param int min_size: Minimum piece size; defaults to :attr:`Torrent.piece_size_min` :param int max_size: Maximum piece size; defaults to :attr:`Torrent.piece_size_max` :return: calculated piece size """ if size <= 2**30: # <= 1 GiB / 1 - 512 pieces / 16 KiB - 2 MiB per piece max_pieces = 512 elif size <= 8 * 2**30: # 1 - 8 GiB / 512 - 1024 pieces / 2 - 8 MiB per piece max_pieces = 1024 elif size <= 16 * 2**30: # 8 - 16 GiB / up to 1024 + 512 pieces / 8 - 16 MiB per piece max_pieces = 1536 else: # > 16 GiB / up to 2048 pieces / 16 MiB - `max_size` per piece max_pieces = 2048 # Math is magic! exponent = math.ceil(math.log2(size / max_pieces)) piece_size = int(math.pow(2, exponent)) if min_size is None: min_size = cls.piece_size_min_default if max_size is None: max_size = cls.piece_size_max_default return int(min(max(piece_size, min_size), max_size)) @property def pieces(self): """Number of pieces the content is split into""" size, piece_size = self.size, self.piece_size if size and piece_size and size > 0 and piece_size > 0: return math.ceil(size / piece_size) else: return 0 @property def hashes(self): """Tuple of SHA1 piece hashes as :class:`bytes`""" hashes = self.metainfo['info'].get('pieces') if isinstance(hashes, (bytes, bytearray)): # Each hash is 20 bytes long return tuple(bytes(hashes[pos : pos + 20]) for pos in range(0, len(hashes), 20)) else: return () @property def trackers(self): """ List of tiers (lists) of announce URLs http://bittorrent.org/beps/bep_0012.html This is a smart list that ensures the proper list-of-lists structure, validation and deduplication. You can set this property to a URL, an iterable of URLs or an iterable of iterables of URLs (i.e. "tiers"). 
This property automatically sets :attr:`metainfo`\\ ``['announce']`` and :attr:`metainfo`\\ ``['announce-list']`` when it is manipulated or set according to these rules: - If it contains a single URL, :attr:`metainfo`\\ ``['announce']`` is set and :attr:`metainfo`\\ ``['announce-list']`` is removed if it exists. - If it contains an iterable of URLs, :attr:`metainfo`\\ ``['announce']`` is set to the first URL and :attr:`metainfo`\\ ``['announce-list']`` is set to a list of tiers, one tier for each URL. - If it contains an iterable of iterables of URLs, :attr:`metainfo`\\ ``['announce']`` is set to the first URL of the first iterable and :attr:`metainfo`\\ ``['announce-list']`` is set to a list of tiers, one tier for each iterable of URLs. :raises URLError: if any of the announce URLs is invalid :raises ValueError: if set to anything that isn't an iterable or a string """ tiers = list(self.metainfo.get('announce-list', ())) announce = self.metainfo.get('announce', None) flat_urls = tuple(url for tier in tiers for url in tier) if announce is not None and announce not in flat_urls: tiers.insert(0, [announce]) return utils.Trackers(tiers, callback=self._trackers_changed) @trackers.setter def trackers(self, value): if value is None: value = () if isinstance(value, abc.Iterable): self._trackers_changed(utils.Trackers(value)) else: raise ValueError(f'Must be Iterable, str or None, not {type(value).__name__}: {value}') def _trackers_changed(self, trackers): # Set "announce" to first tracker of first tier try: self.metainfo['announce'] = str(trackers[0][0]) except IndexError: self.metainfo.pop('announce', None) # Remove "announce-list" if there's only one tracker if len(trackers.flat) <= 1: self.metainfo.pop('announce-list', None) else: if 'announce-list' not in self.metainfo: self.metainfo['announce-list'] = [] # Set announce-list without changing its identity self.metainfo['announce-list'][:] = ([str(url) for url in tier] for tier in trackers) @property def webseeds(self): 
""" List of webseed URLs http://bittorrent.org/beps/bep_0019.html The list returned by this property automatically updates :attr:`metainfo`\\ ``['url-list']`` when manipulated. Setting this property sets :attr:`metainfo`\\ ``['url-list']``. :raises URLError: if any URL is invalid :raises ValueError: if set to anything that isn't an iterable or a string """ return utils.URLs(self.metainfo.get('url-list', ()), callback=self._webseeds_changed) @webseeds.setter def webseeds(self, value): if isinstance(value, str): urls = utils.URLs((value,)) elif isinstance(value, abc.Iterable): urls = utils.URLs(value) elif value is None: urls = utils.URLs(()) else: raise ValueError(f'Must be Iterable, str or None, not {type(value).__name__}: {value}') self._webseeds_changed(urls) def _webseeds_changed(self, webseeds): if webseeds: self.metainfo['url-list'] = [str(url) for url in webseeds] else: self.metainfo.pop('url-list', None) @property def httpseeds(self): """ List of webseed URLs http://bittorrent.org/beps/bep_0017.html The list returned by this property automatically updates :attr:`metainfo`\\ ``['httpseeds']`` when manipulated. Setting this property sets :attr:`metainfo`\\ ``['httpseeds']``. 
@property
def private(self):
    """
    Whether peers should be discovered exclusively via trackers

    ``True`` or ``False`` if :attr:`metainfo`\\ ``['info']``\\ ``['private']``
    exists, ``None`` otherwise.
    """
    info = self.metainfo['info']
    try:
        flag = info['private']
    except KeyError:
        return None
    return bool(flag)

@private.setter
def private(self, value):
    info = self.metainfo['info']
    if value is not None:
        info['private'] = bool(value)
    else:
        info.pop('private', None)
""" date = self.metainfo.get('creation date', None) if isinstance(date, (float, int)): return datetime.fromtimestamp(date) else: return date @creation_date.setter def creation_date(self, value): if isinstance(value, (float, int)): self.metainfo['creation date'] = datetime.fromtimestamp(value) elif isinstance(value, datetime): self.metainfo['creation date'] = value elif not value: self.metainfo.pop('creation date', None) else: raise ValueError( 'Must be None, int or datetime object, ' f'not {type(value).__name__}: {value!r}' ) @property def created_by(self): """ Application name or ``None`` for no creator Setting this property sets or removes :attr:`metainfo`\\ ``['created by']``. """ return utils.force_as_string( self.metainfo.get('created by', None) ) @created_by.setter def created_by(self, value): if value is not None: self.metainfo['created by'] = str(value) else: self.metainfo.pop('created by', None) @property def source(self): """ Source string or ``None`` for no source Setting this property sets or removes :attr:`metainfo`\\ ``['info']``\\ ``['source']``. """ return utils.force_as_string( self.metainfo['info'].get('source', None) ) @source.setter def source(self, value): if value is not None: self.metainfo['info']['source'] = str(value) else: self.metainfo['info'].pop('source', None) @property def infohash(self): """ SHA1 info hash :raises MetainfoError: if :attr:`validate` fails or :attr:`metainfo` is not bencodable """ try: # Try to calculate infohash self.validate() try: info = utils.encode_dict(self.metainfo['info']) except ValueError as e: raise error.MetainfoError(e) else: return hashlib.sha1(bencode.encode(info)).hexdigest() except error.MetainfoError as e: # If we can't calculate infohash, see if it was explicitly specifed. # This is necessary to create a Torrent from a Magnet URI. 
@property
def randomize_infohash(self):
    """
    Whether to ensure that :attr:`infohash` is always unique

    This allows for cross-seeding without changing :attr:`piece_size`.

    Setting this property to ``True`` sets
    :attr:`metainfo`\\ ``['info']``\\ ``['entropy']`` to a random integer.
    Setting it to ``False`` removes that field.
    """
    # Test for the presence of the field, not for its truthiness: the
    # setter draws from a range that includes 0, and an entropy of 0 still
    # changes the infohash.
    return 'entropy' in self.metainfo['info']

@randomize_infohash.setter
def randomize_infohash(self, value):
    if value:
        # According to BEP0003 "Integers have no size limitation", but some
        # parsers seem to have problems with large numbers.
        import random
        self.metainfo['info']['entropy'] = random.randint(int(-2e9), int(2e9))
    else:
        self.metainfo['info'].pop('entropy', None)
def verify(self, path, threads=None, callback=None, interval=0):
    """
    Check if `path` contains all the data specified in this torrent

    Generate hashes from file content and compare them to the ones stored in
    :attr:`metainfo`\\ ``['info']``\\ ``['pieces']``.

    :param str path: Directory or file to read from
    :param int threads: How many threads to use for hashing pieces or
        ``None`` to use one thread per available CPU core
    :param callable callback: Callable to report progress and/or abort

        `callback` must accept 7 positional arguments:

            1. Torrent instance (:class:`Torrent`)
            2. File path in file system (:class:`str`)
            3. Number of checked pieces (:class:`int`)
            4. Total number of pieces (:class:`int`)
            5. Index of the current piece (:class:`int`)
            6. SHA1 hash of the current piece (:class:`bytes` or ``None``)
            7. Exception (:class:`TorfError` or ``None``)

        If `callback` returns anything that is not ``None``, verification is
        stopped.

    :param float interval: Minimum number of seconds between calls to
        `callback` (if 0, `callback` is called once per piece); this is
        ignored if an error is found

    If a callback is specified, exceptions are not raised but passed to
    `callback` instead.

    :raises VerifyContentError: if a file contains unexpected data
    :raises VerifyIsDirectoryError: if `path` is a directory and this
        torrent contains a single file
    :raises VerifyNotDirectoryError: if `path` is a file and this torrent
        contains a directory
    :raises ReadError: if a file is not readable
    :raises MetainfoError: if :meth:`validate` fails

    :return: ``True`` if `path` is verified successfully, ``False``
        otherwise
    """
    # First make sure we are a valid torrent
    self.validate()

    # Wrapper around callback function that compares hashes
    verify_callback = generate.VerifyCallback(
        callback=callback,
        interval=interval,
        torrent=self,
        path=path,
    )

    def early_exception(exception):
        # Report an error that is detected before any piece was read; the
        # progress values describe "nothing done yet"
        piece_index = 0
        pieces_done = 0
        pieces_total = self.pieces
        filepath = None
        piece_hash = None
        exceptions = (exception,)
        verify_callback(piece_index, pieces_done, pieces_total,
                        filepath, piece_hash, exceptions)

    # A file/directory mismatch between torrent and `path` is reported
    # before any hashing machinery is set up
    if self.mode == 'singlefile' and os.path.isdir(path):
        early_exception(error.VerifyIsDirectoryError(path))
        return False
    elif self.mode == 'multifile' and not os.path.isdir(path):
        early_exception(error.VerifyNotDirectoryError(path))
        return False
    else:
        # `threads` being None or 0 falls back to the number of CPU cores
        hasher_threads = threads or NCORES

        # Read piece_size'd chunks from disk and send them to HasherPool
        reader = generate.Reader(
            torrent=self,
            queue_size=hasher_threads * 3,
            path=path,
        )

        # Multiple threads that get chunks from Reader, calculate the hashes,
        # and push them to a hash queue
        hashers = generate.HasherPool(
            hasher_threads=hasher_threads,
            piece_queue=reader.piece_queue,
        )

        # Collect piece hashes from HasherPool and call `callback` for status
        # reporting/cancellation
        collector = generate.Collector(
            torrent=self,
            reader=reader,
            hashers=hashers,
            callback=verify_callback,
        )
        piece_hashes = collector.collect()

        # NOTE(review): `hashes` is a tuple, so this comparison can only be
        # true if collect() returns a tuple as well; confirm in
        # generate.Collector
        return piece_hashes == self.hashes
:raises VerifyFileSizeError: if a file has an unexpected size :raises VerifyIsDirectoryError: if `path` is a directory and this torrent contains a single file :raises ReadError: if any file's size can't be determined :raises MetainfoError: if :meth:`validate` fails :return: ``True`` if `path` is verified successfully, ``False`` otherwise """ # First make sure we are a valid torrent self.validate() # Generate an ordered list of file system paths and their corresponding # paths inside the torrent # NOTE: The last segment in `path` is supposed to be the torrent name so # we must remove the name from each `file`. This allows # verification of any renamed file/directory against a torrent. filepaths = tuple( ( utils.File([path, *file.parts[1:]], file.size), file, ) for file in self.files ) files_total = len(filepaths) def cancel(file_index, exception): if callback: fs_filepath = filepaths[file_index][0] torrent_filepath = filepaths[file_index][1] files_done = file_index + 1 return_value = callback(self, fs_filepath, torrent_filepath, files_done, files_total, exception) if return_value is not None: return True elif exception: # Raise exception if there is no callback to handle it raise exception else: return False exception = None # If we expect a file, check if path is a file. We don't need to check # for a directory if we expect one because we are iterating over files # (filepaths), so the path "foo/bar/baz" will result in a ReadError if # "foo" or "foo/bar" is a file. 
def validate(self):
    """
    Check if all mandatory keys exist in :attr:`metainfo` and all standard
    keys have correct types

    The checks run in a fixed order and the first failure is raised:

        1. Types of the keys shared by singlefile and multifile torrents
        2. ``['info']['pieces']`` is not empty and its length is a multiple
           of 20 (the size of one SHA1 hash)
        3. Exactly one of ``['info']['length']`` (singlefile) and
           ``['info']['files']`` (multifile) exists
        4. The number of piece hashes matches the content size and piece
           length
        5. If :attr:`path` is set, the file system content has the expected
           type (file/directory) and the expected file sizes

    References:
      | http://bittorrent.org/beps/bep_0003.html
      | https://wiki.theory.org/index.php/BitTorrentSpecification#Metainfo_File_Structure

    :raises MetainfoError: if :attr:`metainfo` would not generate a valid
        torrent file or magnet link
    """
    md = self.metainfo
    info = md['info']

    # Check values shared by singlefile and multifile torrents
    utils.assert_type(md, ('info',), (dict,), must_exist=True)
    utils.assert_type(md, ('info', 'name'), (str, bytes), must_exist=True)
    utils.assert_type(md, ('info', 'piece length'), (int,), must_exist=True,
                      check=utils.is_divisible_by_16_kib)
    utils.assert_type(md, ('info', 'pieces'), (bytes,), must_exist=True)
    utils.assert_type(md, ('info', 'private'), (bool, int), must_exist=False)
    utils.assert_type(md, ('creation date',), (int, datetime), must_exist=False)
    utils.assert_type(md, ('announce',), (str,), must_exist=False, check=utils.is_url)
    utils.assert_type(md, ('announce-list',), (utils.Iterable,), must_exist=False)
    # "announce-list" is a list of tiers and each tier is a list of URLs
    for i,_ in enumerate(md.get('announce-list', ())):
        utils.assert_type(md, ('announce-list', i), (utils.Iterable,))
        for j,_ in enumerate(md['announce-list'][i]):
            utils.assert_type(md, ('announce-list', i, j), (str,), check=utils.is_url)

    if len(info['pieces']) == 0:
        raise error.MetainfoError("['info']['pieces'] is empty")
    elif len(info['pieces']) % 20 != 0:
        # 'pieces' is a concatenation of 20-byte hashes
        raise error.MetainfoError("length of ['info']['pieces'] is not divisible by 20")
    elif 'length' in info and 'files' in info:
        raise error.MetainfoError("['info'] includes both 'length' and 'files'")
    elif 'length' in info:
        # Validate info as singlefile torrent
        utils.assert_type(md, ('info', 'length'), (int, float), must_exist=True)
        utils.assert_type(md, ('info', 'md5sum'), (str,), must_exist=False, check=utils.is_md5sum)

        # Validate expected number of pieces
        piece_count = int(len(info['pieces']) / 20)
        exp_piece_count = math.ceil(info['length'] / info['piece length'])
        if piece_count != exp_piece_count:
            raise error.MetainfoError(f'Expected {exp_piece_count} pieces but there are {piece_count}')

        if self.path is not None:
            # Check if filepath actually points to a file
            if not os.path.isfile(self.path):
                raise error.MetainfoError(f"Metainfo includes {self.path} as file, but it is not a file")

            # Check if size matches
            path_size = utils.real_size(self.path)
            if path_size != info['length']:
                raise error.MetainfoError(f"Mismatching file sizes in metainfo ({info['length']})"
                                          f" and file system ({path_size}): {self.path}")

    elif 'files' in info:
        # Validate info as multifile torrent
        utils.assert_type(md, ('info', 'files'), (utils.Iterable,), must_exist=True)
        for i,fileinfo in enumerate(info['files']):
            utils.assert_type(md, ('info', 'files', i), (abc.Mapping,), must_exist=True)
            utils.assert_type(md, ('info', 'files', i, 'length'), (int, float), must_exist=True)
            utils.assert_type(md, ('info', 'files', i, 'path'), (utils.Iterable,), must_exist=True)
            utils.assert_type(md, ('info', 'files', i, 'md5sum'), (str,), must_exist=False,
                              check=utils.is_md5sum)
            # Each path is a list of path segments
            for j,item in enumerate(fileinfo['path']):
                utils.assert_type(md, ('info', 'files', i, 'path', j), (str, bytes))

        # Validate expected number of pieces; the content size is the sum of
        # all file lengths
        piece_count = int(len(info['pieces']) / 20)
        exp_piece_count = math.ceil(sum(fileinfo['length'] for fileinfo in info['files']) / info['piece length'])
        if piece_count != exp_piece_count:
            raise error.MetainfoError(f'Expected {exp_piece_count} pieces but there are {piece_count}')

        if self.path is not None:
            # Check if filepath actually points to a directory
            if not os.path.isdir(self.path):
                raise error.MetainfoError(f"Metainfo includes {self.path} as directory, but it is not a directory")

            for i,fileinfo in enumerate(info['files']):
                filepath = os.path.join(self.path, os.path.join(*fileinfo['path']))

                # Check if filepath exists and is a file
                if not os.path.exists(filepath):
                    raise error.MetainfoError(f"Metainfo includes file that doesn't exist: {filepath}")
                if not os.path.isfile(filepath):
                    raise error.MetainfoError(f"Metainfo includes file that isn't a file: {filepath}")

                # Check if sizes match
                filesize = utils.real_size(filepath)
                if filesize != fileinfo['length']:
                    raise error.MetainfoError(f"Mismatching file sizes in metainfo ({fileinfo['length']})"
                                              f" and file system ({filesize}): {filepath}")

    else:
        raise error.MetainfoError("Missing 'length' or 'files' in 'info'")
def write(self, filepath, validate=True, overwrite=False):
    """
    Write :attr:`metainfo` to torrent file

    The file content is generated completely before `filepath` is opened,
    so an existing file is never truncated if generating the content
    fails.

    :param filepath: Path of the torrent file
    :param bool validate: Whether to run :meth:`validate` first
    :param bool overwrite: Whether to silently overwrite `filepath` (only
        after all pieces were hashed successfully)

    :raises WriteError: if writing to `filepath` fails or if `filepath`
        exists and `overwrite` is false
    :raises MetainfoError: if :attr:`metainfo` is invalid
    """
    # Fail early so we don't generate the content for nothing
    if not overwrite and os.path.exists(filepath):
        raise error.WriteError(errno.EEXIST, filepath)

    # Get file content before opening the file in case there are errors like
    # incomplete metainfo
    content = io.BytesIO()
    self.write_stream(content, validate=validate)

    # The check above is not atomic: `filepath` may have been created in the
    # meantime. Exclusive creation ('x') makes open() itself fail with EEXIST
    # in that case instead of silently replacing the new file.
    mode = 'wb' if overwrite else 'xb'
    try:
        with open(filepath, mode) as f:
            f.write(content.getvalue())
    except OSError as e:
        raise error.WriteError(e.errno, filepath)
@classmethod
def read_stream(cls, stream, validate=True):
    """
    Read torrent metainfo from file-like object

    :param stream: An instance of :class:`bytes` or :class:`bytearray`, or
        a readable file-like object (e.g. :class:`io.BytesIO`)
    :param bool validate: Whether to run :meth:`validate` on the new Torrent
        instance

        NOTE: If the "info" field is not a dictionary,
              :class:`MetainfoError` is raised even if `validate` is
              set to False

    At most :attr:`MAX_TORRENT_FILE_SIZE` bytes are read from a file-like
    `stream`.

    :raises ReadError: if reading from `stream` fails
    :raises BdecodeError: if `stream` does not produce a valid bencoded byte
        sequence
    :raises MetainfoError: if `validate` is `True` and the read metainfo is
        invalid
    :raises ValueError: if `stream` is a byte sequence that is larger than
        :attr:`MAX_TORRENT_FILE_SIZE`
    :raises TypeError: if `stream` is neither a byte sequence nor has a
        ``read`` attribute

    :return: New :class:`Torrent` instance
    """
    try:
        if isinstance(stream, (bytes, bytearray)):
            if len(stream) > cls.MAX_TORRENT_FILE_SIZE:
                # Report the limit that was actually applied, which may be
                # overridden by a subclass
                raise ValueError(
                    'Size of stream exceeds Torrent.MAX_TORRENT_FILE_SIZE: '
                    f'{len(stream)} > {cls.MAX_TORRENT_FILE_SIZE}'
                )
            content = stream
        elif hasattr(stream, 'read'):
            content = stream.read(cls.MAX_TORRENT_FILE_SIZE)
        else:
            raise TypeError(
                f'Expected bytes, bytearray or a readable file-like object, got {type(stream).__name__}'
            )
    except OSError as e:
        raise error.ReadError(e.errno)

    try:
        metainfo_enc = bencode.decode(content)
    except (bencode.DecodingError, ValueError):
        raise error.BdecodeError()
    if not isinstance(metainfo_enc, abc.Mapping):
        raise error.BdecodeError()

    # Extract 'pieces' from metainfo before decoding because it's the
    # only byte sequence that isn't supposed to be decoded to a string.
    if (b'info' in metainfo_enc
        and isinstance(metainfo_enc[b'info'], dict)
        and b'pieces' in metainfo_enc[b'info']):
        pieces = metainfo_enc[b'info'].pop(b'pieces')
        metainfo = utils.decode_dict(metainfo_enc)
        metainfo['info']['pieces'] = pieces
    else:
        metainfo = utils.decode_dict(metainfo_enc)

    # "info" must be a dictionary. If validation is not wanted, it's OK
    # if it doesn't exist because the "metainfo" property will add it
    # automatically.
    utils.assert_type(metainfo, ('info',), (dict,), must_exist=validate)

    torrent = cls()
    torrent._metainfo = metainfo

    # Convert "creation date" to datetime.datetime and "private" to
    # bool, but only if they exist
    if b'creation date' in metainfo_enc:
        torrent.creation_date = metainfo_enc[b'creation date']
    if b'private' in metainfo_enc.get(b'info', {}):
        torrent.private = metainfo_enc[b'info'][b'private']

    if validate:
        torrent.validate()

    return torrent
length`` from existing torrent If `path` is a directory, it is searched recursively for a matching ``.torrent`` file. Otherwise, it is :meth:`read` as a ``.torrent`` file. `path` may also be an iterable, in which case each item is treated as described above until a match is found. A matching torrent is found by searching for a torrent with the same :attr:`name` and :attr:`files` (file size must also match). If a match is found, compare three piece hashes per file to reduce the risk of a false positive. .. warning:: This should almost always work in practice, but a false positive match is theoretically possible, and there is no way to avoid that. .. note:: This will likely change :attr:`infohash` and turn this instance into a new torrent. :param path: Path to torrent file or directory or iterable of file/directory paths :param callable callback: Callable to report progress and/or abort `callback` must accept 6 positional arguments: 1. Torrent instance (:class:`Torrent`) 2. Torrent file path (:class:`str`) or ``None`` 3. Number of checked torrent files (:class:`int`) 4. Total number of torrent files (:class:`int`) 5. Status indicator: ``False`` - File name or size does not match ``True`` - File name and size matches ``None`` - Hashing file and comparing piece hashes 6. Exception (:class:`TorfError`) or ``None`` If `callback` returns anything that is not ``None``, stop searching. 
:param float interval: Minimum number of seconds between calls to `callback`; if 0, `callback` is called for each torrent file; `callback` is always called if `exception` is not ``None`` :raises ReadError: if reading a torrent file fails :raises BdecodeError: if parsing a torrent file fails :raises MetainfoError: if a torrent file contains invalid or insufficient metadata :return: `True` if a matching torrent was found, `False` otherwise """ if not self.path: raise RuntimeError('reuse() called with no path specified') elif not self.files: raise RuntimeError('reuse() called while file list is empty') if isinstance(path, (str, pathlib.PurePath)): paths = [path] elif isinstance(path, abc.Iterable): paths = tuple(path) else: raise ValueError(f'Invalid path argument: {path!r}') torrent_file_items = reuse.find_torrent_files(*paths, max_file_size=self.MAX_TORRENT_FILE_SIZE) maybe_call_callback = reuse.ReuseCallback( callback=callback, interval=interval, torrent=self, torrent_files_total=torrent_file_items.total, ) for candidate_path, files_done, exception in torrent_file_items: try: if candidate_path: candidate = Torrent.read(candidate_path) elif exception: raise exception else: raise RuntimeError('Both candidate_path and exception are None?!') except (error.ReadError, error.BdecodeError, error.MetainfoError) as e: cancelled = maybe_call_callback(candidate_path, files_done, False, e) if cancelled is not None: break else: assert exception is None if reuse.is_file_match(self, candidate): cancelled = maybe_call_callback(candidate_path, files_done, None, exception) if cancelled is not None: break if reuse.is_content_match(self, candidate): maybe_call_callback(candidate_path, files_done, True, exception) reuse.copy(candidate, self) return True else: cancelled = maybe_call_callback(candidate_path, files_done, False, exception) if cancelled is not None: break else: cancelled = maybe_call_callback(candidate_path, files_done, False, exception) if cancelled is not None: break 
def __repr__(self):
    """
    Return a constructor-like representation of this instance

    Every parameter of ``__init__`` is looked up as an attribute of the
    same name. Parameters without a default are always shown; keyword
    parameters are only shown if their current value is truthy and differs
    from both the default in the signature and the class-level default.
    """
    cls = type(self)

    def get_class_default(name):
        # Prefer an explicit "<name>_default" class attribute and fall back
        # to a class attribute called "<name>". Returns None if neither
        # exists.
        if hasattr(cls, f'{name}_default'):
            return getattr(cls, f'{name}_default')
        elif hasattr(cls, name):
            return getattr(cls, name)

    args = []
    for param in inspect.signature(self.__init__).parameters.values():
        value = getattr(self, param.name)
        default = param.default
        if default is param.empty:
            # Positional argument
            args.append(repr(value))
        elif (
            value
            # Keyword argument value is different from default?
            and default != value
            # Keyword argument value is different from class default
            and value != get_class_default(param.name)
        ):
            args.append(f'{param.name}={value!r}')
    return cls.__name__ + '(' + ', '.join(args) + ')'
# Type stub for torf._torrent.Torrent
class Torrent:
    def __init__(
        self,
        path: StrPath | None = None,
        name: str | None = None,
        exclude_globs: Iterable[str] = (),
        exclude_regexs: Iterable[str] = (),
        include_globs: Iterable[str] = (),
        include_regexs: Iterable[str] = (),
        trackers: Iterable[str] | str | None = None,
        webseeds: Iterable[str] | str | None = None,
        httpseeds: Iterable[str] | str | None = None,
        private: bool | None = None,
        comment: str | None = None,
        source: str | None = None,
        creation_date: int | float | datetime | None = None,
        created_by: str | None = f"{_PACKAGE_NAME} {__version__}",
        piece_size: int | None = None,
        piece_size_min: int | None = None,
        piece_size_max: int | None = None,
        randomize_infohash: bool = False,
    ) -> None: ...
    @property
    def metainfo(self) -> _MetaInfo: ...
    @property
    def path(self) -> Path | None: ...
    @path.setter
    def path(self, value: StrPath | None) -> None: ...
    @property
    def location(self) -> Path | None: ...
    @property
    def files(self) -> Files: ...
    @files.setter
    def files(self, files: Iterable[File]) -> None: ...
    @property
    def filepaths(self) -> Filepaths: ...
    @filepaths.setter
    def filepaths(self, filepaths: Iterable[Filepath]) -> None: ...
    @property
    def exclude_globs(self) -> MonitoredList[str]: ...
    @exclude_globs.setter
    def exclude_globs(self, value: Iterable[str]) -> None: ...
    @property
    def include_globs(self) -> MonitoredList[str]: ...
    @include_globs.setter
    def include_globs(self, value: Iterable[str]) -> None: ...
    @property
    def exclude_regexs(self) -> MonitoredList[Pattern[str]]: ...
    @exclude_regexs.setter
    def exclude_regexs(self, value: Iterable[str]) -> None: ...
    @property
    def include_regexs(self) -> MonitoredList[Pattern[str]]: ...
    @include_regexs.setter
    def include_regexs(self, value: Iterable[str]) -> None: ...
    @property
    def filetree(self) -> dict[str, dict[str, File]]: ...
    @property
    def name(self) -> str | None: ...
    @name.setter
    def name(self, value: str | None) -> None: ...
    @property
    def mode(self) -> Literal["singlefile", "multifile"] | None: ...
    @property
    def size(self) -> int: ...
    def partial_size(self, path: StrPath | Iterable[StrPath]) -> int: ...
    @property
    def piece_size(self) -> int: ...
    @piece_size.setter
    def piece_size(self, value: int | None) -> None: ...
    @property
    def piece_size_min(self) -> int: ...
    @piece_size_min.setter
    def piece_size_min(self, piece_size_min: int | None) -> None: ...
    @property
    def piece_size_max(self) -> int: ...
    @piece_size_max.setter
    def piece_size_max(self, piece_size_max: int | None) -> None: ...
    piece_size_min_default: int = ...
    piece_size_max_default: int = ...
    @classmethod
    def calculate_piece_size(cls, size: int, min_size: int | None = None, max_size: int | None = None) -> int: ...
    @property
    def pieces(self) -> int: ...
    @property
    def hashes(self) -> tuple[bytes, ...]: ...
    @property
    def trackers(self) -> Trackers: ...
    @trackers.setter
    def trackers(self, value: str | Iterable[str] | None) -> None: ...
    @property
    def webseeds(self) -> URLs: ...
    @webseeds.setter
    def webseeds(self, value: str | Iterable[str] | None) -> None: ...
    @property
    def httpseeds(self) -> URLs: ...
    @httpseeds.setter
    def httpseeds(self, value: str | Iterable[str] | None) -> None: ...
    @property
    def private(self) -> bool | None: ...
    @private.setter
    def private(self, value: bool | None) -> None: ...
    @property
    def comment(self) -> str | None: ...
    @comment.setter
    def comment(self, value: str | None) -> None: ...
    @property
    def creation_date(self) -> datetime | None: ...
    @creation_date.setter
    def creation_date(self, value: int | float | datetime | None) -> None: ...
    @property
    def created_by(self) -> str | None: ...
    @created_by.setter
    def created_by(self, value: str | None) -> None: ...
    @property
    def source(self) -> str | None: ...
    @source.setter
    def source(self, value: str | None) -> None: ...
    @property
    def infohash(self) -> str: ...
    @property
    def infohash_base32(self) -> bytes: ...
    @property
    def randomize_infohash(self) -> bool: ...
    @randomize_infohash.setter
    def randomize_infohash(self, value: bool) -> None: ...
    @property
    def is_ready(self) -> bool: ...
    def generate(
        self,
        threads: int | None = None,
        callback: Callable[[Torrent, str, int, int], Any] | None = None,
        interval: float = 0,
    ) -> bool: ...
    def verify(
        self,
        path: StrPath,
        threads: int | None = None,
        callback: Callable[[Torrent, str, int, int, int, bytes | None, TorfError | None], Any] | None = None,
        interval: float = 0,
    ) -> bool: ...
    def verify_filesize(
        self,
        path: StrPath,
        callback: Callable[[Torrent, str, str, int, int, TorfError | None], Any] | None = None
    ) -> bool: ...
    def validate(self) -> None: ...
    def convert(self) -> OrderedDict[bytes, Any]: ...
    def dump(self, validate: bool = True) -> bytes: ...
    def write_stream(self, stream: _WritableBinaryStream, validate: bool = True) -> None: ...
    def write(self, filepath: StrPath, validate: bool = True, overwrite: bool = False) -> None: ...
    def magnet(self, name: bool = True, size: bool = True, trackers: bool = True, tracker: bool = False) -> Magnet: ...
    MAX_TORRENT_FILE_SIZE: int = ...
    @classmethod
    def read_stream(cls, stream: bytes | bytearray | _ReadableBinaryStream, validate: bool = True) -> Self: ...
    @classmethod
    def read(cls, filepath: StrPath, validate: bool = True) -> Self: ...
    def copy(self) -> Self: ...
    # `path` is a single torrent file/directory path or an iterable of such
    # paths, like `partial_size` above
    def reuse(
        self,
        path: StrPath | Iterable[StrPath],
        callback: Callable[[Torrent, str | None, int, int, bool | None, TorfError | None], Any] | None = None,
        interval: float = 0,
    ) -> bool: ...
    def __repr__(self) -> str: ...
    def __eq__(self, other: object) -> bool: ...
import _errors as error def is_divisible_by_16_kib(num): """Return whether `num` is divisible by 16384 and positive""" if num <= 0: return False return num % 16384 == 0 def iterable_startswith(a, b): a_len = len(a) for i, b_item in enumerate(b): if i >= a_len: # a can't start with b if b is longer than a return False if a[i] != b_item: return False return True def flatten(items): for item in items: if isinstance(item, Iterable): yield from flatten(item) else: yield item _md5sum_regex = re.compile(r'^[0-9a-fA-F]{32}$') def is_md5sum(value): return bool(_md5sum_regex.match(value)) def real_size(path): """ Return size for `path`, which is a (link to a) file or directory Raise ReadError on failure """ if os.path.isdir(os.path.realpath(path)): def onerror(exc): raise error.ReadError(getattr(exc, 'errno', None), getattr(exc, 'filename', None)) size = 0 walker = os.walk(path, followlinks=True, onerror=onerror) for dirpath,dirnames,filenames in walker: for filename in filenames: filepath = os.path.join(dirpath, filename) size += os.path.getsize(filepath) return size else: try: return os.path.getsize(path) except OSError as exc: raise error.ReadError(getattr(exc, 'errno', None), getattr(exc, 'filename', None)) def list_files(path): """ Return list of sorted file paths in `path` Raise ReadError if `path` or any file or directory underneath it is not readable. 
""" def assert_readable(path): os_supports_effective_ids = os.access in os.supports_effective_ids if not os.access(path, os.R_OK, effective_ids=os_supports_effective_ids): raise error.ReadError(errno.EACCES, path) if os.path.isfile(path): assert_readable(path) return [path] else: def onerror(exc): raise error.ReadError(getattr(exc, 'errno', None), getattr(exc, 'filename', None)) filepaths = [] for dirpath, dirnames, filenames in os.walk(path, onerror=onerror, followlinks=True): for filename in filenames: filepath = os.path.join(dirpath, filename) assert_readable(filepath) filepaths.append(filepath) return list(sorted(filepaths, key=lambda fp: str(fp).casefold())) def filter_files(items, getter=lambda f: f, hidden=True, empty=True, exclude=(), include=()): """ Return reduced copy of `items` items: Iterable of file paths or abritrary objects that `getter` can turn into a a file path getter: Callable that takes an item of `filepaths` and returns a file path exclude: Sequence of regular expressions or strings with wildcard characters (see `fnmatch`) that are matched against full paths include: Same as `exclude`, but instead of removing files, matching patterns keep files even if they match a pattern in `excluude hidden: Whether to include hidden files empty: Whether to include empty files """ def is_hidden(path): for name in str(path).split(os.sep): if name != '.' and name != '..' 
and name and name[0] == '.': return True return False def is_excluded(path, ex_regexs=tuple(x for x in exclude if isinstance(x, typing.Pattern)), ex_globs=tuple(x for x in exclude if isinstance(x, str)), in_regexs=tuple(i for i in include if isinstance(i, typing.Pattern)), in_globs=tuple(i for i in include if isinstance(i, str))): # Include patterns take precedence over exclude pattersn if any(r.search(str(path)) for r in in_regexs): return False elif any(fnmatch.fnmatch(str(path).casefold(), g.casefold()) for g in in_globs): return False elif any(r.search(str(path)) for r in ex_regexs): return True elif any(fnmatch.fnmatch(str(path).casefold(), g.casefold()) for g in ex_globs): return True return False items = tuple(items) filepaths = tuple(getter(i) for i in items) try: basepath = pathlib.Path(os.path.commonpath(filepaths)) except ValueError: basepath = pathlib.Path().cwd() items_filtered = [] for item in items: filepath = getter(item) relpath_without_base = pathlib.Path(os.path.relpath(filepath, basepath)) relpath_with_base = pathlib.Path(basepath.parent, filepath) # Exclude hidden files and directories, but not hidden directories in # `basepath` if not hidden and is_hidden(relpath_without_base): continue # Exclude empty file elif not empty and os.path.exists(filepath) and real_size(filepath) <= 0: continue # Exclude file matching regex elif is_excluded(relpath_with_base): continue else: items_filtered.append(item) return items_filtered class MonitoredList(collections.abc.MutableSequence): """List with change callback""" def __init__(self, items=(), callback=None, type=None): self._items = [] self._type = type self._callback = callback with self._callback_disabled(): self.replace(items) @contextlib.contextmanager def _callback_disabled(self): cb = self._callback self._callback = None yield self._callback = cb def __getitem__(self, index): return self._items[index] def __delitem__(self, index): del self._items[index] if self._callback is not None: 
self._callback(self) def _coerce(self, value): if self._type is not None: return self._type(value) else: return value def _filter_func(self, item): if item not in self._items: return item def __setitem__(self, index, value): if isinstance(value, Iterable): value = map(self._filter_func, map(self._coerce, value)) else: value = self._filter_func(self._coerce(value)) self._items[index] = value if self._callback is not None: self._callback(self) def insert(self, index, value): value = self._filter_func(self._coerce(value)) if value is not None: self._items.insert(index, value) if self._callback is not None: self._callback(self) def replace(self, items): if not isinstance(items, Iterable): raise ValueError(f'Not an iterable: {items!r}') # Don't clear list before we know all new values are valid items = tuple(map(self._coerce, items)) self._items.clear() with self._callback_disabled(): self.extend(items) if self._callback is not None: self._callback(self) def clear(self): self._items.clear() if self._callback is not None: self._callback(self) def __len__(self): return len(self._items) def __eq__(self, other): if isinstance(other, type(self)): return frozenset(other._items) == frozenset(self._items) elif isinstance(other, collections.abc.Iterable): return (len(other) == len(self._items) and all(item in self._items for item in other)) else: return False def __ne__(self, other): return not self.__eq__(other) def __add__(self, other): if isinstance(other, type(self)): items = self._items + other._items elif isinstance(other, Iterable): items = self._items + list(other) else: items = self._items + [other] return type(self)(items, callback=self._callback) def __repr__(self): return repr(self._items) class File(os.PathLike): """Path-like that also stores the file size""" def __fspath__(self): return str(self._path) def __reduce__(self): # __reduce__() is needed to properly pickle File objects state = ( # Preserve positional and keyword arguments functools.partial( 
self.__class__, os.path.join(*self._path.parts), size=self._size, ), # Mandatory positional args (already preserved by partial()) (), ) return state def __init__(self, path, size): if isinstance(path, str): self._path = pathlib.Path(path) elif isinstance(path, os.PathLike): self._path = path elif isinstance(path, collections.abc.Iterable): self._path = pathlib.Path(*path) else: raise ValueError(f'Path must be str, PathLike or Iterable, not {type(path).__name__}: {path}') try: self._size = int(size) except (ValueError, TypeError): raise ValueError(f'Size must be int, not {type(size).__name__}: {size}') @property def size(self): return self._size def __getattr__(self, name): return getattr(self._path, name) def __str__(self): return str(self._path) def __eq__(self, other): if type(other) is type(self): return self._path == other._path and self._size == other._size elif isinstance(other, os.PathLike): return self._path == other else: return NotImplemented def __hash__(self): return hash((self._path, self._size)) def __gt__(self, other): if not isinstance(other, type(self)): return NotImplemented return self._path > other._path def __lt__(self, other): if not isinstance(other, type(self)): return NotImplemented return self._path < other._path def __ge__(self, other): if not isinstance(other, type(self)): return NotImplemented return self._path >= other._path def __le__(self, other): if not isinstance(other, type(self)): return NotImplemented return self._path <= other._path def __repr__(self): return f'{type(self).__name__}({repr(str(self._path))}, size={self._size})' class Files(MonitoredList): """Deduplicated list of :class:`Files` objects""" def __init__(self, files, callback=None): if isinstance(files, str): files = (files,) else: files = flatten(files) super().__init__(files, callback=callback, type=File) def _coerce(self, value): if not isinstance(value, self._type): raise ValueError(f'Not a File object: {value} ({type(value).__name__})') else: return value class 
Filepath(type(pathlib.Path())): """Path-like that makes relative paths equal to their absolute versions""" @classmethod def _realpath(cls, path): if os.path.islink(path): return os.path.realpath(str(path)) elif os.path.isabs(path): return str(path) else: return os.path.join(os.getcwd(), str(path)) def __eq__(self, other): # Use fast cached path if possible if isinstance(other, Filepath): return hash(self) == hash(other) else: return self._realpath(self) == self._realpath(other) def __hash__(self): try: return self.__hash except AttributeError: self.__hash = hash(self._realpath(self)) return self.__hash class Filepaths(MonitoredList): """Deduplicated list of :class:`Filepath` objects with change callback""" def __init__(self, filepaths, callback=None): if isinstance(filepaths, str): filepaths = (filepaths,) else: filepaths = list(flatten(filepaths)) super().__init__(filepaths, callback=callback, type=Filepath) def __setitem__(self, index, path): path = self._coerce(path) # Remove files that are equal to or start with `path`. This removes # directories recursively. If `path` exists as a file, it is removed # and then added again. 
path_removed = False for f in tuple(self._items): if path == f or path in f.parents: self._items.remove(f) path_removed = True if path.is_dir(): self.insert(index, path) else: if path_removed: super().insert(index, path) else: super().__setitem__(index, path) def insert(self, index, path): path = self._coerce(path) try: path_is_dir = path.is_dir() except OSError as exc: raise error.ReadError(getattr(exc, 'errno', None), getattr(exc, 'filename', None)) if path_is_dir: # Add files in directory recursively with self._callback_disabled(): for i,child in enumerate(sorted(path.iterdir())): self.insert(index + i, child) if self._callback is not None: self._callback(self) else: super().insert(index, path) def is_url(url): """Return whether `url` is a valid URL""" try: u = urllib.parse.urlparse(url) u.port # Trigger 'invalid port' exception except Exception: return False else: if not u.scheme or not u.netloc: return False return True class URL(str): def __new__(cls, s): return super().__new__(cls, str(s).replace(' ', '+')) def __init__(self, url): if not is_url(url): raise error.URLError(url) else: self._parsed = urllib.parse.urlparse(url) @property def scheme(self): return self._parsed.scheme @property def netloc(self): return self._parsed.netloc @property def hostname(self): return self._parsed.hostname @property def port(self): return self._parsed.port @property def path(self): return self._parsed.path @property def params(self): return self._parsed.params @property def query(self): return self._parsed.query @property def fragment(self): return self._parsed.fragment class URLs(MonitoredList): """Auto-flattening list of `:class:URL` objects with change callback""" def __init__(self, urls, callback=None, _get_known_urls=lambda: ()): self._get_known_urls = _get_known_urls if isinstance(urls, str): if not urls.strip(): urls = () else: urls = (urls,) else: urls = flatten(urls) super().__init__(urls, callback=callback, type=URL) def _filter_func(self, url): # _get_known_urls 
is a hack for the Trackers class to deduplicate across # multiple tiers. if url not in self._items and url not in self._get_known_urls(): return url class Trackers(collections.abc.MutableSequence): """List of :class:`URLs` instances with change callback""" def __init__(self, tiers, callback=None): self._callback = None self._tiers = [] if isinstance(tiers, str): self.append((tiers,)) elif isinstance(tiers, collections.abc.Iterable): for urls in tiers: self.append(urls) else: raise ValueError(f'Must be str or Iterable, not {type(tiers).__name__}: {repr(tiers)}') self._callback = callback @property def flat(self): """Tuple of all URLs of all tiers""" return tuple(flatten(self._tiers)) @contextlib.contextmanager def _callback_disabled(self): cb = self._callback self._callback = None yield self._callback = cb def _tier_changed(self, tier): # Auto-remove empty tiers if len(tier) == 0: self._tiers.remove(tier) if self._callback is not None: self._callback(self) def __getitem__(self, index): return self._tiers[index] def __setitem__(self, index, value): tier = URLs(value, callback=self._tier_changed, _get_known_urls=lambda self=self: self.flat) if len(tier) > 0 and tier not in self._tiers: self._tiers[index] = tier if self._callback is not None: self._callback(self) def __delitem__(self, index): del self._tiers[index] if self._callback is not None: self._callback(self) def insert(self, index, value): tier = URLs(value, callback=self._tier_changed, _get_known_urls=lambda self=self: self.flat) if len(tier) > 0 and tier not in self._tiers: self._tiers.insert(index, tier) if self._callback is not None: self._callback(self) def replace(self, tiers): if not isinstance(tiers, Iterable): raise ValueError(f'Not an iterable: {tiers!r}') with self._callback_disabled(): self._tiers.clear() for urls in tiers: self.append(urls) if self._callback is not None: self._callback(self) def clear(self): self._tiers.clear() if self._callback is not None: self._callback(self) def __len__(self): 
return len(self._tiers) def __eq__(self, other): if isinstance(other, type(self)): return other._tiers == self._tiers elif isinstance(other, collections.abc.Iterable): return list(other) == self._tiers else: return False def __ne__(self, other): return not self.__eq__(other) def __add__(self, other): if isinstance(other, type(self)): other_tiers = other._tiers elif isinstance(other, collections.abc.Iterable): other_tiers = other new_tiers = [] for tier1,x in itertools.zip_longest(self._tiers, other_tiers): if tier1 is None: tier1 = [] if isinstance(x, str) and len(x) > 1: new_tier = tier1 + [x] elif isinstance(x, collections.abc.Iterable): new_tier = tier1 + list(x) elif x is not None: return NotImplemented else: new_tier = tier1 new_tiers.append(new_tier) return type(self)(new_tiers, callback=self._callback) def __repr__(self): return repr(self._tiers) def download(url, timeout=60): """ Download data from URL :raises ConnectionError: if the download fails or the protocol is not supported :return: the downloaded data """ if timeout <= 0: raise error.ConnectionError(url, 'Timed out') elif url.startswith('http://') or url.startswith('https://'): return download_http(url, timeout=timeout) else: raise error.ConnectionError(url, 'Unsupported protocol') def download_http(url, timeout=60): try: response = urllib.request.urlopen(URL(url), timeout=timeout).read() except urllib.error.URLError as e: try: msg = e.args[0].strerror except (AttributeError, IndexError): msg = (getattr(e, 'msg', None) or getattr(e, 'strerror', None) or 'Failed') raise error.ConnectionError(url, msg) except socket.timeout: raise error.ConnectionError(url, 'Timed out') except http.client.HTTPException: raise error.ConnectionError(url, 'No HTTP response') except (OSError, IOError): raise error.ConnectionError(url, 'Unknown error') else: return response class Iterable(abc.ABC): """ Iterable that is not a :class:`str` This allows you to write isinstance(x, Iterable) instead of isinstance(x, 
collections.abc.Iterable) and not isinstance(x, str) """ @classmethod def __subclasshook__(cls, C): if cls is Iterable: if issubclass(C, collections.abc.Iterable) and not issubclass(C, str): return True return False def key_exists_in_list_or_dict(key, lst_or_dct): """True if `lst_or_dct[key]` does not raise an Exception""" if isinstance(lst_or_dct, collections.abc.Mapping) and key in lst_or_dct: return True elif isinstance(lst_or_dct, collections.abc.Sequence): min_i, max_i = 0, len(lst_or_dct) if min_i <= key < max_i: return True return False def assert_type(obj, keys, exp_types, must_exist=True, check=None): """ Raise MetainfoError if value is not of a particular type :param obj: The object to check :type obj: sequence or mapping :param keys: Sequence of keys so that ``obj[key[0]][key[1]]...`` resolves to a value :type obj: sequence :param exp_types: Sequence of allowed types that the value specified by `keys` must be an instance of :type obj: sequence :param bool must_exist: Whether to raise MetainfoError if `keys` does not resolve to a value :param callable check: Callable that gets the value specified by `keys` and returns True if it is OK, False otherwise """ keys = list(keys) keychain = [] while len(keys[:-1]) > 0: key = keys.pop(0) try: obj = obj[key] except (KeyError, IndexError): break keychain.append(key) keychain_str = ''.join(f'[{key!r}]' for key in keychain) key = keys.pop(0) if not key_exists_in_list_or_dict(key, obj): if must_exist: if keychain_str: raise error.MetainfoError(f'Missing {key!r} in {keychain_str}') else: raise error.MetainfoError(f'Missing {key!r}') elif not isinstance(obj[key], exp_types): if len(exp_types) > 2: exp_types_str = ', '.join(t.__name__ for t in exp_types[:-1]) exp_types_str += ' or ' + exp_types[-1].__name__ else: exp_types_str = ' or '.join(t.__name__ for t in exp_types) type_str = type(obj[key]).__name__ raise error.MetainfoError(f'{keychain_str}[{key!r}] must be {exp_types_str}, ' f'not {type_str}: {obj[key]!r}') elif 
check is not None and not check(obj[key]): raise error.MetainfoError(f"{keychain_str}[{key!r}] is invalid: {obj[key]!r}") def force_as_string(value): """ Return `value` as string, replace non-UTF8 characters with "�" """ if isinstance(value, bytes): return value.decode('utf8', errors='replace') elif isinstance(value, str) or value is None: return value else: return str(value) def decode_value(value): if isinstance(value, bytes): # Try to decode `value` as UTF8, but return it as-is if that fails # because we don't want to change the infohash. Non-UTF8-encoded strings # (torrent name, files, etc) should be stored as bytes and decoded on # demand by the relevant Torrent attributes. # # TODO: We should store the metainfo as bytes so it can be used to # create a byte-for-byte identical torrent, regardless of encoding # or weird stuff stored in the original torrent. But that's a lot # of work. try: return bytes.decode(value, encoding='utf8', errors='strict') except UnicodeDecodeError: return value elif isinstance(value, collections.abc.Sequence): return decode_list(value) elif isinstance(value, collections.abc.Mapping): return decode_dict(value) else: return value def decode_list(lst): lst_dec = [] for value in lst: lst_dec.append(decode_value(value)) return lst_dec def decode_dict(dct): dct_dec = {} for key,value in dct.items(): value_dec = decode_value(value) key_dec = decode_value(key) dct_dec[key_dec] = value_dec return dct_dec def encode_value(value): if type(value) in ENCODE_ALLOWED_TYPES: return value else: for cls,converter in ENCODE_CONVERTERS.items(): if isinstance(value, cls): return converter(value) raise ValueError(f'Invalid value: {value!r}') def encode_list(lst): lst_enc = [] for i,value in enumerate(lst): lst_enc.append(encode_value(value)) return lst_enc def encode_dict(dct): dct_enc = collections.OrderedDict() for key,value in sorted(dct.items()): if not isinstance(key, str): raise ValueError(f'Invalid key: {key!r}') key_enc = str(key).encode('utf8') 
value_enc = encode_value(value) dct_enc[key_enc] = value_enc return dct_enc ENCODE_ALLOWED_TYPES = (bytes, int) ENCODE_CONVERTERS = { str: lambda val: str(val).encode(encoding='utf-8', errors='replace'), float: int, bool: int, collections.abc.Mapping: encode_dict, collections.abc.Sequence: encode_list, collections.abc.Collection: encode_list, datetime: lambda dt: int(dt.timestamp()), } rndusr-torf-547b989/torf/_utils.pyi000066400000000000000000000106511513142010300172610ustar00rootroot00000000000000import os import pathlib import sys from collections.abc import Container, Iterable, MutableSequence from functools import partial from typing import Any, Callable, TypeVar, overload from _typeshed import StrPath from typing_extensions import Self T = TypeVar("T") class MonitoredList(MutableSequence[T]): def __init__( self, items: Iterable[T] = (), callback: Callable[[Self], None] | None = None, type: type[T] | None = None ) -> None: ... @overload def __getitem__(self, index: int, /) -> T: ... @overload def __getitem__(self, index: slice, /) -> Self: ... @overload def __delitem__(self, index: int, /) -> None: ... @overload def __delitem__(self, index: slice, /) -> None: ... @overload def __setitem__(self, index: int, value: T, /) -> None: ... @overload def __setitem__(self, index: slice, value: Iterable[T], /) -> None: ... def insert(self, index: int, value: T) -> None: ... def replace(self, items: Iterable[T]) -> None: ... def clear(self) -> None: ... def __len__(self) -> int: ... def __eq__(self, other: object) -> bool: ... def __ne__(self, other: object) -> bool: ... def __add__(self, other: object) -> Self: ... def __repr__(self) -> str: ... class File(os.PathLike[str]): def __fspath__(self) -> str: ... def __reduce__(self) -> tuple[partial[Self], tuple[()]]: ... def __init__(self, path: StrPath | Iterable[StrPath], size: int) -> None: ... @property def size(self) -> int: ... def __getattr__(self, name: str) -> Any: ... def __str__(self) -> str: ... 
def __eq__(self, other: object) -> bool: ... def __hash__(self) -> int: ... def __gt__(self, other: object) -> bool: ... def __lt__(self, other: object) -> bool: ... def __ge__(self, other: object) -> bool: ... def __le__(self, other: object) -> bool: ... def __repr__(self) -> str: ... class Files(MonitoredList[File]): def __init__(self, files: str | Iterable[str], callback: Callable[[Self], None] | None = None): ... # There is special recognition in Mypy for `sys.platform`, not `os.name` if sys.platform == "win32": PathBase = pathlib.WindowsPath else: PathBase = pathlib.PosixPath class Filepath(PathBase): def __eq__(self, other: object) -> bool: ... def __hash__(self) -> int: ... class Filepaths(MonitoredList[Filepath]): def __init__(self, filepaths: str | Iterable[str], callback: Callable[[Self], None] | None = None) -> None: ... @overload def __setitem__(self, index: int, path: StrPath, /) -> None: ... @overload def __setitem__(self, index: slice, path: Iterable[StrPath], /) -> None: ... def insert(self, index: int, path: StrPath) -> None: ... class URL(str): def __new__(cls, s: str) -> Self: ... def __init__(self, url: str) -> None: ... @property def scheme(self) -> str: ... @property def netloc(self) -> str: ... @property def hostname(self) -> str | None: ... @property def port(self) -> int | None: ... @property def path(self) -> str: ... @property def params(self) -> str: ... @property def query(self) -> str: ... @property def fragment(self) -> str: ... class URLs(MonitoredList[URL]): def __init__( self, urls: str | Iterable[str], callback: Callable[[Self], None] | None = None, _get_known_urls: Callable[[], Container[str]] = lambda: (), ): ... class Trackers(MutableSequence[URLs]): def __init__(self, tiers: str | Iterable[str], callback: Callable[[Self], None] | None = None) -> None: ... @property def flat(self) -> tuple[URL, ...]: ... @overload def __getitem__(self, index: int, /) -> URLs: ... @overload def __getitem__(self, index: slice, /) -> Self: ... 
@overload def __delitem__(self, index: int, /) -> None: ... @overload def __delitem__(self, index: slice, /) -> None: ... @overload def __setitem__(self, index: int, value: URLs, /) -> None: ... @overload def __setitem__(self, index: slice, value: Iterable[URLs], /) -> None: ... def insert(self, index: int, value: Iterable[str]) -> None: ... def replace(self, tiers: Iterable[str]) -> None: ... def clear(self) -> None: ... def __len__(self) -> int: ... def __eq__(self, other: object) -> bool: ... def __ne__(self, other: object) -> bool: ... def __add__(self, other: str | Iterable[str]) -> Self: ... def __repr__(self) -> str: ... rndusr-torf-547b989/torf/py.typed000066400000000000000000000000001513142010300167210ustar00rootroot00000000000000rndusr-torf-547b989/tox.ini000066400000000000000000000010311513142010300155700ustar00rootroot00000000000000[tox] envlist = py314, py313, py312, py311, py310, py39, py38, lint [testenv] deps = pytest pytest-xdist pytest-httpserver pytest-mock commands = pytest {posargs} [testenv:lint] deps = flake8 isort ruff sphinx mypy commands = flake8 torf tests isort --check-only torf tests ruff check torf tests mypy torf --exclude torf/_flatbencode.py sphinx-build -E -j auto -q -W --keep-going docs /tmp/sphinx-docs-build rm -r /tmp/sphinx-docs-build git clean docs --force --quiet allowlist_externals = rm git