soupsieve-2.7/.pyspelling.yml0000644000000000000000000000424613615410400013321 0ustar00spellchecker: aspell matrix: - name: mkdocs sources: - site/**/*.html hunspell: d: en_US aspell: lang: en dictionary: wordlists: - docs/src/dictionary/en-custom.txt output: build/dictionary/mkdocs.dic pipeline: - pyspelling.filters.html: comments: false attributes: - title - alt ignores: - 'code, pre, a.magiclink, span.keys' - '.MathJax_Preview, .md-nav__link, .md-footer-custom-text, .md-source__repository, .headerlink, .md-icon' - '.md-social__link' - pyspelling.filters.url: - name: markdown sources: - README.md hunspell: d: en_US aspell: lang: en dictionary: wordlists: - docs/src/dictionary/en-custom.txt output: build/dictionary/mkdocs.dic pipeline: - pyspelling.filters.markdown: markdown_extensions: - pymdownx.superfences: - pymdownx.highlight: - pyspelling.filters.html: comments: false attributes: - title - alt ignores: - :is(code, pre) - pyspelling.filters.url: - name: python sources: - setup.py - '{soupsieve,tests,tools}/**/*.py' hunspell: d: en_US aspell: lang: en dictionary: wordlists: - docs/src/dictionary/en-custom.txt output: build/dictionary/python.dic pipeline: - pyspelling.filters.python: group_comments: true - pyspelling.flow_control.wildcard: allow: - py-comment - pyspelling.filters.context: context_visible_first: true delimiters: # Ignore lint (noqa) and coverage (pragma) as well as shebang (#!) 
- open: '^(?: *(?:noqa\b|pragma: no cover|type: .*?)|!)' close: '$' # Ignore Python encoding string -*- encoding stuff -*- - open: '^ *-\*-' close: '-\*-$' - pyspelling.filters.context: context_visible_first: true escapes: '\\[\\`]' delimiters: # Ignore multiline content between fences (fences can have 3 or more back ticks) # ``` # content # ``` - open: '(?s)^(?P *`{3,})$' close: '^(?P=open)$' # Ignore text between inline back ticks - open: '(?P`+)' close: '(?P=open)' - pyspelling.filters.url: soupsieve-2.7/mkdocs.yml0000644000000000000000000001037113615410400012331 0ustar00site_name: Soup Sieve site_url: https://facelessuser.github.io/soupsieve repo_url: https://github.com/facelessuser/soupsieve edit_uri: tree/main/docs/src/markdown site_description: A modern CSS selector library for Beautiful Soup. copyright: | Copyright © 2018 - 2025 Isaac Muse docs_dir: docs/src/markdown theme: name: material custom_dir: docs/theme icon: logo: material/book-open-page-variant palette: scheme: dracula primary: deep purple accent: deep purple font: text: Roboto code: Roboto Mono features: - navigation.tabs - navigation.top - navigation.instant - navigation.indexes - toc.follow - content.code.copy - navigation.footer - search.share - search.highlight - search.suggest pymdownx: sponsor: "https://github.com/sponsors/facelessuser" nav: - Home: - Quick Start: index.md - API: api.md - F.A.Q.: faq.md - Beautiful Soup Differences: differences.md - CSS Selectors: - General Details: selectors/index.md - Basic Selectors: selectors/basic.md - Combinators and Lists: selectors/combinators.md - Pseudo Classes: selectors/pseudo-classes.md - Non-Applicable Pseudo Classes: selectors/unsupported.md - About: - Contributing & Support: about/contributing.md - Development: about/development.md - Security Vulnerabilities: about/security.md - Changelog: about/changelog.md - License: about/license.md markdown_extensions: - markdown.extensions.toc: slugify: !!python/object/apply:pymdownx.slugs.slugify 
{kwds: {case: lower}} permalink: "" - markdown.extensions.smarty: smart_quotes: false - pymdownx.betterem: - markdown.extensions.attr_list: - markdown.extensions.tables: - markdown.extensions.abbr: - markdown.extensions.md_in_html: - pymdownx.superfences: - pymdownx.highlight: extend_pygments_lang: - name: pycon3 lang: pycon options: python3: true - pymdownx.inlinehilite: - pymdownx.magiclink: repo_url_shortener: true repo_url_shorthand: true social_url_shorthand: true user: facelessuser repo: soupsieve - pymdownx.tilde: - pymdownx.caret: - pymdownx.smartsymbols: - pymdownx.emoji: emoji_index: !!python/name:material.extensions.emoji.twemoji emoji_generator: !!python/name:material.extensions.emoji.to_svg - pymdownx.escapeall: hardbreak: True nbsp: True - pymdownx.tasklist: custom_checkbox: true - pymdownx.progressbar: - pymdownx.mark: - pymdownx.striphtml: - pymdownx.snippets: base_path: - docs/src/markdown/.snippets - LICENSE.md - SECURITY.md auto_append: - refs.md - pymdownx.keys: separator: "\uff0b" - pymdownx.saneheaders: - pymdownx.blocks.admonition: types: - new - settings - note - abstract - info - tip - success - question - warning - failure - danger - bug - example - quote - pymdownx.blocks.details: types: - name: details-new class: new - name: details-settings class: settings - name: details-note class: note - name: details-abstract class: abstract - name: details-info class: info - name: details-tip class: tip - name: details-success class: success - name: details-question class: question - name: details-warning class: warning - name: details-failure class: failure - name: details-danger class: danger - name: details-bug class: bug - name: details-example class: example - name: details-quote class: quote - pymdownx.blocks.html: - pymdownx.blocks.definition: - pymdownx.blocks.tab: alternate_style: True - pymdownx.blocks.caption: - pymdownx.fancylists: inject_style: true extra: social: - icon: fontawesome/brands/github link: https://github.com/facelessuser 
plugins: - search: separator: '[:\s\-]+' - git-revision-date-localized - minify: minify_html: true - mkdocs_pymdownx_material_extras soupsieve-2.7/docs/src/markdown/api.md0000644000000000000000000002624113615410400014765 0ustar00# API Soup Sieve implements most of the selectors from the stable specification and even many from the latest draft specification. Selectors can be used to detect and filter elements. To learn more about which specific selectors are implemented, see [CSS Selectors](./selectors/index.md). Soup Sieve will detect the document type being used from the Beautiful Soup object that is given to it, and depending on the document type, its behavior may be slightly different. When detecting XHTML, Soup Sieve simply looks to see if the root element of an XML document is under the XHTML namespace and does not currently look at the `doctype`. If in the future there is a need for stricter XHTML detection, this may change. - HTML document types (HTML, HTML5) will have their tag names and attribute names treated without case sensitivity, like most browsers do. - XML document types (including XHTML) will have their tag names and attribute names treated with case sensitivity. - HTML5, XHTML and XML documents will have namespaces evaluated per the document's support (provided via the parser). Some additional configuration is required when using namespaces, see [Namespace](#namespaces) for more information. /// tip | Getting Proper Namespaces The `html5lib` parser provides proper namespaces for HTML5, but `lxml`'s HTML parser will not. If you need namespace support for HTML5, consider using `html5lib`. For XML, the `lxml-xml` parser (`xml` for short) will provide proper namespaces. It is generally suggested that `lxml-xml` is used to parse XHTML documents to take advantage of namespaces. /// - While attribute values are generally treated as case sensitive, HTML5 and HTML treat the `type` attribute special. The `type` attribute's value is always case insensitive. 
This is generally how most browsers treat `type`. If you need `type` to be case sensitive, you can use the `s` flag: `#!css [type="submit" s]`. While Soup Sieve access is exposed through Beautiful Soup's API, Soup Sieve's API can always be imported and accessed directly for more controlled tag selection if needed. ## Flags ### `soupsieve.DEBUG` Print debug output when parsing a selector. ```pycon3 >>> import soupsieve as sv >>> sv.compile('p:has(#id) > span.some-class:contains(text)', flags=sv.DEBUG) ## PARSING: 'p:has(#id) > span.some-class:contains(text)' TOKEN: 'tag' --> 'p' at position 0 TOKEN: 'pseudo_class' --> ':has(' at position 1 is_pseudo: True is_open: True is_relative: True TOKEN: 'id' --> '#id' at position 6 TOKEN: 'pseudo_close' --> ')' at position 9 TOKEN: 'combine' --> ' > ' at position 10 TOKEN: 'tag' --> 'span' at position 13 TOKEN: 'class' --> '.some-class' at position 17 TOKEN: 'pseudo_contains' --> ':contains(text)' at position 28 ## END PARSING SoupSieve(pattern='p:has(#id) > span.some-class:contains(text)', namespaces=None, custom=None, flags=1) ``` ## `soupsieve.select_one()` ```py3 def select_one(select, tag, namespaces=None, flags=0, **kwargs): """Select the specified tags.""" ``` `select_one` will return the first tag under the given tag that matches the given CSS selectors provided, or it will return `None` if a suitable tag was not found. `select_one` accepts a CSS selector string, a `Tag`/`BeautifulSoup` object, an optional [namespace](#namespaces) dictionary, and `flags`. ```pycon3 >>> import soupsieve as sv >>> sv.select_one('p:is(.a, .b, .c)', soup)

Cat

``` ## `soupsieve.select()` ```py3 def select(select, tag, namespaces=None, limit=0, flags=0, **kwargs): """Select the specified tags.""" ``` `select` will return all tags under the given tag that match the given CSS selectors provided. You can also limit the number of tags returned by providing a positive integer via the `limit` parameter (0 means to return all tags). `select` accepts a CSS selector string, a `Tag`/`BeautifulSoup` object, an optional [namespace](#namespaces) dictionary, a `limit`, and `flags`. ```pycon3 >>> import soupsieve as sv >>> sv.select('p:is(.a, .b, .c)', soup) [

Cat

,

Dog

,

Mouse

] ``` ## `soupsieve.iselect()` ```py3 def iselect(select, node, namespaces=None, limit=0, flags=0, **kwargs): """Select the specified tags.""" ``` `iselect` is exactly like `select` except that it returns a generator instead of a list. ## `soupsieve.closest()` ```py3 def closest(select, tag, namespaces=None, flags=0, **kwargs): """Match closest ancestor to the provided tag.""" ``` `closest` returns the tag closest to the given tag that matches the given selector. The element found must be a direct ancestor of the tag or the tag itself. `closest` accepts a CSS selector string, a `Tag`/`BeautifulSoup` object, an optional [namespace](#namespaces) dictionary, and `flags`. ## `soupsieve.match()` ```py3 def match(select, tag, namespaces=None, flags=0, **kwargs): """Match node.""" ``` The `match` function matches a given tag with a given CSS selector. `match` accepts a CSS selector string, a `Tag`/`BeautifulSoup` object, an optional [namespace](#namespaces) dictionary, and flags. ```pycon3 >>> nodes = sv.select('p:is(.a, .b, .c)', soup) >>> sv.match('p:not(.b)', nodes[0]) True >>> sv.match('p:not(.b)', nodes[1]) False ``` ## `soupsieve.filter()` ```py3 def filter(select, nodes, namespaces=None, flags=0, **kwargs): """Filter list of nodes.""" ``` `filter` takes an iterable containing HTML nodes and will filter them based on the provided CSS selector string. If given a `Tag`/`BeautifulSoup` object, it will iterate the direct children filtering them. `filter` accepts a CSS selector string, an iterable containing nodes, an optional [namespace](#namespaces) dictionary, and flags. ```pycon3 >>> sv.filter('p:not(.b)', soup.div) [

Cat

,

Mouse

] ``` ## `soupsieve.escape()` ```py3 def escape(ident): """Escape CSS identifier.""" ``` `escape` is used to escape CSS identifiers. It follows the [CSS specification][cssom] and escapes any character that would normally cause an identifier to be invalid. ```pycon3 >>> sv.escape(".foo#bar") '\\.foo\\#bar' >>> sv.escape("()[]{}") '\\(\\)\\[\\]\\{\\}' >>> sv.escape('--a') '--a' >>> sv.escape('0') '\\30 ' >>> sv.escape('\0') '�' ``` /// new | New in 1.9.0 `escape` is a new API function added in 1.9.0. /// ## `soupsieve.compile()` ```py3 def compile(pattern, namespaces=None, flags=0, **kwargs): """Compile CSS pattern.""" ``` `compile` will pre-compile a CSS selector pattern returning a `SoupSieve` object. The `SoupSieve` object has the same selector functions available via the module without the need to specify the selector, namespaces, or flags. ```py3 class SoupSieve: """Match tags in Beautiful Soup with CSS selectors.""" def match(self, tag): """Match.""" def closest(self, tag): """Match closest ancestor.""" def filter(self, iterable): """Filter.""" def select_one(self, tag): """Select a single tag.""" def select(self, tag, limit=0): """Select the specified tags.""" def iselect(self, tag, limit=0): """Iterate the specified tags.""" ``` ## `soupsieve.purge()` Soup Sieve caches compiled patterns for performance. If for whatever reason, you need to purge the cache, simply call `purge`. ## Custom Selectors The custom selector feature is loosely inspired by the `css-extensions` [proposal][custom-extensions-1]. In its current form, Soup Sieve allows assigning a complex selector to a custom pseudo-class name. The pseudo-class name must start with `:--` to avoid conflicts with any future pseudo-classes. To create custom selectors, you simply need to pass a dictionary containing the custom pseudo-class names (keys) with the associated CSS selectors that the pseudo-classes are meant to represent (values). 
It is important to remember that pseudo-class names are not case sensitive, so even though a dictionary will allow you to specify multiple keys with the same name (as long as the character cases are different), Soup Sieve will not and will throw an exception if you attempt to do so. In the following example, we will define our own custom selector called `#!css :--header` that will be an alias for `#!css h1, h2, h3, h4, h5, h6`. ```py3 import soupsieve as sv import bs4 markup = """

Header 1

Header 2

child

Header 1,

Header 2

] ``` Custom selectors can also be dependent upon other custom selectors. You don't have to worry about the order in the dictionary as custom selectors will be compiled "just in time" when they are needed. Be careful though, if you create a circular dependency, you will get a `SelectorSyntaxError`. Assuming the same markup as in the first example, we will now create a custom selector that should find any element that has child elements; we will call the selector `:--parent`. Then we will create another selector called `:--parent-paragraph` that will use the `:--parent` selector to find `#!html <p>

` elements that are also parents: ```py3 custom = { ":--parent": ":has(> *|*)", ":--parent-paragraph": "p:--parent" } print(sv.select(':--parent-paragraph', soup, custom=custom)) ``` The above code will yield the only paragraph that is a parent: ``` [

child

] ``` ## Namespaces Many of Soup Sieve's selector functions take an optional namespace dictionary. Namespaces, just like CSS, must be defined for Soup Sieve to evaluate `ns|tag` type selectors. This is analogous to CSS's namespace at-rule: ```css @namespace url("http://www.w3.org/1999/xhtml"); @namespace svg url("http://www.w3.org/2000/svg"); ``` A namespace dictionary should have keys (prefixes) and values (namespaces). An empty key string for a key would denote the default key. An empty value would essentially represent a null namespace. To represent the above CSS example for Soup Sieve, we would configure it like so: ```py3 namespace = { "": "http://www.w3.org/1999/xhtml", # Default namespace is for XHTML "svg": "http://www.w3.org/2000/svg", # The SVG namespace defined with prefix of "svg" } ``` Prefixes used in the namespace dictionary do not have to match the prefixes in the document. The provided prefix is never compared against the prefixes in the document, only the namespaces are compared. The prefixes in the document are only there for the parser to know which tags get which namespace. And the prefixes in the namespace dictionary are only defined in order to provide an alias for the namespaces when using the namespace selector syntax: `ns|name`. Tags do not necessarily have to have a prefix for Soup Sieve to recognize them either. For instance, in HTML5, SVG *should* automatically get the SVG namespace. Depending how namespaces were defined in the document, tags may inherit namespaces in some conditions. Namespace assignment is mainly handled by the parser and exposed through the Beautiful Soup API. Soup Sieve uses the Beautiful Soup API to then compare namespaces for supported documents. soupsieve-2.7/docs/src/markdown/differences.md0000644000000000000000000001420213615410400016463 0ustar00# Beautiful Soup Differences Soup Sieve is the official CSS "select" implementation of Beautiful Soup 4.7.0+. 
While the inclusion of Soup Sieve fixes many issues and greatly expands CSS support in Beautiful Soup, it does introduce some differences which may surprise some who've become accustomed to the old "select" implementation. Beautiful Soup's old select method had numerous limitations and quirks that do not align with the actual CSS specifications. Most are insignificant, but there are a couple of differences that people over the years had come to rely on. Soup Sieve, which aims to follow the CSS specification closely, does not support these differences. ## Attribute Values Beautiful Soup was very relaxed when it came to attribute values in selectors: `#!css [attribute=value]`. Beautiful Soup would allow almost anything for a valid unquoted value. Soup Sieve, on the other hand, follows the CSS specification and requires that a value be a valid identifier, or it must be quoted. If you get an error complaining about a malformed attribute, you may need to quote the value. For instance, if you previously used a selector like this: ```py3 soup.select('[attr={}]') ``` You would need to quote the value as `{}` is not a valid CSS identifier, so it must be quoted: ```py3 soup.select('[attr="{}"]') ``` You can also use the [escape](./api.md#soupsieveescape) function to escape dynamic content: ```py3 import soupsieve soup.select('[attr=%s]' % soupsieve.escape('{}')) ``` ## CSS Identifiers Since Soup Sieve follows the CSS specification, class names, id names, tag names, etc. must be valid identifiers. Since identifiers, according to the CSS specification, cannot *start* with a number, some users may find that their old class, id, or tag name selectors that started with numbers will not work. To specify such selectors, you'll have to use CSS escapes. 
So if you used to use: ```py3 soup.select('.2class') ``` You would need to update with: ```py3 soup.select(r'.\32 class') ``` Numbers in the middle or at the end of a class will work as they always did: ```py3 soup.select('.class2') ``` ## Relative Selectors Whether on purpose or by accident, Beautiful Soup used to allow relative selectors: ```py3 soup.select('> div') ``` The above is not a valid CSS selector according to the CSS specifications. Relative selector lists have only recently been added to the CSS specifications, and they are only allowed in a `#!css :has()` pseudo-class: ```css article:has(> div) ``` But, in the level 4 CSS specifications, the `:scope` pseudo-class has been added which allows for the same feel as using `#!css > div`. Since Soup Sieve supports the `:scope` pseudo-class, it can be used to produce the same behavior as the legacy select method. In CSS, the `:scope` pseudo-class represents the element that the CSS select operation is called on. In supported browsers, the following JavaScript example would treat `:scope` as the element that `el` references: ```js el.querySelectorAll(':scope > .class') ``` Just like in the JavaScript example above, Soup Sieve would also treat `:scope` as the element that `el` references: ```py3 el.select(':scope > .class') ``` In the case where the element is the document node, `:scope` would simply represent the root element of the document. So, if you used to have selectors such as: ```py3 soup.select('> div') ``` You can simply add `:scope`, and it should work the same: ```py3 soup.select(':scope > div') ``` While this will generally give you what is expected for the relative, descendant selectors, this will not work for sibling selectors, and the reasons why are covered in more detail in [Out of Scope Selectors](#out-of-scope-selectors). ## Out of Scope Selectors In a browser, when requesting a selector via `querySelectorAll`, the element that `querySelectorAll` is called on is the *scoped* element. 
So in the following example, `el` is the *scoped* element. ```js el.querySelectorAll('.class') ``` This same concept applies to Soup Sieve, where the element that `select` or `select_one` is called on is also the *scoped* element. So in the following example, `el` is also the *scoped* element: ```py3 el.select('.class') ``` In browsers, `querySelectorAll` and `querySelector` only return elements under the *scoped* element. They do not return the *scoped* element itself, its parents, or its siblings. Only when `querySelectorAll` or `querySelector` is called on the document node will it return the *scoped* selector, which would be the *root* element, as the query is being called on the document itself and not the *scoped* element. Soup Sieve aims to essentially mimic the browser functions such as `querySelector`, `querySelectorAll`, `matches`, etc. In Soup Sieve `select` and `select_one` are analogous to `querySelectorAll` and `querySelector` respectively. For this reason, Soup Sieve also only returns elements under the *scoped* element. The idea is to provide a familiar interface that behaves as closely as possible to what people familiar with CSS selectors are used to. So while Soup Sieve will find elements relative to `:scope` with `>` or a space (the descendant combinator): ```py3 soup.select(':scope > div') ``` It will not find elements relative to `:scope` with `+` or `~` as siblings to the *scoped* element are not under the *scoped* element: ```py3 soup.select(':scope + div') ``` This is by design and is in line with the behavior exhibited in all web browsers. ## Selected Element Order Another quirk of Beautiful Soup's old implementation was that it returned the HTML nodes in the order of how the selectors were defined. For instance, Beautiful Soup, if given the pattern `#!css article, body` would first return `#!html
<article>` and then `#!html <body>`. Soup Sieve does not, and frankly cannot, honor Beautiful Soup's old ordering convention due to the way it is designed. Soup Sieve returns the nodes in the order they are defined in the document as that is how the elements are searched. This is much more efficient and provides better performance. So, given the earlier selector pattern of `article, body`, Soup Sieve would return the element `#!html <body>` and then `#!html <article>
` as that is how it is ordered in the HTML document. soupsieve-2.7/docs/src/markdown/faq.md0000644000000000000000000000617313615410400014765 0ustar00# Frequently Asked Questions ## Why do selectors not work the same in Beautiful Soup 4.7+? Soup Sieve is the official CSS selector library in Beautiful Soup 4.7+, and with this change, Soup Sieve introduces a number of changes that break some of the expected behaviors that existed in versions prior to 4.7. In short, Soup Sieve follows the CSS specifications fairly closely, and this broke a number of non-standard behaviors. These non-standard behaviors were not allowed according to the CSS specifications. Soup Sieve has no intentions of bringing back these behaviors. For more details on specific changes, and the reasoning why a specific change is considered a good change, or simply a feature that Soup Sieve cannot/will not support, see [Beautiful Soup Differences](./differences.md). ## How does `iframe` handling work? In web browsers, CSS selectors do not usually select content inside an `iframe` element if the selector is called on an element outside of the `iframe`. Each HTML document is usually encapsulated and CSS selector leakage across this `iframe` boundary is usually prevented. In its current iteration, Soup Sieve is not aware of the origin of the documents in the `iframe`, and Soup Sieve will not prevent selectors from crossing these boundaries. Soup Sieve is not used to style documents, but to scrape documents. For this reason, it seems to be more helpful to allow selector combinators to cross these boundaries. Soup Sieve isn't entirely unaware of `iframe` elements though. In Soup Sieve 1.9.1, it was noticed that some pseudo-classes behaved in unexpected ways without awareness of `iframes`; this was fixed in 1.9.1. 
Pseudo-classes such as [`:default`](./selectors/pseudo-classes.md#:default), [`:indeterminate`](./selectors/pseudo-classes.md#:indeterminate), [`:dir()`](./selectors/pseudo-classes.md#:dir), [`:lang()`](./selectors/pseudo-classes.md#:lang), [`:root`](./selectors/pseudo-classes.md#:root), and [`:contains()`](./selectors/pseudo-classes.md#:contains) were given awareness of `iframes` to ensure they behaved properly and returned the expected elements. This doesn't mean that `select` won't return elements in `iframes`, but it won't allow something like `:default` to select a `button` in an `iframe` whose parent `form` is outside the `iframe`. Or better put, a default `button` will be evaluated in the context of the document it is in. With all of this said, if your selectors have issues with `iframes`, it is most likely because `iframes` are handled differently by different parsers. `html.parser` will usually parse `iframe` elements as it sees them. `lxml` parser will often remove `html` and `body` tags of an `iframe` HTML document. `lxml-xml` will simply ignore the content in a XHTML document. And `html5lib` will HTML escape the content of an `iframe` making traversal impossible. In short, Soup Sieve will return elements from all documents, even `iframes`. But certain pseudo-classes may take into consideration the context of the document they are in. But even with all of this, a parser's handling of `iframes` may make handling its content difficult if it doesn't parse it as HTML elements, or augments its structure. soupsieve-2.7/docs/src/markdown/index.md0000644000000000000000000001267113615410400015325 0ustar00# Quick Start ## Overview Soup Sieve is a CSS selector library designed to be used with [Beautiful Soup 4][bs4]. It aims to provide selecting, matching, and filtering using modern CSS selectors. Soup Sieve currently provides selectors from the CSS level 1 specifications up through the latest CSS level 4 drafts and beyond (though some are not yet implemented). 
Soup Sieve was written with the intent to replace Beautiful Soup's built-in select feature, and as of Beautiful Soup version 4.7.0, it now is :confetti_ball:. Soup Sieve can also be imported in order to use its API directly for more controlled, specialized parsing. Soup Sieve has implemented most of the CSS selectors up through the latest CSS draft specifications, though there are a number that don't make sense in a non-browser environment. Selectors that cannot provide meaningful functionality simply do not match anything. Some of the supported selectors are: - `#!css .classes` - `#!css #ids` - `#!css [attributes=value]` - `#!css parent child` - `#!css parent > child` - `#!css sibling ~ sibling` - `#!css sibling + sibling` - `#!css :not(element.class, element2.class)` - `#!css :is(element.class, element2.class)` - `#!css parent:has(> child)` - and [many more](./selectors/index.md) ## Installation You must have Beautiful Soup already installed: ``` pip install beautifulsoup4 ``` In most cases, assuming you've installed version 4.7.0, that should be all you need to do, but if you've installed via some alternative method, and Soup Sieve is not automatically installed, you can install it directly: ``` pip install soupsieve ``` If you want to manually install it from source, first ensure that [`build`][build] is installed: ``` pip install build ``` Then navigate to the root of the project and build the wheel and install (replacing `<ver>` with the current version): ``` python -m build -w pip install dist/soupsieve-<ver>-py3-none-any.whl ``` ## Usage To use Soup Sieve, you must create a `BeautifulSoup` object: ```pycon3 >>> import bs4 >>> text = """ ...
... ...

Cat

...

Dog

...

Mouse

...
... """ >>> soup = bs4.BeautifulSoup(text, 'html5lib') ``` For most people, using the Beautiful Soup 4.7.0+ API may be more than sufficient. Beautiful Soup offers two methods that employ Soup Sieve: `select` and `select_one`. Beautiful Soup's select API is identical to Soup Sieve's, except that you don't have to hand it the tag object, the calling object passes itself to Soup Sieve: ```pycon3 >>> soup = bs4.BeautifulSoup(text, 'html5lib') >>> soup.select_one('p:is(.a, .b, .c)')

Cat

``` ```pycon3 >>> soup = bs4.BeautifulSoup(text, 'html5lib') >>> soup.select('p:is(.a, .b, .c)') [

Cat

,

Dog

,

Mouse

] ``` You can also use the Soup Sieve API directly to get access to the full range of possibilities that Soup Sieve offers. You can select a single tag: ```pycon3 >>> import soupsieve as sv >>> sv.select_one('p:is(.a, .b, .c)', soup)

Cat

``` You can select all tags: ```pycon3 >>> import soupsieve as sv >>> sv.select('p:is(.a, .b, .c)', soup) [

Cat

,

Dog

,

Mouse

] ``` You can select the closest ancestor: ```pycon3 >>> import soupsieve as sv >>> el = sv.select_one('.c', soup) >>> sv.closest('div', el)

Cat

Dog

Mouse

``` You can filter a tag's children (or an iterable of tags): ```pycon3 >>> sv.filter('p:not(.b)', soup.div) [

Cat

,

Mouse

] ``` You can match a single tag: ```pycon3 >>> els = sv.select('p:is(.a, .b, .c)', soup) >>> sv.match('p:not(.b)', els[0]) True >>> sv.match('p:not(.b)', els[1]) False ``` Or even just extract comments: ```pycon3 >>> sv.comments(soup) [' These are animals '] ``` Selectors do not have to be constrained to one line either. You can span selectors over multiple lines just like you would in a CSS file. ```pycon3 >>> selector = """ ... .a, ... .b, ... .c ... """ >>> sv.select(selector, soup) [

Cat

,

Dog

,

Mouse

] ``` You can even use comments to annotate a particularly complex selector. ```pycon3 >>> selector = """ ... /* This isn't complicated, but we're going to annotate it anyways. ... This is the a class */ ... .a, ... /* This is the b class */ ... .b, ... /* This is the c class */ ... .c ... """ >>> sv.select(selector, soup) [

Cat

,

Dog

,

Mouse

] ``` If you've ever used Python's `re` library for regular expressions, you may know that it is often useful to pre-compile a regular expression pattern, especially if you plan to use it more than once. The same is true for Soup Sieve's matchers, though it is not required. If you have a pattern that you want to use more than once, it may be wise to pre-compile it early on: ```pycon3 >>> selector = sv.compile('p:is(.a, .b, .c)') >>> selector.filter(soup.div) [

Cat

,

Dog

,

Mouse

] ``` A compiled object has all the same methods, though the parameters will be slightly different as they don't need things like the pattern or flags once compiled. See [API](./api.md) documentation for more info. Compiled patterns are cached, so if for any reason you need to clear the cache, simply issue the `purge` command. ```pycon3 >>> sv.purge() ``` soupsieve-2.7/docs/src/markdown/.snippets/abbr.md0000644000000000000000000000000013615410400017026 0ustar00soupsieve-2.7/docs/src/markdown/.snippets/links.md0000644000000000000000000000104213615410400017247 0ustar00[aspell]: https://github.com/GNUAspell/aspell [bs4]: https://beautiful-soup-4.readthedocs.io/en/latest/# [build]: https://pypi.org/project/build/ [contains-draft]: https://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors [cssom]: https://drafts.csswg.org/cssom/#common-serializing-idioms [custom-extensions-1]: https://drafts.csswg.org/css-extensions-1/ [html5lib]: https://github.com/html5lib/html5lib-python [lxml]: https://github.com/lxml/lxml [registry]: https://developer.mozilla.org/en-US/docs/Web/API/CustomElementRegistry soupsieve-2.7/docs/src/markdown/.snippets/refs.md0000644000000000000000000000003713615410400017071 0ustar00--8<-- links.md abbr.md --8<-- soupsieve-2.7/docs/src/markdown/.snippets/selector_styles.md0000644000000000000000000000056413615410400021362 0ustar00 soupsieve-2.7/docs/src/markdown/about/changelog.md0000644000000000000000000004021113615410400017246 0ustar00# Changelog ## 2.7 - **NEW**: Add `:open` pseudo selector. - **NEW**: Add `:muted` pseudo selector. - **NEW**: Recognize the following pseudo selectors: `:autofill`, `:buffering`, `:fullscreen`, `:picture-in-picture`, `:popover-open`, `:seeking`, `:stalled`, and `:volume-locked`. These selectors, while recognized, will not match any element as they require a live environment to check element states and browser states. This just prevents Soup Sieve from failing when any of these selectors are specified. 
- **NEW**: A number of existing pseudo-classes are no longer noted as experimental. - **FIX**: Typing fixes. ## 2.6 - **NEW**: Add official support for Python 3.13. - **NEW**: Add support for `&` as scoping root per the CSS Nesting Module, Level 1. When `&` is used outside the context of nesting, it is treated as the scoping root (equivalent to `:scope`). - **FIX**: Improve error message when an unrecognized pseudo-class is used. ## 2.5 - **NEW**: Update to support Python 3.12. - **NEW**: Drop support for Python 3.7. ## 2.4.1 - **FIX**: Attribute syntax for case insensitive flag optionally allows a space, it does not require one. ## 2.4 - **NEW**: Update to support changes related to `:lang()` in the official CSS spec. `:lang("")` should match unspecified languages, e.g. `lang=""`, but not `lang=und`. - **NEW**: Only `:is()` and `:where()` should allow forgiving selector lists according to latest CSS (as far as Soup Sieve supports "forgiving" which is limited to empty selectors). - **NEW**: Formally drop Python 3.6. - **NEW**: Formally declare support for Python 3.11. ## 2.3.2.post1 - **FIX**: Documentation for installation from source is outdated. ## 2.3.2 - **FIX**: Fix some typos in error messages. ## 2.3.1 - **FIX**: Ensure attribute selectors match tags that have new line characters in attributes. (#233) ## 2.3 - **NEW**: Officially support Python 3.10. - **NEW**: Add static typing. - **NEW**: `:has()`, `:is()`, and `:where()` now use a forgiving selector list. While not as forgiving as CSS might be, it will forgive such things as empty sets and empty slots due to multiple consecutive commas, leading commas, or trailing commas. Essentially, these pseudo-classes will match all non-empty selectors and ignore empty ones. As the scraping environment is different than a browser environment, it was chosen not to aggressively forgive bad syntax and invalid features to ensure the user is alerted that their program may not perform as expected. 
- **NEW**: Add support to output a pretty print format of a compiled `SelectorList` for debug purposes. - **FIX**: Some small corner cases discovered with static typing. ## 2.2.1 - **FIX**: Fix an issue with namespaces when one of the keys is `self`. ## 2.2 - **NEW**: `:link` and `:any-link` no longer include `#!html <link>` due to a change in the level 4 selector specification. This actually yields more sane results. - **FIX**: BeautifulSoup, when using `find`, is quite forgiving of odd types that a user may place in an element's attribute value. Soup Sieve will also now be more forgiving and attempt to match these unexpected values in a sane manner by normalizing them before compare. (#212) ## 2.1 - **NEW**: Officially support Python 3.9. - **NEW**: Drop official support for Python 3.5. - **NEW**: In order to avoid conflicts with future CSS specification changes, non-standard pseudo classes will now start with the `:-soup-` prefix. As a consequence, `:contains()` will now be known as `:-soup-contains()`, though for a time the deprecated form of `:contains()` will still be allowed with a warning that users should migrate over to `:-soup-contains()`. - **NEW**: Added new non-standard pseudo class `:-soup-contains-own()` which operates similarly to `:-soup-contains()` except that it only looks at text nodes directly associated with the currently scoped element and not its descendants. - **FIX**: Import `bs4` globally instead of in local functions as it appears there are no adverse effects due to circular imports as `bs4` does not immediately reference `soupsieve` functions and `soupsieve` does not immediately reference `bs4` functions. This should give a performance boost to functions that had previously included `bs4` locally. ## 2.0.1 - **FIX**: Remove unused code. ## 2.0 - **NEW**: `SelectorSyntaxError` is derived from `Exception` not `SyntaxError`. - **NEW**: Remove deprecated `comments` and `icomments` from the API.
- **NEW**: Drop support for EOL Python versions (Python 2 and Python < 3.5). - **FIX**: Corner case with splitting namespace and tag name that have an escaped `|`. ## 1.9.6 /// note | Last version for Python 2.7 /// - **FIX**: Prune dead code. - **FIX**: Corner case with splitting namespace and tag name that have an escaped `|`. ## 1.9.5 - **FIX**: `:placeholder-shown` should not match if the element has content that overrides the placeholder. ## 1.9.4 - **FIX**: `:checked` rule was too strict with `option` elements. The specification for `:checked` does not require an `option` element to be under a `select` element. - **FIX**: Fix level 4 `:lang()` wildcard match handling with singletons. Implicit wildcard matching should not match any singleton. Explicit wildcard matching (`*` in the language range: `*-US`) is allowed to match singletons. ## 1.9.3 - **FIX**: `[attr!=value]` pattern was mistakenly using `:not([attr|=value])` logic instead of `:not([attr=value])`. - **FIX**: Remove undocumented `_QUIRKS` mode flag. Beautiful Soup was meant to use it to help with transition to Soup Sieve, but never released with it. Help with transition at this point is no longer needed. ## 1.9.2 - **FIX**: Shortcut last descendant calculation if possible for performance. - **FIX**: Fix issue where `Doctype` strings can be mistaken for a normal text node in some cases. - **FIX**: A top level tag is not a `:root` tag if it has sibling text nodes or tag nodes. This is an issue that mostly manifests when using `html.parser` as the parser will allow multiple root nodes. ## 1.9.1 - **FIX**: `:root`, `:contains()`, `:default`, `:indeterminate`, `:lang()`, and `:dir()` will properly account for HTML `iframe` elements in their logic when selecting or matching an element. Their logic will be restricted to the document for which the element under consideration applies. 
- **FIX**: HTML pseudo-classes will check that all key elements checked are in the XHTML namespace (HTML parsers that do not provide namespaces will assume the XHTML namespace). - **FIX**: Ensure that all pseudo-class names are case insensitive and allow CSS escapes. ## 1.9 - **NEW**: Allow `:contains()` to accept a list of text to search for. (#115) - **NEW**: Add new `escape` function for escaping CSS identifiers. (#125) - **NEW**: Deprecate `comments` and `icomments` functions in the API to ensure Soup Sieve focuses only on CSS selectors. `comments` and `icomments` will most likely be removed in 2.0. (#130) - **NEW**: Add Python 3.8 support. (#133) - **FIX**: Don't install test files when installing the `soupsieve` package. (#111) - **FIX**: Improve efficiency of `:contains()` comparison. - **FIX**: Null characters should translate to the Unicode REPLACEMENT CHARACTER (`U+FFFD`) according to the specification. This applies to CSS escaped NULL characters as well. (#124) - **FIX**: Escaped EOF should translate to `U+FFFD` outside of CSS strings. In a string, they should just be ignored, but as there is no case where we could resolve such a string and still have a valid selector, string handling remains the same. (#128) ## 1.8 - **NEW**: Add custom selector support. (#92)(#108) - **FIX**: Small tweak to CSS identifier pattern to ensure it matches the CSS specification exactly. Specifically, you can't have an identifier of only `-`. (#107) - **FIX**: CSS string patterns should allow escaping newlines to span strings across multiple lines. (#107) - **FIX**: Newline regular expression for CSS newlines should treat `\r\n` as a single character, especially in cases such as string escapes: `\\\r\n`. (#107) - **FIX**: Allow `--` as a valid identifier or identifier start. (#107) - **FIX**: Bad CSS syntax now raises a `SelectorSyntaxError`, which is still currently derived from `SyntaxError`, but will most likely be derived from `Exception` in the future. 
## 1.7.3 - **FIX**: Fix regression with tag names in regards to case sensitivity, and ensure there are tests to prevent breakage in the future. - **FIX**: XHTML should always be case sensitive like XML. ## 1.7.2 - **FIX**: Fix HTML detection `type` selector. - **FIX**: Fixes for `:enabled` and `:disabled`. - **FIX**: Provide a way for Beautiful Soup to parse selectors in a quirks mode to mimic some of the quirks of the old select method prior to Soup Sieve, but with warnings. This is to help old scripts to not break during the transitional period with newest Beautiful Soup. In the future, these quirks will raise an exception as Soup Sieve requires selectors to follow the CSS specification. ## 1.7.1 - **FIX**: Fix issue with `:has()` selector where a leading combinator can only be provided in the first selector in a relative selector list. ## 1.7 - **NEW**: Add support for `:in-range` and `:out-of-range` selectors. (#60) - **NEW**: Add support for `:defined` selector. (#76) - **FIX**: Fix pickling issue when compiled selector contains a `NullSelector` object. (#70) - **FIX**: Better exception messages in the CSS selector parser and fix a position reporting issue that can occur in some exceptions. (#72, #73) - **FIX**: Don't compare prefixes when evaluating attribute namespaces, compare the actual namespace. (#75) - **FIX**: Split whitespace attribute lists by all whitespace characters, not just space. - **FIX**: `:nth-*` patterns were converting numbers to base 16 when they should have been converting to base 10. ## 1.6.2 - **FIX**: Fix pattern compile issues on Python < 2.7.4. - **FIX**: Don't use `\d` in Unicode `Re` patterns as they will contain characters outside the range of `[0-9]`. ## 1.6.1 - **FIX**: Fix warning about not importing `Mapping` from `collections.abc`. ## 1.6 - **NEW**: Add `closest` method to the API that matches closest ancestor. - **FIX**: Add missing `select_one` reference to module's `__all__`. 
## 1.5 - **NEW**: Add `select_one` method like Beautiful Soup has. - **NEW**: Add `:dir()` selector (HTML only). - **FIX**: Fix issues when handling HTML fragments (elements without a `BeautifulSoup` object as a parent). - **FIX**: Fix internal `nth` range check. ## 1.4.0 - **NEW**: Throw `NotImplementedError` for at-rules: `@page`, etc. - **NEW**: Match nothing for `:host`, `:host()`, and `:host-context()`. - **NEW**: Add support for `:read-write` and `:read-only`. - **NEW**: Selector patterns can be annotated with CSS comments. - **FIX**: `\r`, `\n`, and `\f` cannot be escaped with `\` in CSS. You must use Unicode escapes. ## 1.3.1 - **FIX**: Fix issue with undefined namespaces. ## 1.3 - **NEW**: Add support for `:scope`. - **NEW**: `:user-invalid`, `:playing`, `:paused`, and `:local-link` will not cause a failure, but all will match nothing as their use cases are not possible in an environment outside a web browser. - **FIX**: Fix `[attr~=value]` handling of whitespace. According to the spec, if the value contains whitespace, or is an empty string, it should not match anything. - **FIX**: Precompile internal patterns for pseudo-classes to prevent having to parse them again. ## 1.2.1 - **FIX**: More descriptive exceptions. Exceptions will also now mention position in the pattern that is problematic. - **FIX**: `filter` ignores `NavigableString` objects in normal iterables and `Tag` iterables. Basically, it filters all Beautiful Soup document parts regardless of iterable type where as it used to only filter out a `NavigableString` in a `Tag` object. This is viewed as fixing an inconsistency. - **FIX**: `DEBUG` flag has been added to help with debugging CSS selector parsing. This is mainly for development. - **FIX**: If forced to search for language in `meta` tag, and no language is found, cache that there is no language in the `meta` tag to prevent searching again during the current select. 
- **FIX**: If a non `BeautifulSoup`/`Tag` object is given to the API to compare against, raise a `TypeError`. ## 1.2 - **NEW**: Add Python 2.7 support. - **NEW**: Remove old pre 1.0 deprecations. ## 1.1 - **NEW**: Adds support for `[attr!=value]` which is equivalent to `:not([attr=value])`. - **NEW**: Add support for `:active`, `:focus`, `:hover`, `:visited`, `:target`, `:focus-within`, `:focus-visible`, `:target-within`, `:current()`/`:current`, `:past`, and `:future`, but they will never match as these states don't exist in the Soup Sieve environment. - **NEW**: Add support for `:checked`, `:enabled`, `:disabled`, `:required`, `:optional`, `:default`, and `:placeholder-shown` which will only match in HTML documents as these concepts are not defined in XML. - **NEW**: Add support for `:link` and `:any-link`, both of which will target all `<a>`, `<area>`, and `<link>` elements with an `href` attribute as all links will be treated as unvisited in Soup Sieve. - **NEW**: Add support for `:lang()` (CSS4) which works in XML and HTML. - **NEW**: Users must install Beautiful Soup themselves. This requirement is removed in the hopes that Beautiful Soup may use this in the future. - **FIX**: Attributes in the form `prefix:attr` can be matched with the form `[prefix\:attr]` without specifying a namespace if desired. - **FIX**: Fix exception when `[type]` is used (with no value). ## 1.0.2 - **FIX**: Use proper CSS identifier patterns for tag names, classes, ids, etc. Things like `#3` or `#-3` should not match and should require `#\33` or `#-\33`. - **FIX**: Do not raise `NotImplementedError` for supported pseudo classes/elements with bad syntax, instead raise `SyntaxError`. ## 1.0.1 - **FIX**: When giving a tag to `select`, it should only return the children of that tag, never the tag itself. - **FIX**: For informational purposes, raise a `NotImplementedError` when an unsupported pseudo class is used. ## 1.0 - **NEW**: Official 1.0.0 release. ## 1.0.0b2 - **NEW**: Drop document flags.
Document type can be detected from the Beautiful Soup object directly. - **FIX**: CSS selectors should be evaluated with CSS whitespace rules. - **FIX**: Processing instructions, CDATA, and declarations should all be ignored in `:contains` and child considerations for `:empty`. - **FIX**: In Beautiful Soup, the document itself is the first tag. Do not match the "document" tag by returning false for any tag that doesn't have a parent. ## 1.0.0b1 - **NEW**: Add support for non-standard `:contains()` selector. - **FIX**: Compare pseudo class names case insensitively when matching unexpected cases. - **FIX**: Don't allow attribute case flags when no attribute value is defined. ## 0.6 - **NEW**: `mode` attribute is now called `flags` to allow for other options in the future. - **FIX**: More corner cases for `nth` selectors. ## 0.5.3 - **FIX**: Previously, all pseudo classes' selector lists were evaluated as one big group, but now each pseudo class's selector lists are evaluated separately. - **FIX**: CSS selector tokens are not case sensitive. ## 0.5.2 - **FIX**: Add missing `s` flag to attribute selector for forced case sensitivity of attribute values. - **FIX**: Relax attribute pattern matching to allow non-essential whitespace. - **FIX**: Attribute selector flags themselves are not case sensitive. - **FIX**: `type` attribute in HTML is handled special. While all other attributes values are case sensitive, `type` in HTML is usually treated special and is insensitive. In XML, this is not the case. ## 0.5.1 - **FIX**: Fix namespace check for `:nth-of-type`. ## 0.5 - **NEW**: Deprecate `commentsiter` and `selectiter` in favor of `icomments` and `iselect`. Expect removal in version 1.0. ## 0.4 - **NEW**: Initial prerelease. soupsieve-2.7/docs/src/markdown/about/contributing.md0000644000000000000000000000507313615410400020035 0ustar00# Contributing & Support ## Become a Sponsor :octicons-heart-fill-16:{: .heart-throb} Open source projects take time and money. 
Help support the project by becoming a sponsor. You can add your support at any tier you feel comfortable with. No amount is too little. We also accept one time contributions via PayPal. [:octicons-mark-github-16: GitHub Sponsors](https://github.com/sponsors/facelessuser){: .md-button .md-button--primary } [:fontawesome-brands-paypal: PayPal](https://www.paypal.me/facelessuser){ .md-button} ## Bug Reports 1. Please **read the documentation** and **search the issue tracker** to try and find the answer to your question **before** posting an issue. 2. When creating an issue on the repository, please provide as much information as possible: - Version being used. - Operating system. - Version of Python. - Errors in console. - Detailed description of the problem. - Examples for reproducing the error. You can post pictures, but if specific text or code is required to reproduce the issue, please provide the text in a plain text format for easy copy/paste. The more info provided the greater the chance someone will take the time to answer, implement, or fix the issue. 3. Be prepared to answer questions and provide additional information if required. Issues in which the creator refuses to respond to follow up questions will be marked as stale and closed. ## Reviewing Code Take part in reviewing pull requests and/or reviewing direct commits. Make suggestions to improve the code and discuss solutions to overcome weakness in the algorithm. ## Answer Questions in Issues Take time and answer questions and offer suggestions to people who've created issues in the issue tracker. Often people will have questions that you might have an answer for. Or maybe you know how to help them accomplish a specific task they are asking about. Feel free to share your experience to help others. ## Pull Requests Pull requests are welcome, and a great way to help fix bugs and add new features. 
If you are interested in directly contributing to the code, please check out [Development](./development.md) for more information on the environment and processes. ## Documentation Improvements A ton of time has been spent not only creating and supporting this tool and related extensions, but also spent making this documentation. If you feel it is still lacking, show your appreciation for the tool by helping to improve the documentation. Check out [Development](./development.md) for more info on documentation. soupsieve-2.7/docs/src/markdown/about/development.md0000644000000000000000000003656313615410400017660 0ustar00# Development ## Project Layout There are a number of files for build, test, and continuous integration in the root of the project, but in general, the project is broken up like so. ``` ├── docs │   └── src │      ├── dictionary │      └── markdown ├── soupsieve ├── requirements └── tests ``` Directory | Description --------------------- | ----------- `docs/src/dictionary` | Contains the spell check wordlist(s) for the project. `docs/src/markdown` | Contains the content for the documentation. `soupsieve` | Contains the source code for the project. `requirements` | Contains files with lists of dependencies that are required for the project, and required for continuous integration. `tests` | Contains unit test files. ## Coding Standards When writing code, the code should roughly conform to PEP8 and PEP257 suggestions along with some other requirements. The project utilizes the @astral-sh/ruff linter that helps to ensure code conforms (give or take some of the rules). When in doubt, follow the formatting hints of existing code when adding files or modifying existing files. Usually this can be automated with Tox (assuming it is installed): `tox -e lint`. ## Building and Editing Documents Documents are in Markdown (with some additional syntax provided by extensions) and are converted to HTML via Python Markdown. 
If you would like to build and preview the documentation, you must have these packages installed: - @Python-Markdown/markdown: the Markdown parser. - @mkdocs/mkdocs: the document site generator. - @squidfunk/mkdocs-material: a material theme for MkDocs. - @facelessuser/pymdown-extensions: a Python Markdown extension bundle. It is advised that you just install document dependencies with the following as the above list may not include all document plugins: ``` pip install -r requirements/docs.txt ``` In order to build and preview the documents, just run the command below from the root of the project and you should be able to view the documents at `localhost:8000` in your browser. After that, you should be able to update the documents and have your browser preview update live. ``` mkdocs serve ``` ## Spell Checking Documents Spell checking is performed via @facelessuser/pyspelling. During validation we build the docs and spell check various files in the project. [Aspell][aspell] must be installed and in the path. Currently this project uses one of the more recent versions of Aspell. It is not expected that everyone will install and run Aspell locally, but it will be run in CI tests for pull requests. In order to perform the spell check locally, it is expected you are set up to build the documents, and that you have Aspell installed in your system path (if needed you can use the `--binary` option to point to the location of your Aspell binary). It is also expected that you have the `en` dictionary installed as well. To initiate the spell check, run the following command from the root of the project. You will need to make sure the documents are built first: ``` mkdocs build --clean ``` And then run the spell checker. ``` pyspelling ``` It should print out the files with the misspelled words if any are found. If you find it prints words that are not misspelled, you can add them in `docs/src/dictionary/en-custom.txt`.
## Validation Tests In order to preserve good code health, a test suite has been put together with pytest (@pytest-dev/pytest). To run these tests, you can use the following command: ``` pytest ``` ### Running Validation With Tox Tox (@tox-dev/tox) is a great way to run the validation tests, spelling checks, and linting in virtual environments so as not to mess with your current working environment. Tox will use the specified Python version for the given environment and create a virtual environment and install all the needed requirements (minus Aspell). You could also set up your own virtual environments with the Virtualenv module without Tox, and manually do the same. First, you need to have Tox installed: ``` pip install tox ``` By running Tox, it will walk through all the environments and create them (assuming you have all the Python versions on your machine) and run the related tests. See `tox.ini` to learn more. ``` tox ``` If you don't have all the Python versions needed to test all the environments, those entries will fail. To run the tests for specific versions of Python, you specify the environment with `-e pyXY` where `X` is the major version and `Y` is the minor version. ``` tox -e py310 ``` To target linting: ``` tox -e lint ``` To select spell checking and document building: ``` tox -e documents ``` ## Code Coverage When running the validation tests through Tox, it is set up to track code coverage via the Coverage (@bitbucket:ned/coveragepy) module. Coverage is run on each `pyxx` environment. If you've made changes to the code, you can clear the old coverage data: ``` coverage erase ``` Then run each unit test environment to generate coverage data. All the data from each run is merged together. HTML is output for each file in `.tox/pyXX/tmp`. You can use these to see areas that are not covered/exercised yet with testing. You can check out `tox.ini` to see how this is accomplished.
## Code Documentation The Soup Sieve module is laid out in the following structure: ``` soupsieve ├── __init__.py ├── __meta__.py ├── css_match.py ├── css_parser.py ├── css_types.py └── util.py ``` File | Description --------------- | ----------- `__init__.py` | Contains the API for the user. `__meta__.py` | Contains package metadata like version. `css_match.py` | Contains the logic for matching tags with a CSS selector. `css_parser.py` | Contains the CSS selector parser. `css_types.py` | Contains the CSS types for the compiled CSS patterns. `util.py` | Contains miscellaneous helper functions, classes, and constants. ### Compiled CSS Selector Structure When a CSS selector string is given to Soup Sieve, it is run through the `CSSParser` class. `CSSParser` will return a `SelectorList` class. This class is sent to the `SoupSieve` class as a parameter along with things like `namespace` and `flags`. One of the most important things to understand when contributing is the structure of the `SelectorList` class. A `SelectorList` represents a list of compound selectors. So if you had the selector `#!css div > p`, you would get a `SelectorList` object containing one `Selector` object. If you had `#!css div, p`, you would get a `SelectorList` with two `Selector` objects as this is a selector list of two compound selectors. A compound selector gets parsed into pieces. Each part of a specific compound selector is usually assigned to an attribute in a single `Selector` object. The attributes of the `Selector` object may be as simple as a boolean or a string, but they can also be a tuple of more `SelectorList` objects. In the case of `#!css *:not(p, div)`, `#!css *` will be a `SelectorList` with one `Selector`. The `#!css :not(p, div)` selector list will be a tuple containing one `SelectorList` of two `Selectors` (one for `p` and one for `div`) under the `selectors` attribute of the `#!css *` `Selector`.
In short, `Selectors` are always contained within a `SelectorList`, and a compound selector is a single `Selector` object that may chain other `SelectorLists` objects depending on the complexity of the compound selector. If you provide a selector list, then you will get multiple `Selector` objects (one for each compound selector in the list) which in turn may chain other `Selector` objects. To view the selector list in a compiled object for debugging purposes, one can access it via `SoupSieve.selectors`, though it is recommended to pretty print them: ```pycon3 >>> import soupsieve as sv >>> sv.compile('this > that.class[name=value]').selectors.pretty() SelectorList( selectors=( Selector( tag=SelectorTag( name='that', prefix=None), ids=(), classes=( 'class', ), attributes=( SelectorAttribute( attribute='name', prefix='', pattern=re.compile( '^value$'), xml_type_pattern=None), ), nth=(), selectors=(), relation=SelectorList( selectors=( Selector( tag=SelectorTag( name='this', prefix=None), ids=(), classes=(), attributes=(), nth=(), selectors=(), relation=SelectorList( selectors=(), is_not=False, is_html=False), rel_type='>', contains=(), lang=(), flags=0), ), is_not=False, is_html=False), rel_type=None, contains=(), lang=(), flags=0), ), is_not=False, is_html=False) ``` ### `SelectorList` ```py3 class SelectorList: """Selector list.""" def __init__(self, selectors=tuple(), is_not=False): """Initialize.""" ``` Attribute | Description -------------- | ----------- `selectors` | A list of `Selector` objects. `is_not` | The selectors in the selector list are from a `:not()`. `is_html` | The selectors in the selector list are HTML specific. ### `Selector` ```py3 class Selector: """Selector.""" def __init__( self, tag, ids, classes, attributes, nth, selectors, relation, rel_type, contains, lang, flags ): """Initialize.""" ``` Flags | Description ------------------- | ----------- `SEL_EMPTY` | The current compound selector contained an `:empty` pseudo-class. 
`SEL_ROOT` | The current compound selector contains `:root`. `SEL_DEFAULT` | The compound selector has a `:default` pattern and requires additional logic to determine if it is the first `submit` button in a form. `SEL_INDETERMINATE` | The compound selector has an `:indeterminate` pattern and requires additional logic to ensure a `radio` element and all of the `radio` elements with the same `name` under a form are not set. Attribute | Description --------------- | ----------- `tag` | Contains a single [`SelectorTag`](#selectortag) object, or `None`. `ids` | Contains a tuple of ids to match. Usually if multiple conflicting ids are present, it simply won't match a tag, but it allows multiple to handle the syntax `tag#1#2` even if it is invalid. `classes` | Contains a tuple of class names to match. `attributes` | Contains a tuple of attributes. Each attribute is represented as a [`SelectorAttribute`](#selectorattribute). `nth` | Contains a tuple containing `nth` selectors, each selector being represented as a [`SelectorNth`](#selectornth). `nth` selectors contain things like `:first-child`, `:only-child`, `#!css :nth-child()`, `#!css :nth-of-type()`, etc. `selectors` | Contains a tuple of `SelectorList` objects for each pseudo-class selector part of the compound selector: `#!css :is()`, `#!css :not()`, `#!css :has()`, etc. `relation` | This will contain a `SelectorList` object with one `Selector` object, which could in turn chain an additional relation depending on the complexity of the compound selector. For instance, `div > p + a` would be a `Selector` for `a` that contains a `relation` for `p` (another `SelectorList` object) which also contains a relation of `div`. When matching, we would match that the tag is `a`, and then walk its relation chain verifying that they all match. In this case, the relation chain would be a direct, previous sibling of `p`, which has a direct parent of `div`. A `:has()` pseudo-class would walk this in the opposite order.
`div:has(> p + a)` would verify `div`, and then check for a child of `p` with a sibling of `a`. `rel_type` | `rel_type` is attached to relational selectors. In the case of `#!css div > p + a`, the relational selectors of `div` and `p` would get a relational type of `>` and `+` respectively. `:has()` relational `rel_type` values are preceded with `:` to signify a forward looking relation. `contains` | Contains a tuple of [`SelectorContains`](#selectorcontains) objects. Each object contains the list of text to match an element's content against. `lang` | Contains a tuple of [`SelectorLang`](#selectorlang) objects. `flags` | Selector flags that are used to signal that a type of selector is present. ### `SelectorNull` ```py3 class SelectorNull: """Null Selector.""" def __init__(self): """Initialize.""" ``` The null selector is like `Selector`, but it matches nothing. ### `SelectorTag` ```py3 class SelectorTag: """Selector tag.""" def __init__(self, name, prefix): """Initialize.""" ``` Attribute | Description ------------- | ----------- `name` | `name` contains the tag name to match. `prefix` | `prefix` contains the namespace prefix to match. `prefix` can also be `None`. ### `SelectorAttribute` ```py3 class SelectorAttribute: """Selector attribute rule.""" def __init__(self, attribute, prefix, pattern, xml_type_pattern): """Initialize.""" ``` Attribute | Description ------------------- | ----------- `attribute` | Contains the attribute name to match. `prefix` | Contains the attribute namespace prefix to match if any. `pattern` | Contains a `re` regular expression object that matches the desired attribute value. `xml_type_pattern` | As the default `type` pattern is case insensitive, when the attribute name is `type` and case sensitivity has not been explicitly defined, a secondary case sensitive `type` pattern is compiled for use with XML documents when detected.
### `SelectorContains` ```py3 class SelectorContains: """Selector contains rule.""" def __init__(self, text): """Initialize.""" ``` Attribute | Description ------------------- | ----------- `text` | A tuple of acceptable text that an element should match. An element only needs to match at least one. ### `SelectorNth` ```py3 class SelectorNth: """Selector nth type.""" def __init__(self, a, n, b, of_type, last, selectors): """Initialize.""" ``` Attribute | Description ------------- | ----------- `a` | The `a` value in the formula `an+b` specifying an index. `n` | `True` if the provided formula has included a literal `n` which signifies the formula is not a static index. `b` | The `b` value in the formula `an+b`. `of_type` | `True` if the `nth` pseudo-class is an `*-of-type` variant. `last` | `True` if the `nth` pseudo-class is a `*last*` variant. `selectors` | A `SelectorList` object representing the `of S` portion of `:nth-child(an+b [of S]?)`. ### `SelectorLang` ```py3 class SelectorLang: """Selector language rules.""" def __init__(self, languages): """Initialize.""" ``` Attribute | Description ------------- | ----------- `languages` | A list of regular expression objects that match a language pattern. soupsieve-2.7/docs/src/markdown/about/license.md0000644000000000000000000000003713615410400016743 0ustar00# License --8<-- "LICENSE.md" soupsieve-2.7/docs/src/markdown/about/security.md0000644000000000000000000000002513615410400017165 0ustar00--8<-- "SECURITY.md" soupsieve-2.7/docs/src/markdown/selectors/basic.md0000644000000000000000000004145413615410400017303 0ustar00# Basic Selectors Syntax and notation for basic selectors. ## Escapes Soup Sieve selectors support using CSS escapes. So if you need to provide Unicode, or non-standard characters, you can use CSS style escapes. Escapes can be specified with a backslash followed by 1 - 6 hexadecimal digits: `#!css \20AC`, `#!css \0020AC`, etc.
If you need to terminate an escape to avoid it accumulating unintended hexadecimal characters, you can use a space: `#!css \0020AC dont-escape-me`. You can also escape any non-hexadecimal character, and it will be treated as that character: `#!css \+` --> `+`. The one exception is that you cannot escape the form feed, newline, or carriage return. You can always use Soup Sieve's [escape command](../api.md#soupsieveescape) to escape identifiers as well. ## Type Selectors Type selectors match elements by node name. If a default namespace is defined in the [namespace dictionary](../api.md#namespaces), and no [namespace](#namespace-selectors) is explicitly defined, it will be assumed that the element must be in the default namespace. /// tab | Syntax ```css element ``` /// /// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...
Here is some text.
...
Here is some more text.
... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('div')) [
Here is some text.
,
Here is some more text.
] ``` /// /// tip | Additional Reading https://developer.mozilla.org/en-US/docs/Web/CSS/Type_selectors /// ## Universal Selectors The Universal selector (`*`) matches elements of any type. /// tab | Syntax ```css * ``` /// /// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

Here is some text.

...
Here is some more text.
... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('*')) [
Here is some text.
Here is some more text.
, ,
Here is some text.
Here is some more text.
,
Here is some text.
,
Here is some more text.
] ``` /// /// tip | Additional Reading https://developer.mozilla.org/en-US/docs/Web/CSS/Universal_selectors /// ## ID Selectors The ID selector matches an element based on its `id` attribute. The ID must match exactly. /// tab | Syntax ```css #id ``` /// /// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...
Here is some text.
...
Here is some more text.
... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('#some-id')) [
Here is some text.
] ``` /// /// tip | Additional Reading https://developer.mozilla.org/en-US/docs/Web/CSS/ID_selectors /// /// note | XML Support While the use of the `id` attribute (in the context of CSS) is a very HTML centric idea, it is supported for XML as well because Beautiful Soup supported it before Soup Sieve's existence. /// ## Class Selectors The class selector matches an element based on the values contained in the `class` attribute. The `class` attribute is treated as a whitespace separated list, where each item is a **class**. /// tab | Syntax ```css .class ``` /// /// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...
Here is some text.
...
Here is some more text.
... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('.some-class')) [
Here is some text.
] ``` /// /// tip | Additional Reading https://developer.mozilla.org/en-US/docs/Web/CSS/Class_selectors /// /// note | XML Support While the use of the `class` attribute (in the context of CSS) is a very HTML centric idea, it is supported for XML as well because Beautiful Soup supported it before Soup Sieve's existence. /// ## Attribute Selectors The attribute selector matches an element based on its attributes. When specifying a value of an attribute, if it contains whitespace or special characters, you should quote them with either single or double quotes. /// tip | Additional Reading https://developer.mozilla.org/en-US/docs/Web/CSS/Attribute_selectors /// /// define | `[attribute]` - Represents elements with an attribute named **attribute**. //// tab | Syntax ```css [attr] ``` //// //// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...
... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href]')) [Internal link, Example link, Insensitive internal link, Example org link] ``` //// /// /// define `[attribute=value]` - Represents elements with an attribute named **attribute** that also has a value of **value**. //// tab | Syntax ```css [attr=value] [attr="value"] ``` //// //// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href="#internal"]')) [Internal link] ``` //// /// /// define `[attribute~=value]` - Represents elements with an attribute named **attribute** whose value is a space separated list which contains **value**. //// tab | Syntax ```css [attr~=value] [attr~="value"] ``` //// //// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[class~=class2]')) [Internal link] ``` //// /// /// define `[attribute|=value]` - Represents elements with an attribute named **attribute** whose value is a dash separated list that starts with **value**. //// tab | Syntax ```css [attr|=value] [attr|="value"] ``` //// //// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...
Some text
...
Some more text
... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('div[lang|="en"]')) [
Some text
,
Some more text
] ``` //// /// /// define `[attribute^=value]` - Represents elements with an attribute named **attribute** whose value starts with **value**. //// tab | Syntax ```css [attr^=value] [attr^="value"] ``` //// //// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href^=http]')) [Example link, Example org link] ``` //// /// /// define `[attribute$=value]` - Represents elements with an attribute named **attribute** whose value ends with **value**. //// tab | Syntax ```css [attr$=value] [attr$="value"] ``` //// //// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href$=org]')) [Example org link] ``` //// /// /// define `[attribute*=value]` - Represents elements with an attribute named **attribute** whose value containing the substring **value**. //// tab | Syntax ```css [attr*=value] [attr*="value"] ``` //// //// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href*="example"]')) [Example link, Example org link] ``` //// /// /// define `[attribute!=value]`:material-star:{: title="Custom" data-md-color-primary="green" .icon} - Equivalent to `#!css :not([attribute=value])`. //// tab | Syntax ```css [attr!=value] [attr!="value"] ``` //// //// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... 
""" >>> soup = bs(html, 'html5lib') >>> print(soup.select('a[href!="#internal"]')) [Example link, Insensitive internal link, Example org link] ``` //// /// /// define `[attribute operator value i]`:material-flask:{: title="Experimental" data-md-color-primary="purple" .icon} - Represents elements with an attribute named **attribute** and whose value, when the **operator** is applied, matches **value** *without* case sensitivity. In general, attribute comparison is insensitive in normal HTML, but not XML. `i` is most useful in XML documents. //// tab | Syntax ```css [attr=value i] [attr="value" i] ``` //// //// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href="#INTERNAL" i]')) [Internal link] ``` //// /// /// define `[attribute operator value s]` :material-flask:{: title="Experimental" data-md-color-primary="purple" .icon} - Represents elements with an attribute named **attribute** and whose value, when the **operator** is applied, matches **value** *with* case sensitivity. //// tab | Syntax ```css [attr=value s] [attr="value" s] ``` //// //// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('[href="#INTERNAL" s]')) [] >>> print(soup.select('[href="#internal" s]')) [Internal link] ``` //// /// ## Namespace Selectors Namespace selectors are used in conjunction with type and universal selectors as well as attribute names in attribute selectors. They are specified by declaring the namespace and the selector separated with `|`: `namespace|selector`. `namespace`, in this context, is the prefix defined via the [namespace dictionary](../api.md#namespaces). The prefix defined for the CSS selector does not need to match the prefix name in the document as it is the namespace associated with the prefix that is compared, not the prefix itself. 
The universal selector (`*`) can be used to represent any namespace just as it can with types. By default, type selectors without a namespace selector will match any element whose type matches, regardless of namespace. But if a CSS default namespace is declared (one with an empty key: `{"": "http://www.w3.org/1999/xhtml"}`), all type selectors will assume the default namespace unless an explicit namespace selector is specified. For example, if the default namespace was defined to be `http://www.w3.org/1999/xhtml`, the selector `a` would only match `a` tags that are within the `http://www.w3.org/1999/xhtml` namespace. The one exception is within pseudo classes (`:not()`, `:has()`, etc.) as namespaces are not considered within pseudo classes unless one is explicitly specified. If the namespace is omitted (`|element`), any element without a namespace will be matched. In HTML documents that support namespaces (XHTML and HTML5), HTML elements are counted as part of the `http://www.w3.org/1999/xhtml` namespace, but attributes usually do not have a namespace unless one is explicitly defined in the markup. Namespaces can be used with attribute selectors as well except that when `[|attribute]` is used, it is equivalent to `[attribute]`. /// tab | Syntax ```css ns|element ns|* *|* *|element |element [ns|attr] [*|attr] [|attr] ``` /// /// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

SVG Example

...

Soup Sieve Docs

... ... ... MDN Web Docs ... ... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('svg|a', namespaces={'svg': 'http://www.w3.org/2000/svg'})) [MDN Web Docs] >>> print(soup.select('a', namespaces={'svg': 'http://www.w3.org/2000/svg'})) [Soup Sieve Docs, MDN Web Docs] >>> print(soup.select('a', namespaces={'': 'http://www.w3.org/1999/xhtml', 'svg': 'http://www.w3.org/2000/svg'})) [Soup Sieve Docs] >>> print(soup.select('[xlink|href]', namespaces={'xlink': 'http://www.w3.org/1999/xlink'})) [MDN Web Docs] >>> print(soup.select('[|href]', namespaces={'xlink': 'http://www.w3.org/1999/xlink'})) [Soup Sieve Docs] ``` /// --8<-- selector_styles.md --8<-- soupsieve-2.7/docs/src/markdown/selectors/combinators.md0000644000000000000000000000671213615410400020540 0ustar00# Combinators and Selector Lists CSS employs a number of tokens in order to represent lists or to provide relational context between two selectors. ## Selector Lists Selector lists use the comma (`,`) to join multiple selectors in a list. When presented with a selector list, any selector in the list that matches an element will return that element. /// tab | Syntax ```css element1, element2 ``` /// /// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

Title

...

Paragraph

... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('h1, p')) [

Title

,

Paragraph

] ``` /// ## Descendant Combinator Descendant combinators combine two selectors with whitespace ( ) in order to signify that the second element is matched if it has an ancestor that matches the first element. /// tab | Syntax ```css parent descendant ``` /// /// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

Paragraph 1

...

Paragraph 2

... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('body p')) [

Paragraph 1

,

Paragraph 2

] ``` /// /// tip | Additional Reading https://developer.mozilla.org/en-US/docs/Web/CSS/Descendant_combinator /// ## Child combinator Child combinators combine two selectors with `>` in order to signify that the second element is matched if it has a parent that matches the first element. /// tab | Syntax ```css parent > child ``` /// /// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

Paragraph 1

...
  • Paragraph 2

... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('div > p')) [

Paragraph 1

] ``` /// /// tip | Additional Reading https://developer.mozilla.org/en-US/docs/Web/CSS/Child_combinator /// ## General sibling combinator General sibling combinators combine two selectors with `~` in order to signify that the second element is matched if it has a sibling that precedes it that matches the first element. /// tab | Syntax ```css prevsibling ~ sibling ``` /// /// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

Title

...

Paragraph 1

...

Paragraph 2

... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('h1 ~ p')) [

Paragraph 1

,

Paragraph 2

] ``` /// /// tip | Additional Reading https://developer.mozilla.org/en-US/docs/Web/CSS/General_sibling_combinator /// ## Adjacent sibling combinator Adjacent sibling combinators combine two selectors with `+` in order to signify that the second element is matched if it has an adjacent sibling that precedes it that matches the first element. /// tab | Syntax ```css prevsibling + nextsibling ``` /// /// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

Title

...

Paragraph 1

...

Paragraph 2

... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select('h1 + p')) [

Paragraph 1

] ``` /// /// tip | Additional Reading https://developer.mozilla.org/en-US/docs/Web/CSS/Adjacent_sibling_combinator /// --8<-- selector_styles.md --8<-- soupsieve-2.7/docs/src/markdown/selectors/index.md0000644000000000000000000001404313615410400017323 0ustar00# General Details ## Implementation Specifics The CSS selectors are based off of the CSS specification and includes not only stable selectors, but may also include selectors currently under development from the draft specifications. Primarily support has been added for selectors that were feasible to implement and most likely to get practical use. In addition to the selectors in the specification, Soup Sieve also supports a couple non-standard selectors. Soup Sieve aims to allow users to target XML/HTML elements with CSS selectors. It implements many pseudo classes, but it does not currently implement any pseudo elements and has no plans to do so. Soup Sieve also will not match anything for pseudo classes that are only relevant in a live, browser environment, but it will gracefully handle them if they've been implemented; such pseudo classes are non-applicable in the Beautiful Soup environment and are noted in [Non-Applicable Pseudo Classes](./unsupported.md#non-applicable-pseudo-classes). When speaking about namespaces, they only apply to XML, XHTML, or when dealing with recognized foreign tags in HTML5. Currently, Beautiful Soup's `html5lib` parser is the only parser that will return the appropriate namespaces for a HTML5 document. If you are using XHTML, you have to use the Beautiful Soup's `lxml-xml` parser (or `xml` for short) to get the appropriate namespaces in an XHTML document. In addition to using the correct parser, you must provide a dictionary of namespaces to Soup Sieve in order to use namespace selectors. See the documentation on [namespaces](../api.md#namespaces) to learn more. 
While an effort is made to mimic CSS selector behavior, there may be some differences or quirks. Please report issues if any are found. ## Selector Context Key
Symbol Name Description
:material-language-html5:{: data-md-color-primary="orange" .big-icon} HTML Some selectors are very specific to HTML and either have no meaningful representation in XML, or such functionality has not been implemented. Selectors that are HTML only will be noted with :material-language-html5:{: data-md-color-primary="orange"}, and will match nothing if used in XML.
:material-star:{: data-md-color-primary="green" .big-icon} Custom Soup Sieve has implemented a couple non-standard selectors. These can contain useful selectors that were rejected from the official CSS specifications, selectors implemented by other systems such as JQuery, or even selectors specifically created for Soup Sieve. If a selector is considered non standard, it will be marked with :material-star:{: title="Custom" data-md-color-primary="green"}.
:material-flask:{: title="Experimental" data-md-color-primary="purple" .big-icon} Experimental All selectors that are from the current working draft of CSS4 are considered experimental and are marked with :material-flask:{: title="Experimental" data-md-color-primary="purple"}. Additionally, if there are other immature selectors, they may be marked as experimental as well. Experimental may mean we are not entirely sure if our implementation is correct, that things may still be in flux as they are part of a working draft, or even both. If at any time a working draft drops a selector from the current draft, it will most likely also be removed here, most likely with a deprecation path, except where there may be a conflict that requires a less graceful transition. One exception is in the rare case that the selector is found to be far too useful despite being rejected. In these cases, we may adopt them as "custom" selectors.
/// tip | Additional Reading If usage of a selector is not clear in this documentation, you can find more information by reading these specification documents: [CSS Level 3 Specification](https://www.w3.org/TR/selectors-3/) : Contains the latest official document outlining official behaviors of CSS selectors. [CSS Level 4 Working Draft](https://www.w3.org/TR/selectors-4/) : Contains the latest published working draft of the CSS level 4 selectors which outlines the experimental new selectors and experimental behavioral changes. [HTML5](https://www.w3.org/TR/html50/) : The HTML 5.0 specification document. Defines the semantics regarding HTML. [HTML Living Standard](https://html.spec.whatwg.org/) : The HTML Living Standard document. Defines semantics regarding HTML. /// ## Selector Terminology Certain terminology is used throughout this document when describing selectors. In order to fully understand the syntax a selector may implement, it is important to understand a couple of key terms. ### Selector Selector is used to describe any selector whether it is a [simple](#simple-selector), [compound](#compound-selector), or [complex](#complex-selector) selector. ### Simple Selector A simple selector represents a single condition on an element. It can be a [type selector](#type-selectors), [universal selector](#universal-selectors), [ID selector](#id-selectors), [class selector](#class-selectors), [attribute selector](#attribute-selectors), or [pseudo class selector](#pseudo-classes). ### Compound Selector A [compound](#compound-selector) selector is a sequence of [simple](#simple-selector) selectors. They do not contain any [combinators](#combinators-and-selector-lists). If a universal or type selector is used, they must come first, and only one instance of either a universal or type selector can be used, both cannot be used at the same time. 
### Complex Selector A complex selector consists of multiple [simple](#simple-selector) or [compound](#compound-selector) selectors joined with [combinators](#combinators-and-selector-lists). ### Selector List A selector list is a list of selectors joined with a comma (`,`). A selector list is used to specify that a match is valid if any of the selectors in a list matches. --8<-- selector_styles.md --8<-- soupsieve-2.7/docs/src/markdown/selectors/pseudo-classes.md0000644000000000000000000014237113615410400021154 0ustar00# Pseudo-Classes ## Overview These are pseudo classes that are either fully or partially supported. Partial support is usually due to limitations of not being in a live, browser environment. Pseudo classes that cannot be implemented are found under [Non-Applicable Pseudo Classes](./unsupported.md/#non-applicable-pseudo-classes). Any selectors that are not found here or under the non-applicable either are under consideration, have not yet been evaluated, or are too new and viewed as a risk to implement as they might not stick around. ## `:any-link`:material-language-html5:{: title="HTML" data-md-color-primary="orange" .icon} {:#:any-link} Selects every `#!html `, or `#!html ` element that has an `href` attribute, independent of whether it has been visited. /// tab | Syntax ```css :any-link ``` /// /// tab | Usage ```pycon3 >>> from bs4 import BeautifulSoup as bs >>> html = """ ... ... ... ...

A link to click

... ... ... """ >>> soup = bs(html, 'html5lib') >>> print(soup.select(':any-link')) [click] ``` /// /// tip | Additional Reading https://developer.mozilla.org/en-US/docs/Web/CSS/:any-link /// /// new | New in 2.2 The CSS specification recently updated to not include `#!html ` in the definition; therefore, Soup Sieve has removed it as well. /// ## `:checked`:material-language-html5:{: title="HTML" data-md-color-primary="orange" .icon} {:#:checked} Selects any `#!html `, `#!html `, or `#!html ] ``` /// /// tip | Additional Reading https://developer.mozilla.org/en-US/docs/Web/CSS/:checked /// ## `:default`:material-language-html5:{: title="HTML" data-md-color-primary="orange" .icon} {:#:default} Selects any form element that is the default among a group of related elements, including: `#!html
""" self.assert_selector( markup, ":default", ['summer', 'd1', 'd3', 'hamster', 'enable'], flags=util.HTML ) def test_iframe(self): """Test with `iframe`.""" markup = """
""" self.assert_selector( markup, ":default", ['d1', 'd3', 'd4'], flags=util.PYHTML ) def test_nested_form(self): """ Test nested form. This is technically invalid use of forms, but browsers will generally evaluate first in the nested forms. """ markup = """
""" self.assert_selector( markup, ":default", ['d1'], flags=util.HTML ) def test_default_cached(self): """ Test that we use the cached "default". For the sake of coverage, we will do this impractical select to ensure we reuse the cached default. """ markup = """
""" self.assert_selector( markup, ":default:default", ['d1'], flags=util.HTML ) def test_nested_form_fail(self): """ Test that the search for elements will bail after the first nested form. You shouldn't nest forms, but if you do, when a parent form encounters a nested form, we will bail evaluation like browsers do. We should see button 1 getting found for nested form, but button 2 will not be found for parent form. """ markup = """
what
""" self.assert_selector( markup, ":default", [], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_defined.py0000644000000000000000000000501313615410400016726 0ustar00"""Test defined selectors.""" from .. import util class TestDefined(util.TestCase): """Test defined selectors.""" def test_defined_html(self): """Test defined HTML.""" markup = """
""" self.assert_selector( markup, 'body :defined', ['0', '2', '3'], flags=util.HTML ) @util.skip_no_lxml def test_defined_xhtml(self): """Test defined XHTML.""" markup = """
""" from lxml import etree self.assert_selector( markup, 'body :defined', # We should get 3, but for LXML versions less than 4.4.0 we don't for reasons stated above. ['0', '2'] if etree.LXML_VERSION < (4, 4, 0, 0) else ['0', '1', '2'], flags=util.XHTML ) def test_defined_xml(self): """Test defined HTML.""" markup = """
""" # Defined is a browser thing. # XML doesn't care about defined and this will match nothing in XML. self.assert_selector( markup, 'body :defined', [], flags=util.XML ) soupsieve-2.7/tests/test_level4/test_dir.py0000644000000000000000000001240313615410400016107 0ustar00"""Test direction selectors.""" from .. import util import soupsieve as sv class TestDir(util.TestCase): """Test direction selectors.""" MARKUP = """
test1
test2
עִבְרִית()
עִבְרִית
test3
""" def test_dir_rtl(self): """Test general direction right to left.""" self.assert_selector( self.MARKUP, "div:dir(rtl)", ["1", "4", "6"], flags=util.HTML ) def test_dir_ltr(self): """Test general direction left to right.""" self.assert_selector( self.MARKUP, "div:dir(ltr)", ["3"], flags=util.HTML ) def test_dir_conflict(self): """Test conflicting direction.""" self.assert_selector( self.MARKUP, "div:dir(ltr):dir(rtl)", [], flags=util.HTML ) def test_dir_xml(self): """Test direction with XML (not supported).""" self.assert_selector( self.MARKUP, "div:dir(ltr)", [], flags=util.XML ) def test_dir_bidi_detect(self): """Test bidirectional detection.""" self.assert_selector( self.MARKUP, "span:dir(rtl)", ['2', '5', '7'], flags=util.HTML ) self.assert_selector( self.MARKUP, "span:dir(ltr)", ['8'], flags=util.HTML ) def test_dir_on_input(self): """Test input direction rules.""" self.assert_selector( self.MARKUP, ":is(input, textarea):dir(ltr)", ['9', '10', '11', '12', '13'], flags=util.HTML5 ) def test_dir_on_root(self): """Test that the root is assumed left to right if not explicitly defined.""" self.assert_selector( self.MARKUP, "html:dir(ltr)", ['0'], flags=util.HTML ) def test_dir_auto_root(self): """Test that the root is assumed left to right if auto used.""" markup = """ """ self.assert_selector( markup, "html:dir(ltr)", ['0'], flags=util.HTML ) def test_dir_on_input_root(self): """Test input direction when input is the root.""" markup = """""" # Input is root for parser in util.available_parsers('html.parser', 'lxml', 'html5lib'): soup = self.soup(markup, parser) fragment = soup.input.extract() self.assertTrue(sv.match(":root:dir(ltr)", fragment, flags=sv.DEBUG)) def test_iframe(self): """Test direction in `iframe`.""" markup = """
""" self.assert_selector( markup, "div:dir(ltr)", ['1'], flags=util.PYHTML ) self.assert_selector( markup, "div:dir(rtl)", ['2'], flags=util.PYHTML ) def test_xml_in_html(self): """Test cases for when we have XML in HTML.""" markup = """
עִבְרִית other text
""" self.assert_selector( markup, "div:dir(ltr)", ['1'], flags=util.HTML5 ) self.assert_selector( markup, "div:dir(rtl)", [], flags=util.HTML5 ) self.assert_selector( markup, "math:dir(rtl)", [], flags=util.HTML5 ) soupsieve-2.7/tests/test_level4/test_focus_visible.py0000644000000000000000000000124313615410400020165 0ustar00"""Test focus visible selectors.""" from .. import util class TestFocusVisible(util.TestCase): """Test focus visible selectors.""" MARKUP = """
""" def test_focus_visible(self): """Test focus visible.""" self.assert_selector( self.MARKUP, "form:focus-visible", [], flags=util.HTML ) def test_not_focus_visible(self): """Test inverse of focus visible.""" self.assert_selector( self.MARKUP, "form:not(:focus-visible)", ["form"], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_focus_within.py0000644000000000000000000000123213615410400020030 0ustar00"""Test focus within selectors.""" from .. import util class TestFocusWithin(util.TestCase): """Test focus within selectors.""" MARKUP = """
""" def test_focus_within(self): """Test focus within.""" self.assert_selector( self.MARKUP, "form:focus-within", [], flags=util.HTML ) def test_not_focus_within(self): """Test inverse of focus within.""" self.assert_selector( self.MARKUP, "form:not(:focus-within)", ["form"], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_future.py0000644000000000000000000000144313615410400016645 0ustar00"""Test future selectors.""" from .. import util class TestFuture(util.TestCase): """Test future selectors.""" MARKUP = """

Some text in a paragraph. Link Placeholder text.

""" def test_future(self): """Test future (should match nothing).""" self.assert_selector( self.MARKUP, "p:future", [], flags=util.HTML ) def test_not_future(self): """Test not future.""" self.assert_selector( self.MARKUP, "p:not(:future)", ["0"], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_has.py0000644000000000000000000001012713615410400016105 0ustar00"""Test has selectors.""" from .. import util from soupsieve import SelectorSyntaxError class TestHas(util.TestCase): """Test has selectors.""" MARKUP = """

""" MARKUP2 = """

""" def test_has_descendant(self): """Test has descendant.""" self.assert_selector( self.MARKUP, 'div:not(.aaaa):has(.kkkk > p.llll)', ['4', '5', '6'], flags=util.HTML ) def test_has_next_sibling(self): """Test has next sibling.""" self.assert_selector( self.MARKUP, 'p:has(+ .dddd:has(+ div .jjjj))', ['2'], flags=util.HTML ) def test_has_subsequent_sibling(self): """Test has subsequent sibling.""" self.assert_selector( self.MARKUP, 'p:has(~ .jjjj)', ['7', '8'], flags=util.HTML ) def test_has_child(self): """Test has2.""" self.assert_selector( self.MARKUP2, 'div:has(> .bbbb)', ['0'], flags=util.HTML ) def test_has_case(self): """Test has case insensitive.""" self.assert_selector( self.MARKUP, 'div:NOT(.aaaa):HAS(.kkkk > p.llll)', ['4', '5', '6'], flags=util.HTML ) def test_has_mixed(self): """Test has mixed.""" self.assert_selector( self.MARKUP2, 'div:has(> .bbbb, .ffff, .jjjj)', ['0', '4', '8'], flags=util.HTML ) self.assert_selector( self.MARKUP2, 'div:has(.ffff, > .bbbb, .jjjj)', ['0', '4', '8'], flags=util.HTML ) def test_has_nested_pseudo(self): """Test has with nested pseudo.""" self.assert_selector( self.MARKUP2, 'div:has(> :not(.bbbb, .ffff, .jjjj))', ['2', '6', '8'], flags=util.HTML ) self.assert_selector( self.MARKUP2, 'div:not(:has(> .bbbb, .ffff, .jjjj))', ['2', '6'], flags=util.HTML ) def test_has_no_match(self): """Test has with a non-matching selector.""" self.assert_selector( self.MARKUP2, 'div:has(:paused)', [], flags=util.HTML ) def test_has_empty(self): """Test has with empty slot due to multiple commas.""" self.assert_raises('div:has()', SelectorSyntaxError) def test_invalid_incomplete_has(self): """Test `:has()` fails with just a combinator.""" self.assert_raises(':has(>)', SelectorSyntaxError) def test_invalid_has_double_combinator(self): """Test `:has()` fails with consecutive combinators.""" self.assert_raises(':has(>> has a)', SelectorSyntaxError) self.assert_raises(':has(> has, >> a)', SelectorSyntaxError) self.assert_raises(':has(> has >> 
a)', SelectorSyntaxError) def test_invalid_has_trailing_combinator(self): """Test `:has()` fails with trailing combinator.""" self.assert_raises(':has(> has >)', SelectorSyntaxError) soupsieve-2.7/tests/test_level4/test_host.py0000644000000000000000000000111413615410400016303 0ustar00"""Test host selectors.""" from .. import util class TestHost(util.TestCase): """Test host selectors.""" MARKUP = """

header

some text

""" def test_host(self): """Test host (not supported).""" self.assert_selector( self.MARKUP, ":host", [], flags=util.HTML ) def test_host_func(self): """Test host function (not supported).""" self.assert_selector( self.MARKUP, ":host(h1)", [], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_host_context.py0000644000000000000000000000065213615410400020055 0ustar00"""Test host context selectors.""" from .. import util class TestHostContext(util.TestCase): """Test host context selectors.""" def test_host_context(self): """Test host context (not supported).""" markup = """

header

some text

""" self.assert_selector( markup, ":host-context(h1, h2)", [], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_in_range.py0000644000000000000000000002532313615410400017120 0ustar00"""Test in range selectors.""" from .. import util class TestInRange(util.TestCase): """Test in range selectors.""" def test_in_range_number(self): """Test in range number.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6', '7', '8'], flags=util.HTML ) def test_in_range_range(self): """Test in range range.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6', '7', '8'], flags=util.HTML ) def test_in_range_month(self): """Test in range month.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_in_range_week(self): """Test in range week.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6', '7'], flags=util.HTML ) def test_in_range_date(self): """Test in range date.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_in_range_date_time(self): """Test in range date_time.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_in_range_time(self): """Test in range time.""" markup = """ """ self.assert_selector( markup, ":in-range", ['0', '1', '2', '3', '4', '5', '6', '7'], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_indeterminate.py0000644000000000000000000000454713615410400020173 0ustar00"""Test indeterminate selectors.""" from .. import util class TestIndeterminate(util.TestCase): """Test indeterminate selectors.""" def test_indeterminate(self): """Test indeterminate.""" markup = """
""" self.assert_selector( markup, ":indeterminate", ['checkbox', 'radio1', 'radio6', 'radio4', 'radio5', 'radio-no-name1'], flags=util.HTML ) def test_iframe(self): """Test indeterminate when `iframe` is involved.""" markup = """
""" self.assert_selector( markup, ":indeterminate", ['radio1', 'radio3'], flags=util.PYHTML ) soupsieve-2.7/tests/test_level4/test_is.py0000644000000000000000000000647513615410400015760 0ustar00"""Test is selectors.""" from .. import util from soupsieve import SelectorSyntaxError class TestIs(util.TestCase): """Test is selectors.""" MARKUP = """

Some text in a paragraph. Link

""" def test_is(self): """Test multiple selectors with "is".""" self.assert_selector( self.MARKUP, ":is(span, a)", ["1", "2"], flags=util.HTML ) def test_is_multi_comma(self): """Test multiple selectors but with an empty slot due to multiple commas.""" self.assert_selector( self.MARKUP, ":is(span, , a)", ["1", "2"], flags=util.HTML ) def test_is_leading_comma(self): """Test multiple selectors but with an empty slot due to leading commas.""" self.assert_selector( self.MARKUP, ":is(, span, a)", ["1", "2"], flags=util.HTML ) def test_is_trailing_comma(self): """Test multiple selectors but with an empty slot due to trailing commas.""" self.assert_selector( self.MARKUP, ":is(span, a, )", ["1", "2"], flags=util.HTML ) def test_is_empty(self): """Test empty `:is()` selector list.""" self.assert_selector( self.MARKUP, ":is()", [], flags=util.HTML ) def test_nested_is(self): """Test multiple nested selectors.""" self.assert_selector( self.MARKUP, ":is(span, a:is(#\\32))", ["1", "2"], flags=util.HTML ) self.assert_selector( self.MARKUP, ":is(span, a:is(#\\32))", ["1", "2"], flags=util.HTML ) def test_is_with_other_pseudo(self): """Test `:is()` behavior when paired with `:not()`.""" # Each pseudo class is evaluated separately # So this will not match self.assert_selector( self.MARKUP, ":is(span):not(span)", [], flags=util.HTML ) def test_multiple_is(self): """Test `:is()` behavior when paired with `:not()`.""" # Each pseudo class is evaluated separately # So this will not match self.assert_selector( self.MARKUP, ":is(span):is(div)", [], flags=util.HTML ) # Each pseudo class is evaluated separately # So this will match self.assert_selector( self.MARKUP, ":is(a):is(#\\32)", ['2'], flags=util.HTML ) def test_invalid_pseudo_class_start_combinator(self): """Test invalid start combinator in pseudo-classes other than `:has()`.""" self.assert_raises(':is(> div)', SelectorSyntaxError) self.assert_raises(':is(div, > div)', SelectorSyntaxError) def 
test_invalid_pseudo_orphan_close(self): """Test invalid, orphaned pseudo close.""" self.assert_raises('div)', SelectorSyntaxError) def test_invalid_pseudo_open(self): """Test invalid pseudo close.""" self.assert_raises(':is(div', SelectorSyntaxError) soupsieve-2.7/tests/test_level4/test_lang.py0000644000000000000000000002361613615410400016262 0ustar00"""Test language selectors.""" from .. import util class TestLang(util.TestCase): """Test language selectors.""" MARKUP = """

""" def test_lang(self): """Test language and that it uses implicit wildcard.""" # Implicit wild self.assert_selector( self.MARKUP, "p:lang(de-DE)", ['1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_lang_missing_range(self): """Test language range with a missing range.""" # Implicit wild self.assert_selector( self.MARKUP, "p:lang(de--DE)", [], flags=util.HTML ) def test_explicit_wildcard(self): """Test language with explicit wildcard (same as implicit).""" # Explicit wild self.assert_selector( self.MARKUP, "p:lang(de-\\*-DE)", ['1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_only_wildcard(self): """Test language with only a wildcard.""" self.assert_selector( self.MARKUP, "p:lang('*')", ['1', '2', '3', '4', '5', '6', '7', '8', '9'], flags=util.HTML ) def test_wildcard_start_no_match(self): """Test language with a wildcard at start, but it matches nothing.""" self.assert_selector( self.MARKUP, "p:lang('*-de-DE')", [], flags=util.HTML ) def test_wildcard_start_collapse(self): """Test that language with multiple wildcard patterns at start collapse.""" self.assert_selector( self.MARKUP, "p:lang('*-*-*-DE')", ['1', '2', '3', '4', '5', '6', '7'], flags=util.HTML ) def test_wildcard_at_start_escaped(self): """ Test language with wildcard at start (escaped). Wildcard in the middle is same as implicit, but at the start, it has specific meaning. 
""" self.assert_selector( self.MARKUP, "p:lang(\\*-DE)", ['1', '2', '3', '4', '5', '6', '7'], flags=util.HTML ) def test_language_quoted(self): """Test language (quoted).""" # Normal quoted self.assert_selector( self.MARKUP, "p:lang('de-DE')", ['1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_language_quoted_with_escaped_newline(self): """Test language (quoted) with escaped new line.""" # Normal quoted self.assert_selector( self.MARKUP, "p:lang('de-\\\nDE')", ['1', '2', '3', '4', '5', '6'], flags=util.HTML ) def test_wildcard_at_start_quoted(self): """Test language with wildcard at start (quoted).""" # First wild quoted self.assert_selector( self.MARKUP, "p:lang('*-DE')", ['1', '2', '3', '4', '5', '6', '7'], flags=util.HTML ) def test_avoid_implicit_language(self): """Test that we can narrow language selection to elements that match and explicitly state language.""" # Target element with language and language attribute self.assert_selector( self.MARKUP, "p[lang]:lang(de-DE)", ['6'], flags=util.HTML ) def test_language_und(self): """Test that undefined language can be matched by `*`.""" markup = """
""" self.assert_selector( markup, "div:lang('*')", ['2'], flags=util.HTML ) def test_language_empty_string(self): """Test that an empty string language will only match untagged languages `lang=""`.""" markup = """
""" self.assert_selector( markup, "div:lang('')", ['1', '3', '4'], flags=util.HTML ) def test_language_list(self): """Test language list.""" # Multiple languages markup = """

""" self.assert_selector( markup, "p:lang(de-DE, '*-US')", ['1', '3', '4', '5', '6'], flags=util.HTML ) def test_undetermined_language(self): """Test undetermined language.""" markup = """

""" self.assert_selector( markup, "p:lang(en)", [], flags=util.HTML ) def test_language_in_header(self): """Test that we can find language in header.""" markup = """

""" self.assert_selector( markup, "p:lang('*-US')", ['1', '2'], flags=util.HTML ) def test_xml_style_language_in_html5(self): """Test XML style language when out of HTML5 namespace.""" markup = """
""" self.assert_selector( markup, "mtext:lang(en)", ['1'], flags=util.HTML5 ) def test_xml_style_language(self): """Test XML style language.""" # XML style language markup = """

""" self.assert_selector( markup, "p:lang(de-DE)", ['1', '2', '3', '4', '5', '6'], flags=util.XML ) def test_language_in_xhtml(self): """Test language in XHTML.""" markup = """

""" self.assert_selector( markup, "p:lang(de-DE)", ['1', '2', '3', '4', '5', '6'], flags=util.XML ) def test_language_in_xhtml_without_html_style_lang(self): """ Test language in XHTML. HTML namespace elements must use HTML style language. """ # XHTML language: `lang` markup = """

""" self.assert_selector( markup, "p:lang(de-DE)", [], flags=util.XHTML ) soupsieve-2.7/tests/test_level4/test_local_link.py0000644000000000000000000000133113615410400017436 0ustar00"""Test local link selectors.""" from .. import util class TestLocalLink(util.TestCase): """Test local link selectors.""" MARKUP = """ Link Another link """ def test_local_link(self): """Test local link (matches nothing).""" self.assert_selector( self.MARKUP, "a:local-link", [], flags=util.HTML ) def test_not_local_link(self): """Test not local link.""" self.assert_selector( self.MARKUP, "a:not(:local-link)", ["1", "2"], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_matches.py0000644000000000000000000000140713615410400016757 0ustar00"""Test matches selectors.""" from .. import util class TestMatches(util.TestCase): """Test matches selectors.""" MARKUP = """

Some text in a paragraph. Link

""" def test_matches(self): """Test multiple selectors with "matches".""" self.assert_selector( self.MARKUP, ":matches(span, a)", ["1", "2"], flags=util.HTML ) def test_nested_matches(self): """Test multiple nested selectors with "matches".""" self.assert_selector( self.MARKUP, ":matches(span, a:matches(#\\32))", ["1", "2"], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_muted.py0000644000000000000000000000203713615410400016451 0ustar00"""Test muted selectors.""" from .. import util class TestPaused(util.TestCase): """Test paused selectors.""" MARKUP = """ """ def test_muted(self): """Test muted.""" self.assert_selector( self.MARKUP, "video:muted", ['vid1'], flags=util.HTML ) def test_not_muted(self): """Test not muted.""" self.assert_selector( self.MARKUP, "video:not(:muted)", ["vid2"], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_not.py0000644000000000000000000000132313615410400016130 0ustar00"""Test not selectors.""" from .. import util class TestNot(util.TestCase): """Test not selectors.""" def test_multi_nested_not(self): """Test nested not and multiple selectors.""" markup = """

Some text in a paragraph.

Link Direct child
        Child 1
        Child 2
        Child 3
        
""" self.assert_selector( markup, 'div :not(p, :not([id=\\35]))', ['5'], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_nth_child.py0000644000000000000000000000226213615410400017267 0ustar00"""Test `nth` child selectors.""" from .. import util class TestNthChild(util.TestCase): """Test `nth` child selectors.""" MARKUP = """

""" def test_nth_child_of_s_simple(self): """Test `nth` child with selector (simple).""" self.assert_selector( self.MARKUP, ":nth-child(-n+3 of p)", ['0', '1', '7'], flags=util.HTML ) def test_nth_child_of_s_complex(self): """Test `nth` child with selector (complex).""" self.assert_selector( self.MARKUP, ":nth-child(2n + 1 of :is(p, span).test)", ['2', '6', '10'], flags=util.HTML ) self.assert_selector( self.MARKUP, ":nth-child(2n + 1 OF :is(p, span).test)", ['2', '6', '10'], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_nth_last_child.py0000644000000000000000000000145613615410400020316 0ustar00"""Test `nth` last child selectors.""" from .. import util class TestNthLastChild(util.TestCase): """Test `nth` last child selectors.""" def test_nth_child_of_s_complex(self): """Test `nth` child with selector (complex).""" markup = """

""" self.assert_selector( markup, ":nth-last-child(2n + 1 of p[id], span[id])", ['1', '3', '5', '7', '9', '11'], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_open.py0000644000000000000000000000271413615410400016276 0ustar00"""Test open selectors.""" from .. import util class TestOpen(util.TestCase): """Test open selectors.""" MARKUP = """
This is closed.A closed details element.

This is open.An open details element.

Greetings, one and all!

Goodbye, one and all!

""" def test_open(self): """Test open.""" self.assert_selector( self.MARKUP, ":open", ['2', '3'], flags=util.HTML ) def test_targted_open(self): """Test targeted open.""" self.assert_selector( self.MARKUP, "details:open", ['2'], flags=util.HTML ) self.assert_selector( self.MARKUP, "dialog:open", ['3'], flags=util.HTML ) def test_not_open(self): """Test not open.""" self.assert_selector( self.MARKUP, ":is(dialog, details):not(:open)", ["1", "4"], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_optional.py0000644000000000000000000000152113615410400017155 0ustar00"""Test optional selectors.""" from .. import util class TestOptional(util.TestCase): """Test optional selectors.""" MARKUP = """
""" def test_optional(self): """Test optional.""" self.assert_selector( self.MARKUP, ":optional", ['3', '4', '5'], flags=util.HTML ) def test_specific_optional(self): """Test specific optional.""" self.assert_selector( self.MARKUP, "input:optional", ['3'], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_out_of_range.py0000644000000000000000000002526013615410400020005 0ustar00"""Test out of range selectors.""" from .. import util class TestOutOfRange(util.TestCase): """Test out of range selectors.""" def test_out_of_range_number(self): """Test in range number.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['9', '10', '11'], flags=util.HTML ) def test_out_of_range_range(self): """Test in range range.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['9', '10'], flags=util.HTML ) def test_out_of_range_month(self): """Test in range month.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['7', '8', '9', '10'], flags=util.HTML ) def test_out_of_range_week(self): """Test in range week.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['8', '9', '10', '11'], flags=util.HTML ) def test_out_of_range_date(self): """Test in range date.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['7', '8', '9', '10', '11', '12'], flags=util.HTML ) def test_out_of_range_date_time(self): """Test in range date time.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['7', '8', '9', '10', '11', '12', '13', '14', '15', '16'], flags=util.HTML ) def test_out_of_range_time(self): """Test in range time.""" markup = """ """ self.assert_selector( markup, ":out-of-range", ['8', '9', '10', '11', '12', '13', '14'], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_past.py0000644000000000000000000000142113615410400016276 0ustar00"""Test past selectors.""" from .. import util class TestPast(util.TestCase): """Test past selectors.""" MARKUP = """

Some text in a paragraph. Link Placeholder text.

""" def test_past(self): """Test past (should match nothing).""" self.assert_selector( self.MARKUP, "p:past", [], flags=util.HTML ) def test_not_past(self): """Test not past.""" self.assert_selector( self.MARKUP, "p:not(:past)", ["0"], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_paused.py0000644000000000000000000000164313615410400016616 0ustar00"""Test paused selectors.""" from .. import util class TestPaused(util.TestCase): """Test paused selectors.""" MARKUP = """ """ def test_paused(self): """Test paused (matches nothing).""" # Not actually sure how this is used, but it won't match anything anyways self.assert_selector( self.MARKUP, "video:paused", [], flags=util.HTML ) def test_not_paused(self): """Test not paused.""" self.assert_selector( self.MARKUP, "video:not(:paused)", ["vid"], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_placeholder_shown.py0000644000000000000000000000623313615410400021035 0ustar00"""Test placeholder shown selectors.""" from .. import util class TestPlaceholderShown(util.TestCase): """Test placeholder shown selectors.""" def test_placeholder_shown(self): """Test placeholder shown.""" markup = """ """ self.assert_selector( markup, ":placeholder-shown", ['0', '1', '4', '5', '6', '7', '8', '9', '10', '11', '12', '28', '32'], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_playing.py0000644000000000000000000000165413615410400017002 0ustar00"""Test playing selectors.""" from .. 
import util class TestPlaying(util.TestCase): """Test playing selectors.""" MARKUP = """ """ def test_playing(self): """Test playing (matches nothing).""" # Not actually sure how this is used, but it won't match anything anyways self.assert_selector( self.MARKUP, "video:playing", [], flags=util.HTML ) def test_not_playing(self): """Test not playing.""" self.assert_selector( self.MARKUP, "video:not(:playing)", ["vid"], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_read_only.py0000644000000000000000000000337613615410400017316 0ustar00"""Test read only selectors.""" from .. import util class TestReadOnly(util.TestCase): """Test read only selectors.""" def test_read_only(self): """Test read only.""" markup = """

Text

Text

Text

Text

Text

""" self.assert_selector( markup, "body :read-only", [ '3', '13', '14', '15', '18', '19', '20', '22', '23', '24', '25', '31', '32', '33' ], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_read_write.py0000644000000000000000000000337413615410400017465 0ustar00"""Test read write selectors.""" from .. import util class TestReadWrite(util.TestCase): """Test read write selectors.""" def test_read_write(self): """Test read write.""" markup = """

Text

Text

Text

Text

Text

""" self.assert_selector( markup, ":read-write", [ '0', '1', '2', '4', '5', '6', '7', '8', '9', '10', '11', '12', '16', '17', '21', '26', '27', '28', '29', '30' ], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_required.py0000644000000000000000000000155513615410400017157 0ustar00"""Test required selectors.""" from .. import util class TestRequired(util.TestCase): """Test required selectors.""" MARKUP = """
""" def test_required(self): """Test required.""" self.assert_selector( self.MARKUP, ":required", ['1', '2', '4', '5'], flags=util.HTML ) def test_specific_required(self): """Test specific required.""" self.assert_selector( self.MARKUP, "input:required", ['1', '2'], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_scope.py0000644000000000000000000000530613615410400016446 0ustar00"""Test scope selectors.""" from .. import util import soupsieve as sv class TestScope(util.TestCase): """Test scope selectors.""" MARKUP = """

Some text in a paragraph.

Link Direct child
    Child 1
    Child 2
    Child 3
    
""" def test_scope_is_root(self): """Test scope is the root when the a specific element is not the target of the select call.""" # Scope is root when applied to a document node self.assert_selector( self.MARKUP, ":scope", ["root"], flags=util.HTML ) self.assert_selector( self.MARKUP, ":scope > body > div", ["div"], flags=util.HTML ) def test_scope_cannot_select_target(self): """Test that scope, the element which scope is called on, cannot be selected.""" for parser in util.available_parsers( 'html.parser', 'lxml', 'html5lib', 'xml'): soup = self.soup(self.MARKUP, parser) el = soup.html # Scope is the element we are applying the select to, and that element is never returned self.assertTrue(len(sv.select(':scope', el, flags=sv.DEBUG)) == 0) def test_scope_is_select_target(self): """Test that scope is the element which scope is called on.""" for parser in util.available_parsers( 'html.parser', 'lxml', 'html5lib', 'xml'): soup = self.soup(self.MARKUP, parser) el = soup.html # Scope here means the current element under select ids = [el.attrs['id'] for el in sv.select(':scope div', el, flags=sv.DEBUG)] self.assertEqual(sorted(ids), sorted(['div'])) el = soup.body ids = [el.attrs['id'] for el in sv.select(':scope div', el, flags=sv.DEBUG)] self.assertEqual(sorted(ids), sorted(['div'])) # `div` is the current element under select, and it has no `div` elements. el = soup.div ids = [el.attrs['id'] for el in sv.select(':scope div', el, flags=sv.DEBUG)] self.assertEqual(sorted(ids), sorted([])) # `div` does have an element with the class `.wordshere` ids = [el.attrs['id'] for el in sv.select(':scope .wordshere', el, flags=sv.DEBUG)] self.assertEqual(sorted(ids), sorted(['pre'])) soupsieve-2.7/tests/test_level4/test_target_within.py0000644000000000000000000000147213615410400020205 0ustar00"""Test target within selectors.""" from .. import util class TestTargetWithin(util.TestCase): """Test target within selectors.""" MARKUP = """ Jump

Header 1

content

Header 2

content

""" def test_target_within(self): """Test target within.""" self.assert_selector( self.MARKUP, "article:target-within", [], flags=util.HTML ) def test_not_target_within(self): """Test inverse of target within.""" self.assert_selector( self.MARKUP, "article:not(:target-within)", ["article"], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_user_invalid.py0000644000000000000000000000113413615410400020014 0ustar00"""Test invalid selectors.""" from .. import util class TestInvalid(util.TestCase): """Test invalid selectors.""" def test_user_invalid(self): """Test user invalid (matches nothing).""" markup = """
""" self.assert_selector( markup, "input:user-invalid", [], flags=util.HTML ) self.assert_selector( markup, "input:not(:user-invalid)", ["1"], flags=util.HTML ) soupsieve-2.7/tests/test_level4/test_where.py0000644000000000000000000000136313615410400016446 0ustar00"""Test where selectors.""" from .. import util class TestWhere(util.TestCase): """Test where selectors.""" MARKUP = """

Some text in a paragraph. Link

""" def test_where(self): """Test multiple selectors with "where".""" self.assert_selector( self.MARKUP, ":where(span, a)", ["1", "2"], flags=util.HTML ) def test_nested_where(self): """Test multiple nested selectors with "where".""" self.assert_selector( self.MARKUP, ":where(span, a:where(#\\32))", ["1", "2"], flags=util.HTML ) soupsieve-2.7/tests/test_nesting_1/__init__.py0000644000000000000000000000005613615410400016526 0ustar00"""Test CSS introduced by Nesting level 1.""" soupsieve-2.7/tests/test_nesting_1/test_amp.py0000644000000000000000000000525313615410400016607 0ustar00"""Test ampersand selectors.""" from .. import util import soupsieve as sv class TestAmp(util.TestCase): """Test scope selectors.""" MARKUP = """

Some text in a paragraph.

Link Direct child
    Child 1
    Child 2
    Child 3
    
""" def test_amp_is_root(self): """Test ampersand is the root when the a specific element is not the target of the select call.""" # Scope is root when applied to a document node self.assert_selector( self.MARKUP, "&", ["root"], flags=util.HTML ) self.assert_selector( self.MARKUP, "& > body > div", ["div"], flags=util.HTML ) def test_amp_cannot_select_target(self): """Test that ampersand, the element which scope is called on, cannot be selected.""" for parser in util.available_parsers( 'html.parser', 'lxml', 'html5lib', 'xml'): soup = self.soup(self.MARKUP, parser) el = soup.html # Scope is the element we are applying the select to, and that element is never returned self.assertTrue(len(sv.select('&', el, flags=sv.DEBUG)) == 0) def test_amp_is_select_target(self): """Test that ampersand is the element which scope is called on.""" for parser in util.available_parsers( 'html.parser', 'lxml', 'html5lib', 'xml'): soup = self.soup(self.MARKUP, parser) el = soup.html # Scope here means the current element under select ids = [el.attrs['id'] for el in sv.select('& div', el, flags=sv.DEBUG)] self.assertEqual(sorted(ids), sorted(['div'])) el = soup.body ids = [el.attrs['id'] for el in sv.select('& div', el, flags=sv.DEBUG)] self.assertEqual(sorted(ids), sorted(['div'])) # `div` is the current element under select, and it has no `div` elements. 
el = soup.div ids = [el.attrs['id'] for el in sv.select('& div', el, flags=sv.DEBUG)] self.assertEqual(sorted(ids), sorted([])) # `div` does have an element with the class `.wordshere` ids = [el.attrs['id'] for el in sv.select('& .wordshere', el, flags=sv.DEBUG)] self.assertEqual(sorted(ids), sorted(['pre'])) soupsieve-2.7/.gitignore0000644000000000000000000000247013615410400012317 0ustar00.DS_Store # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # Patches *.patch soupsieve-2.7/LICENSE.md0000644000000000000000000000211013615410400011722 0ustar00MIT License Copyright (c) 2018 - 2025 Isaac Muse Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the 
Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. soupsieve-2.7/README.md0000644000000000000000000000664413615410400011615 0ustar00[![Donate via PayPal][donate-image]][donate-link] [![Build][github-ci-image]][github-ci-link] [![Coverage Status][codecov-image]][codecov-link] [![PyPI Version][pypi-image]][pypi-link] [![PyPI Downloads][pypi-down]][pypi-link] [![PyPI - Python Version][python-image]][pypi-link] [![License][license-image-mit]][license-link] # Soup Sieve ## Overview Soup Sieve is a CSS selector library designed to be used with [Beautiful Soup 4][bs4]. It aims to provide selecting, matching, and filtering using modern CSS selectors. Soup Sieve currently provides selectors from the CSS level 1 specifications up through the latest CSS level 4 drafts and beyond (though some are not yet implemented). Soup Sieve was written with the intent to replace Beautiful Soup's builtin select feature, and as of Beautiful Soup version 4.7.0, it now is :confetti_ball:. Soup Sieve can also be imported in order to use its API directly for more controlled, specialized parsing. 
Soup Sieve has implemented most of the CSS selectors up through the latest CSS draft specifications, though there are a number that don't make sense in a non-browser environment. Selectors that cannot provide meaningful functionality simply do not match anything. Some of the supported selectors are: - `.classes` - `#ids` - `[attributes=value]` - `parent child` - `parent > child` - `sibling ~ sibling` - `sibling + sibling` - `:not(element.class, element2.class)` - `:is(element.class, element2.class)` - `parent:has(> child)` - and [many more](https://facelessuser.github.io/soupsieve/selectors/) ## Installation You must have Beautiful Soup already installed: ``` pip install beautifulsoup4 ``` In most cases, assuming you've installed version 4.7.0, that should be all you need to do, but if you've installed via some alternative method, and Soup Sieve is not automatically installed, you can install it directly: ``` pip install soupsieve ``` If you want to manually install it from source, first ensure that [`build`](https://pypi.org/project/build/) is installed: ``` pip install build ``` Then navigate to the root of the project and build the wheel and install (replacing `<version>` with the current version): ``` python -m build -w pip install dist/soupsieve-<version>-py3-none-any.whl ``` ## Documentation Documentation is found here: https://facelessuser.github.io/soupsieve/.
## License MIT [bs4]: https://beautiful-soup-4.readthedocs.io/en/latest/# [github-ci-image]: https://github.com/facelessuser/soupsieve/workflows/build/badge.svg [github-ci-link]: https://github.com/facelessuser/soupsieve/actions?query=workflow%3Abuild+branch%3Amain [codecov-image]: https://img.shields.io/codecov/c/github/facelessuser/soupsieve/master.svg?logo=codecov&logoColor=aaaaaa&labelColor=333333 [codecov-link]: https://codecov.io/github/facelessuser/soupsieve [pypi-image]: https://img.shields.io/pypi/v/soupsieve.svg?logo=pypi&logoColor=aaaaaa&labelColor=333333 [pypi-down]: https://img.shields.io/pypi/dm/soupsieve.svg?logo=pypi&logoColor=aaaaaa&labelColor=333333 [pypi-link]: https://pypi.python.org/pypi/soupsieve [python-image]: https://img.shields.io/pypi/pyversions/soupsieve?logo=python&logoColor=aaaaaa&labelColor=333333 [license-image-mit]: https://img.shields.io/badge/license-MIT-blue.svg?labelColor=333333 [license-link]: https://github.com/facelessuser/soupsieve/blob/main/LICENSE.md [donate-image]: https://img.shields.io/badge/Donate-PayPal-3fabd1?logo=paypal [donate-link]: https://www.paypal.me/facelessuser soupsieve-2.7/hatch_build.py0000644000000000000000000000304613615410400013147 0ustar00"""Dynamically define some metadata.""" import os from hatchling.metadata.plugin.interface import MetadataHookInterface def get_version_dev_status(root): """Get version_info without importing the entire module.""" import importlib.util path = os.path.join(root, "soupsieve", "__meta__.py") spec = importlib.util.spec_from_file_location("__meta__", path) module = importlib.util.module_from_spec(spec) spec.loader.exec_module(module) return module.__version_info__._get_dev_status() class CustomMetadataHook(MetadataHookInterface): """Our metadata hook.""" def update(self, metadata): """See https://ofek.dev/hatch/latest/plugins/metadata-hook/ for more information.""" metadata["classifiers"] = [ f"Development Status :: {get_version_dev_status(self.root)}", 'Environment :: 
Console', 'Intended Audience :: Developers', 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3.13', 'Topic :: Internet :: WWW/HTTP :: Dynamic Content', 'Topic :: Software Development :: Libraries :: Python Modules', 'Typing :: Typed' ] soupsieve-2.7/pyproject.toml0000644000000000000000000000545113615410400013245 0ustar00[build-system] requires = [ "hatchling>=0.21.1", ] build-backend = "hatchling.build" [project] name = "soupsieve" description = "A modern CSS selector implementation for Beautiful Soup." readme = "README.md" license = "MIT" requires-python = ">=3.8" authors = [ { name = "Isaac Muse", email = "Isaac.Muse@gmail.com" }, ] keywords = [ "CSS", "HTML", "XML", "selector", "filter", "query", "soup" ] dynamic = [ "classifiers", "version", ] [project.urls] Homepage = "https://github.com/facelessuser/soupsieve" [tool.hatch.version] source = "code" path = "soupsieve/__meta__.py" [tool.hatch.build.targets.wheel] include = [ "/soupsieve", ] [tool.hatch.build.targets.sdist] include = [ "/docs/src/markdown/**/*.md", "/docs/src/markdown/**/*.gif", "/docs/src/markdown/**/*.png", "/docs/src/markdown/dictionary/*.txt", "/docs/theme/**/*.css", "/docs/theme/**/*.js", "/docs/theme/**/*.html", "/requirements/*.txt", "/soupsieve/**/*.py", "/soupsieve/py.typed", "/tests/**/*.py", "/.pyspelling.yml", "/.coveragerc", "/mkdocs.yml" ] [tool.mypy] files = [ "soupsieve" ] strict = true show_error_codes = true [tool.hatch.metadata.hooks.custom] [tool.ruff] line-length = 120 lint.select = [ "A", # flake8-builtins "B", # flake8-bugbear "D", # pydocstyle "C4", # flake8-comprehensions "N", # pep8-naming "E", # pycodestyle "F", # pyflakes "PGH", # pygrep-hooks "RUF", # ruff # 
"UP", # pyupgrade "W", # pycodestyle "YTT", # flake8-2020, "PERF" # Perflint ] lint.ignore = [ "E741", "D202", "D401", "D212", "D203", "N802", "N801", "N803", "N806", "N818", "RUF012", "RUF005", "PGH004", "RUF100", "RUF022", "RUF023" ] [tool.tox] legacy_tox_ini = """ [tox] isolated_build = true envlist = py{38,39,310,311,312}, lint, nolxml, nohtml5lib [testenv] passenv = * deps = -rrequirements/tests.txt commands = mypy pytest --cov soupsieve --cov-append {toxinidir} coverage html -d {envtmpdir}/coverage coverage xml coverage report --show-missing [testenv:documents] passenv = * deps = -rrequirements/docs.txt commands = mkdocs build --clean --verbose --strict pyspelling -j 8 [testenv:lint] passenv = * deps = -rrequirements/lint.txt commands = "{envbindir}"/ruff check . [testenv:nolxml] passenv = * deps = -rrequirements/tests-nolxml.txt commands = pytest {toxinidir} [testenv:nohtml5lib] passenv = * deps = -rrequirements/tests-nohtml5lib.txt commands = pytest {toxinidir} [pytest] filterwarnings = ignore:\nCSS selector pattern:UserWarning """ [tool.pytest.ini_options] filterwarnings = [ "ignore:The 'strip_cdata':DeprecationWarning" ] soupsieve-2.7/PKG-INFO0000644000000000000000000001103013615410400011414 0ustar00Metadata-Version: 2.4 Name: soupsieve Version: 2.7 Summary: A modern CSS selector implementation for Beautiful Soup. 
Project-URL: Homepage, https://github.com/facelessuser/soupsieve Author-email: Isaac Muse <Isaac.Muse@gmail.com> License-Expression: MIT License-File: LICENSE.md Keywords: CSS,HTML,XML,filter,query,selector,soup Classifier: Development Status :: 5 - Production/Stable Classifier: Environment :: Console Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: MIT License Classifier: Operating System :: OS Independent Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.8 Classifier: Programming Language :: Python :: 3.9 Classifier: Programming Language :: Python :: 3.10 Classifier: Programming Language :: Python :: 3.11 Classifier: Programming Language :: Python :: 3.12 Classifier: Programming Language :: Python :: 3.13 Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content Classifier: Topic :: Software Development :: Libraries :: Python Modules Classifier: Typing :: Typed Requires-Python: >=3.8 Description-Content-Type: text/markdown [![Donate via PayPal][donate-image]][donate-link] [![Build][github-ci-image]][github-ci-link] [![Coverage Status][codecov-image]][codecov-link] [![PyPI Version][pypi-image]][pypi-link] [![PyPI Downloads][pypi-down]][pypi-link] [![PyPI - Python Version][python-image]][pypi-link] [![License][license-image-mit]][license-link] # Soup Sieve ## Overview Soup Sieve is a CSS selector library designed to be used with [Beautiful Soup 4][bs4]. It aims to provide selecting, matching, and filtering using modern CSS selectors. Soup Sieve currently provides selectors from the CSS level 1 specifications up through the latest CSS level 4 drafts and beyond (though some are not yet implemented). Soup Sieve was written with the intent to replace Beautiful Soup's builtin select feature, and as of Beautiful Soup version 4.7.0, it now is :confetti_ball:. Soup Sieve can also be imported in order to use its API directly for more controlled, specialized parsing. 
Soup Sieve has implemented most of the CSS selectors up through the latest CSS draft specifications, though there are a number that don't make sense in a non-browser environment. Selectors that cannot provide meaningful functionality simply do not match anything. Some of the supported selectors are: - `.classes` - `#ids` - `[attributes=value]` - `parent child` - `parent > child` - `sibling ~ sibling` - `sibling + sibling` - `:not(element.class, element2.class)` - `:is(element.class, element2.class)` - `parent:has(> child)` - and [many more](https://facelessuser.github.io/soupsieve/selectors/) ## Installation You must have Beautiful Soup already installed: ``` pip install beautifulsoup4 ``` In most cases, assuming you've installed version 4.7.0, that should be all you need to do, but if you've installed via some alternative method, and Soup Sieve is not automatically installed, you can install it directly: ``` pip install soupsieve ``` If you want to manually install it from source, first ensure that [`build`](https://pypi.org/project/build/) is installed: ``` pip install build ``` Then navigate to the root of the project and build the wheel and install (replacing `<ver>` with the current version): ``` python -m build -w pip install dist/soupsieve-<ver>-py3-none-any.whl ``` ## Documentation Documentation is found here: https://facelessuser.github.io/soupsieve/. 
## License MIT [bs4]: https://beautiful-soup-4.readthedocs.io/en/latest/# [github-ci-image]: https://github.com/facelessuser/soupsieve/workflows/build/badge.svg [github-ci-link]: https://github.com/facelessuser/soupsieve/actions?query=workflow%3Abuild+branch%3Amain [codecov-image]: https://img.shields.io/codecov/c/github/facelessuser/soupsieve/master.svg?logo=codecov&logoColor=aaaaaa&labelColor=333333 [codecov-link]: https://codecov.io/github/facelessuser/soupsieve [pypi-image]: https://img.shields.io/pypi/v/soupsieve.svg?logo=pypi&logoColor=aaaaaa&labelColor=333333 [pypi-down]: https://img.shields.io/pypi/dm/soupsieve.svg?logo=pypi&logoColor=aaaaaa&labelColor=333333 [pypi-link]: https://pypi.python.org/pypi/soupsieve [python-image]: https://img.shields.io/pypi/pyversions/soupsieve?logo=python&logoColor=aaaaaa&labelColor=333333 [license-image-mit]: https://img.shields.io/badge/license-MIT-blue.svg?labelColor=333333 [license-link]: https://github.com/facelessuser/soupsieve/blob/main/LICENSE.md [donate-image]: https://img.shields.io/badge/Donate-PayPal-3fabd1?logo=paypal [donate-link]: https://www.paypal.me/facelessuser