"""Tests to ensure that the lxml tree builder generates good trees."""

import pickle
import re
import warnings

# lxml is an optional dependency. Record whether it can be imported and
# which version is installed so the test classes below can be skipped
# (or individual tests disabled) accordingly.
try:
    import lxml.etree
except ImportError:
    LXML_PRESENT = False
    # Placeholder version used when lxml is absent; it compares lower
    # than any real version tuple.
    LXML_VERSION = (0,)
else:
    LXML_PRESENT = True
    LXML_VERSION = lxml.etree.LXML_VERSION

# The lxml-backed builders are only imported when lxml itself is
# importable.
if LXML_PRESENT:
    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML

from bs4 import (
    BeautifulSoup,
    BeautifulStoneSoup,
    )
from bs4.element import Comment, Doctype, SoupStrainer
from . import (
    HTMLTreeBuilderSmokeTest,
    XMLTreeBuilderSmokeTest,
    SoupTest,
    skipIf,
)

@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its tree builder.")
class TestLXMLTreeBuilder(SoupTest, HTMLTreeBuilderSmokeTest):
    """Run the HTML smoke tests against lxml's HTML tree builder.

    See ``HTMLTreeBuilderSmokeTest`` for the inherited tests. The whole
    class is skipped when lxml cannot be imported.
    """

    @property
    def default_builder(self):
        # The tree builder class exercised by this test case.
        return LXMLTreeBuilder

    def test_out_of_range_entity(self):
        # Numeric character references (decimal and hexadecimal) whose
        # values lie beyond the Unicode range are expected to vanish
        # from the parsed document.
        self.assert_soup(
            "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
        self.assert_soup(
            "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
        self.assert_soup(
            "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")

    def test_entities_in_foreign_document_encoding(self):
        # We can't implement this case correctly because by the time we
        # hear about markup like "&#147;", it's been (incorrectly)
        # converted into a string like u'\x93'
        pass

    # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
    # test if an old version of lxml is installed.

    @skipIf(
        not LXML_PRESENT or LXML_VERSION < (2, 3, 5, 0),
        "Skipping doctype test for old version of lxml to avoid segfault.")
    def test_empty_doctype(self):
        soup = self.soup("<!DOCTYPE>")
        doctype = soup.contents[0]
        assert "" == doctype.strip()

    def test_beautifulstonesoup_is_xml_parser(self):
        # Make sure that the deprecated BSS class uses an xml builder
        # if one is installed.
        with warnings.catch_warnings(record=True) as w:
            # Reset the filter so the deprecation warning is recorded
            # even if it was already issued (and would otherwise be
            # suppressed) earlier in the test run.
            warnings.simplefilter("always")
            soup = BeautifulStoneSoup("<b />")
        assert "<b/>" == str(soup.b)
        # With the "always" filter other warnings may be recorded too,
        # so look through all of them instead of assuming position 0.
        assert any(
            "BeautifulStoneSoup class is deprecated" in str(warning.message)
            for warning in w
        )

    def test_tracking_line_numbers(self):
        # The lxml TreeBuilder cannot keep track of line numbers from
        # the original markup. Even if you ask for line numbers, we
        # don't have 'em.
        #
        # This means that if you have a tag like <sourceline> or
        # <sourcepos>, attribute access will find it rather than
        # giving you a numeric answer.
        soup = self.soup(
            "\n   <p>\n\n<sourceline>\n<b>text</b></sourceline><sourcepos></p>",
            store_line_numbers=True
        )
        assert "sourceline" == soup.p.sourceline.name
        assert "sourcepos" == soup.p.sourcepos.name
        
@skipIf(
    not LXML_PRESENT,
    "lxml seems not to be present, not testing its XML tree builder.")
class TestLXMLXMLTreeBuilder(SoupTest, XMLTreeBuilderSmokeTest):
    """Run the XML smoke tests against lxml's XML tree builder.

    See ``XMLTreeBuilderSmokeTest`` for the inherited tests. The whole
    class is skipped when lxml cannot be imported.
    """

    @property
    def default_builder(self):
        # The tree builder class exercised by this test case.
        return LXMLTreeBuilderForXML

    def test_namespace_indexing(self):
        soup = self.soup(
            '<?xml version="1.1"?>\n'
            '<root>'
            '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
            '<prefix:tag2 xmlns:prefix="http://prefixed-namespace.com">content</prefix:tag2>'
            '<prefix2:tag3 xmlns:prefix2="http://another-namespace.com">'
            '<subtag xmlns="http://another-unprefixed-namespace.com">'
            '<subsubtag xmlns="http://yet-another-unprefixed-namespace.com">'
            '</prefix2:tag3>'
            '</root>'
        )

        # The BeautifulSoup object includes every namespace prefix
        # defined in the entire document. This is the default set of
        # namespaces used by soupsieve.
        #
        # Un-prefixed namespaces are not included, and if a given
        # prefix is defined twice, only the first prefix encountered
        # in the document shows up here.
        assert soup._namespaces == {
            'xml': 'http://www.w3.org/XML/1998/namespace',
            'prefix': 'http://prefixed-namespace.com',
            'prefix2': 'http://another-namespace.com'
        }

        # A Tag object includes only the namespace prefixes
        # that were in scope when it was parsed.

        # We do not track un-prefixed namespaces as we can only hold
        # one (the first one), and it will be recognized as the
        # default namespace by soupsieve, even when operating from a
        # tag with a different un-prefixed namespace.
        assert soup.tag._namespaces == {
            'xml': 'http://www.w3.org/XML/1998/namespace',
        }

        assert soup.tag2._namespaces == {
            'prefix': 'http://prefixed-namespace.com',
            'xml': 'http://www.w3.org/XML/1998/namespace',
        }

        assert soup.subtag._namespaces == {
            'prefix2': 'http://another-namespace.com',
            'xml': 'http://www.w3.org/XML/1998/namespace',
        }

        assert soup.subsubtag._namespaces == {
            'prefix2': 'http://another-namespace.com',
            'xml': 'http://www.w3.org/XML/1998/namespace',
        }

    def test_namespace_interaction_with_select_and_find(self):
        # Demonstrate how namespaces interact with select* and
        # find* methods.

        # NOTE(review): <prefix:tag2> is closed with </tag> below, so
        # this markup depends on how the parser handles the mismatch --
        # confirm whether that is intentional before "fixing" it.
        soup = self.soup(
            '<?xml version="1.1"?>\n'
            '<root>'
            '<tag xmlns="http://unprefixed-namespace.com">content</tag>'
            '<prefix:tag2 xmlns:prefix="http://prefixed-namespace.com">content</tag>'
            '<subtag xmlns:prefix="http://another-namespace-same-prefix.com">'
            '<prefix:tag3>'
            '</subtag>'
            '</root>'
        )

        # soupselect uses namespace URIs.
        assert soup.select_one('tag').name == 'tag'
        assert soup.select_one('prefix|tag2').name == 'tag2'

        # If a prefix is declared more than once, only the first usage
        # is registered with the BeautifulSoup object.
        assert soup.select_one('prefix|tag3') is None

        # But you can always explicitly specify a namespace dictionary.
        assert soup.select_one(
            'prefix|tag3', namespaces=soup.subtag._namespaces
        ).name == 'tag3'

        # And a Tag (as opposed to the BeautifulSoup object) will
        # have a set of default namespaces scoped to that Tag.
        assert soup.subtag.select_one('prefix|tag3').name == 'tag3'

        # the find() methods aren't fully namespace-aware; they just
        # look at prefixes.
        assert soup.find('tag').name == 'tag'
        assert soup.find('prefix:tag2').name == 'tag2'
        assert soup.find('prefix:tag3').name == 'tag3'
        assert soup.subtag.find('prefix:tag3').name == 'tag3'

    def test_pickle_removes_builder(self):
        # The lxml TreeBuilder is not picklable, so it won't be
        # preserved in a pickle/unpickle operation.

        soup = self.soup("<a>some markup</a>")
        assert isinstance(soup.builder, self.default_builder)
        # Round-trip through pickle; the data is produced right here,
        # so loading it back is safe.
        pickled = pickle.dumps(soup)
        unpickled = pickle.loads(pickled)
        assert "some markup" == unpickled.a.string
        assert unpickled.builder is None