Make bs4 and soupsieve fully standalone in the project

This commit is contained in:
2025-04-24 17:39:41 +02:00
parent ed336866ee
commit aefb27614f
16 changed files with 72 additions and 77 deletions

View File

@ -4,7 +4,7 @@ __license__ = "MIT"
from collections import defaultdict
import itertools
import sys
from bs4.element import (
from ...bs4.element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
Stylesheet,

View File

@ -7,13 +7,13 @@ __all__ = [
import warnings
import re
from bs4.builder import (
from ...bs4.builder import (
PERMISSIVE,
HTML,
HTML_5,
HTMLTreeBuilder,
)
from bs4.element import (
from ...bs4.element import (
NamespacedAttribute,
nonwhitespace_re,
)
@ -22,7 +22,7 @@ from html5lib.constants import (
namespaces,
prefixes,
)
from bs4.element import (
from ...bs4.element import (
Comment,
Doctype,
NavigableString,
@ -120,7 +120,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
if soup:
self.soup = soup
else:
from bs4 import BeautifulSoup
from ...bs4 import BeautifulSoup
# TODO: Why is the parser 'html.parser' here? To avoid an
# infinite loop?
self.soup = BeautifulSoup(
@ -166,7 +166,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
return TextNode(Comment(data), self.soup)
def fragmentClass(self):
from bs4 import BeautifulSoup
from ...bs4 import BeautifulSoup
# TODO: Why is the parser 'html.parser' here? To avoid an
# infinite loop?
self.soup = BeautifulSoup("", "html.parser")
@ -184,7 +184,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
return treebuilder_base.TreeBuilder.getFragment(self).element
def testSerializer(self, element):
from bs4 import BeautifulSoup
from ...bs4 import BeautifulSoup
rv = []
doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')

View File

@ -34,16 +34,16 @@ CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
from bs4.element import (
from ...bs4.element import (
CData,
Comment,
Declaration,
Doctype,
ProcessingInstruction,
)
from bs4.dammit import EntitySubstitution, UnicodeDammit
from ...bs4.dammit import EntitySubstitution, UnicodeDammit
from bs4.builder import (
from ...bs4.builder import (
HTML,
HTMLTreeBuilder,
STRICT,

View File

@ -14,14 +14,14 @@ except ImportError as e:
from io import BytesIO
from io import StringIO
from lxml import etree
from bs4.element import (
from ...bs4.element import (
Comment,
Doctype,
NamespacedAttribute,
ProcessingInstruction,
XMLProcessingInstruction,
)
from bs4.builder import (
from ...bs4.builder import (
FAST,
HTML,
HTMLTreeBuilder,
@ -29,7 +29,7 @@ from bs4.builder import (
ParserRejectedMarkup,
TreeBuilder,
XML)
from bs4.dammit import EncodingDetector
from ...bs4.dammit import EncodingDetector
LXML = 'lxml'

View File

@ -6,9 +6,9 @@ __license__ = "MIT"
import cProfile
from io import StringIO
from html.parser import HTMLParser
import bs4
from bs4 import BeautifulSoup, __version__
from bs4.builder import builder_registry
from ..bs4 import BeautifulSoup as bs4
from ..bs4 import BeautifulSoup, __version__
from ..bs4.builder import builder_registry
import os
import pstats

View File

@ -9,14 +9,14 @@ import re
import sys
import warnings
try:
import soupsieve
from ..soupsieve import *
except ImportError as e:
soupsieve = None
warnings.warn(
'The soupsieve package is not installed. CSS selectors cannot be used.'
)
from bs4.formatter import (
from ..bs4.formatter import (
Formatter,
HTMLFormatter,
XMLFormatter,
@ -380,7 +380,7 @@ class PageElement(object):
and not isinstance(new_child, NavigableString)):
new_child = NavigableString(new_child)
from bs4 import BeautifulSoup
from ..bs4 import BeautifulSoup
if isinstance(new_child, BeautifulSoup):
# We don't want to end up with a situation where one BeautifulSoup
# object contains another. Insert the children one at a time.

View File

@ -1,4 +1,4 @@
from bs4.dammit import EntitySubstitution
from ..bs4.dammit import EntitySubstitution
class Formatter(EntitySubstitution):
"""Describes a strategy to use when outputting a parse tree to a string.

View File

@ -9,8 +9,8 @@ import copy
import functools
import unittest
from unittest import TestCase
from bs4 import BeautifulSoup
from bs4.element import (
from ..bs4 import BeautifulSoup
from ..bs4.element import (
CharsetMetaAttributeValue,
Comment,
ContentMetaAttributeValue,
@ -22,7 +22,7 @@ from bs4.element import (
Tag
)
from bs4.builder import HTMLParserTreeBuilder
from ..bs4.builder import HTMLParserTreeBuilder
default_builder = HTMLParserTreeBuilder
BAD_DOCUMENT = """A bare string

View File

@ -3,21 +3,21 @@
import unittest
import warnings
from bs4 import BeautifulSoup
from bs4.builder import (
from ...bs4 import BeautifulSoup
from ...bs4.builder import (
builder_registry as registry,
HTMLParserTreeBuilder,
TreeBuilderRegistry,
)
try:
from bs4.builder import HTML5TreeBuilder
from ...bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError:
HTML5LIB_PRESENT = False
try:
from bs4.builder import (
from ...bs4.builder import (
LXMLTreeBuilderForXML,
LXMLTreeBuilder,
)

View File

@ -3,12 +3,12 @@
import warnings
try:
from bs4.builder import HTML5TreeBuilder
from ...bs4.builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError as e:
HTML5LIB_PRESENT = False
from bs4.element import SoupStrainer
from bs4.testing import (
from ...bs4.element import SoupStrainer
from ...bs4.testing import (
HTML5TreeBuilderSmokeTest,
SoupTest,
skipIf,

View File

@ -3,9 +3,9 @@ trees."""
from pdb import set_trace
import pickle
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from bs4.builder import HTMLParserTreeBuilder
from bs4.builder._htmlparser import BeautifulSoupHTMLParser
from ...bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from ...bs4.builder import HTMLParserTreeBuilder
from ...bs4.builder._htmlparser import BeautifulSoupHTMLParser
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):

View File

@ -12,16 +12,16 @@ except ImportError as e:
LXML_VERSION = (0,)
if LXML_PRESENT:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
from ...bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
from bs4 import (
from ...bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
)
from bs4.element import Comment, Doctype, SoupStrainer
from bs4.testing import skipIf
from bs4.tests import test_htmlparser
from bs4.testing import (
from ...bs4.element import Comment, Doctype, SoupStrainer
from ...bs4.testing import skipIf
from ...bs4.tests import test_htmlparser
from ...bs4.testing import (
HTMLTreeBuilderSmokeTest,
XMLTreeBuilderSmokeTest,
SoupTest,

View File

@ -7,17 +7,17 @@ import unittest
import sys
import tempfile
from bs4 import (
from ...bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
GuessedAtParserWarning,
MarkupResemblesLocatorWarning,
)
from bs4.builder import (
from ...bs4.builder import (
TreeBuilder,
ParserRejectedMarkup,
)
from bs4.element import (
from ...bs4.element import (
CharsetMetaAttributeValue,
Comment,
ContentMetaAttributeValue,
@ -27,13 +27,13 @@ from bs4.element import (
NavigableString,
)
import bs4.dammit
from bs4.dammit import (
from ...bs4.dammit import *
from ...bs4.dammit import (
EntitySubstitution,
UnicodeDammit,
EncodingDetector,
)
from bs4.testing import (
from ...bs4.testing import (
default_builder,
SoupTest,
skipIf,
@ -41,7 +41,7 @@ from bs4.testing import (
import warnings
try:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
from ...bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
LXML_PRESENT = True
except ImportError as e:
LXML_PRESENT = False
@ -418,13 +418,13 @@ class TestEncodingConversion(SoupTest):
def test_ascii_in_unicode_out(self):
# ASCII input is converted to Unicode. The original_encoding
# attribute is set to 'utf-8', a superset of ASCII.
chardet = bs4.dammit.chardet_dammit
chardet = chardet_dammit
logging.disable(logging.WARNING)
try:
def noop(str):
return None
# Disable chardet, which will realize that the ASCII is ASCII.
bs4.dammit.chardet_dammit = noop
chardet_dammit = noop
ascii = b"<foo>a</foo>"
soup_from_ascii = self.soup(ascii)
unicode_output = soup_from_ascii.decode()
@ -433,7 +433,7 @@ class TestEncodingConversion(SoupTest):
self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
finally:
logging.disable(logging.NOTSET)
bs4.dammit.chardet_dammit = chardet
chardet_dammit = chardet
def test_unicode_in_unicode_out(self):
# Unicode input is left alone. The original_encoding attribute
@ -574,12 +574,12 @@ class TestUnicodeDammit(unittest.TestCase):
doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
chardet = bs4.dammit.chardet_dammit
chardet = chardet_dammit
logging.disable(logging.WARNING)
try:
def noop(str):
return None
bs4.dammit.chardet_dammit = noop
chardet_dammit = noop
dammit = UnicodeDammit(doc)
self.assertEqual(True, dammit.contains_replacement_characters)
self.assertTrue("\ufffd" in dammit.unicode_markup)
@ -588,7 +588,7 @@ class TestUnicodeDammit(unittest.TestCase):
self.assertTrue(soup.contains_replacement_characters)
finally:
logging.disable(logging.NOTSET)
bs4.dammit.chardet_dammit = chardet
chardet_dammit = chardet
def test_byte_order_mark_removed(self):
# A document written in UTF-16LE will have its byte order marker stripped.

View File

@ -14,12 +14,12 @@ import copy
import pickle
import re
import warnings
from bs4 import BeautifulSoup
from bs4.builder import (
from ...bs4 import BeautifulSoup
from ...bs4.builder import (
builder_registry,
HTMLParserTreeBuilder,
)
from bs4.element import (
from ...bs4.element import (
PY3K,
CData,
Comment,
@ -33,11 +33,11 @@ from bs4.element import (
Tag,
TemplateString,
)
from bs4.testing import (
from ...bs4.testing import (
SoupTest,
skipIf,
)
from soupsieve import SelectorSyntaxError
from ...soupsieve import SelectorSyntaxError
XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None)
LXML_PRESENT = (builder_registry.lookup("lxml") is not None)