fully make bs4 and soupsieve standalone in the project

This commit is contained in:
2025-04-24 17:39:41 +02:00
parent ed336866ee
commit aefb27614f
16 changed files with 72 additions and 77 deletions

View File

@ -7,17 +7,17 @@ import unittest
import sys
import tempfile
from bs4 import (
from ...bs4 import (
BeautifulSoup,
BeautifulStoneSoup,
GuessedAtParserWarning,
MarkupResemblesLocatorWarning,
)
from bs4.builder import (
from ...bs4.builder import (
TreeBuilder,
ParserRejectedMarkup,
)
from bs4.element import (
from ...bs4.element import (
CharsetMetaAttributeValue,
Comment,
ContentMetaAttributeValue,
@ -27,13 +27,13 @@ from bs4.element import (
NavigableString,
)
import bs4.dammit
from bs4.dammit import (
from ...bs4.dammit import *
from ...bs4.dammit import (
EntitySubstitution,
UnicodeDammit,
EncodingDetector,
)
from bs4.testing import (
from ...bs4.testing import (
default_builder,
SoupTest,
skipIf,
@ -41,7 +41,7 @@ from bs4.testing import (
import warnings
try:
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
from ...bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
LXML_PRESENT = True
except ImportError as e:
LXML_PRESENT = False
@ -418,13 +418,13 @@ class TestEncodingConversion(SoupTest):
def test_ascii_in_unicode_out(self):
# ASCII input is converted to Unicode. The original_encoding
# attribute is set to 'utf-8', a superset of ASCII.
chardet = bs4.dammit.chardet_dammit
chardet = chardet_dammit
logging.disable(logging.WARNING)
try:
def noop(str):
return None
# Disable chardet, which will realize that the ASCII is ASCII.
bs4.dammit.chardet_dammit = noop
chardet_dammit = noop
ascii = b"<foo>a</foo>"
soup_from_ascii = self.soup(ascii)
unicode_output = soup_from_ascii.decode()
@ -433,7 +433,7 @@ class TestEncodingConversion(SoupTest):
self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
finally:
logging.disable(logging.NOTSET)
bs4.dammit.chardet_dammit = chardet
chardet_dammit = chardet
def test_unicode_in_unicode_out(self):
# Unicode input is left alone. The original_encoding attribute
@ -574,12 +574,12 @@ class TestUnicodeDammit(unittest.TestCase):
doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
chardet = bs4.dammit.chardet_dammit
chardet = chardet_dammit
logging.disable(logging.WARNING)
try:
def noop(str):
return None
bs4.dammit.chardet_dammit = noop
chardet_dammit = noop
dammit = UnicodeDammit(doc)
self.assertEqual(True, dammit.contains_replacement_characters)
self.assertTrue("\ufffd" in dammit.unicode_markup)
@ -588,7 +588,7 @@ class TestUnicodeDammit(unittest.TestCase):
self.assertTrue(soup.contains_replacement_characters)
finally:
logging.disable(logging.NOTSET)
bs4.dammit.chardet_dammit = chardet
chardet_dammit = chardet
def test_byte_order_mark_removed(self):
# A document written in UTF-16LE will have its byte order marker stripped.