fully make bs4 and soupsieve standalone in the project
This commit is contained in:
@ -4,7 +4,7 @@ __license__ = "MIT"
|
||||
from collections import defaultdict
|
||||
import itertools
|
||||
import sys
|
||||
from bs4.element import (
|
||||
from ...bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
ContentMetaAttributeValue,
|
||||
Stylesheet,
|
||||
|
||||
@ -7,13 +7,13 @@ __all__ = [
|
||||
|
||||
import warnings
|
||||
import re
|
||||
from bs4.builder import (
|
||||
from ...bs4.builder import (
|
||||
PERMISSIVE,
|
||||
HTML,
|
||||
HTML_5,
|
||||
HTMLTreeBuilder,
|
||||
)
|
||||
from bs4.element import (
|
||||
from ...bs4.element import (
|
||||
NamespacedAttribute,
|
||||
nonwhitespace_re,
|
||||
)
|
||||
@ -22,7 +22,7 @@ from html5lib.constants import (
|
||||
namespaces,
|
||||
prefixes,
|
||||
)
|
||||
from bs4.element import (
|
||||
from ...bs4.element import (
|
||||
Comment,
|
||||
Doctype,
|
||||
NavigableString,
|
||||
@ -120,7 +120,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
|
||||
if soup:
|
||||
self.soup = soup
|
||||
else:
|
||||
from bs4 import BeautifulSoup
|
||||
from ...bs4 import BeautifulSoup
|
||||
# TODO: Why is the parser 'html.parser' here? To avoid an
|
||||
# infinite loop?
|
||||
self.soup = BeautifulSoup(
|
||||
@ -166,7 +166,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
|
||||
return TextNode(Comment(data), self.soup)
|
||||
|
||||
def fragmentClass(self):
|
||||
from bs4 import BeautifulSoup
|
||||
from ...bs4 import BeautifulSoup
|
||||
# TODO: Why is the parser 'html.parser' here? To avoid an
|
||||
# infinite loop?
|
||||
self.soup = BeautifulSoup("", "html.parser")
|
||||
@ -184,7 +184,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
|
||||
return treebuilder_base.TreeBuilder.getFragment(self).element
|
||||
|
||||
def testSerializer(self, element):
|
||||
from bs4 import BeautifulSoup
|
||||
from ...bs4 import BeautifulSoup
|
||||
rv = []
|
||||
doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')
|
||||
|
||||
|
||||
@ -34,16 +34,16 @@ CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
|
||||
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
|
||||
|
||||
|
||||
from bs4.element import (
|
||||
from ...bs4.element import (
|
||||
CData,
|
||||
Comment,
|
||||
Declaration,
|
||||
Doctype,
|
||||
ProcessingInstruction,
|
||||
)
|
||||
from bs4.dammit import EntitySubstitution, UnicodeDammit
|
||||
from ...bs4.dammit import EntitySubstitution, UnicodeDammit
|
||||
|
||||
from bs4.builder import (
|
||||
from ...bs4.builder import (
|
||||
HTML,
|
||||
HTMLTreeBuilder,
|
||||
STRICT,
|
||||
|
||||
@ -14,14 +14,14 @@ except ImportError as e:
|
||||
from io import BytesIO
|
||||
from io import StringIO
|
||||
from lxml import etree
|
||||
from bs4.element import (
|
||||
from ...bs4.element import (
|
||||
Comment,
|
||||
Doctype,
|
||||
NamespacedAttribute,
|
||||
ProcessingInstruction,
|
||||
XMLProcessingInstruction,
|
||||
)
|
||||
from bs4.builder import (
|
||||
from ...bs4.builder import (
|
||||
FAST,
|
||||
HTML,
|
||||
HTMLTreeBuilder,
|
||||
@ -29,7 +29,7 @@ from bs4.builder import (
|
||||
ParserRejectedMarkup,
|
||||
TreeBuilder,
|
||||
XML)
|
||||
from bs4.dammit import EncodingDetector
|
||||
from ...bs4.dammit import EncodingDetector
|
||||
|
||||
LXML = 'lxml'
|
||||
|
||||
|
||||
@ -6,9 +6,9 @@ __license__ = "MIT"
|
||||
import cProfile
|
||||
from io import StringIO
|
||||
from html.parser import HTMLParser
|
||||
import bs4
|
||||
from bs4 import BeautifulSoup, __version__
|
||||
from bs4.builder import builder_registry
|
||||
from ..bs4 import BeautifulSoup as bs4
|
||||
from ..bs4 import BeautifulSoup, __version__
|
||||
from ..bs4.builder import builder_registry
|
||||
|
||||
import os
|
||||
import pstats
|
||||
|
||||
@ -9,14 +9,14 @@ import re
|
||||
import sys
|
||||
import warnings
|
||||
try:
|
||||
import soupsieve
|
||||
from ..soupsieve import *
|
||||
except ImportError as e:
|
||||
soupsieve = None
|
||||
warnings.warn(
|
||||
'The soupsieve package is not installed. CSS selectors cannot be used.'
|
||||
)
|
||||
|
||||
from bs4.formatter import (
|
||||
from ..bs4.formatter import (
|
||||
Formatter,
|
||||
HTMLFormatter,
|
||||
XMLFormatter,
|
||||
@ -380,7 +380,7 @@ class PageElement(object):
|
||||
and not isinstance(new_child, NavigableString)):
|
||||
new_child = NavigableString(new_child)
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from ..bs4 import BeautifulSoup
|
||||
if isinstance(new_child, BeautifulSoup):
|
||||
# We don't want to end up with a situation where one BeautifulSoup
|
||||
# object contains another. Insert the children one at a time.
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
from bs4.dammit import EntitySubstitution
|
||||
from ..bs4.dammit import EntitySubstitution
|
||||
|
||||
class Formatter(EntitySubstitution):
|
||||
"""Describes a strategy to use when outputting a parse tree to a string.
|
||||
|
||||
@ -9,8 +9,8 @@ import copy
|
||||
import functools
|
||||
import unittest
|
||||
from unittest import TestCase
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.element import (
|
||||
from ..bs4 import BeautifulSoup
|
||||
from ..bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
Comment,
|
||||
ContentMetaAttributeValue,
|
||||
@ -22,7 +22,7 @@ from bs4.element import (
|
||||
Tag
|
||||
)
|
||||
|
||||
from bs4.builder import HTMLParserTreeBuilder
|
||||
from ..bs4.builder import HTMLParserTreeBuilder
|
||||
default_builder = HTMLParserTreeBuilder
|
||||
|
||||
BAD_DOCUMENT = """A bare string
|
||||
|
||||
@ -3,21 +3,21 @@
|
||||
import unittest
|
||||
import warnings
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.builder import (
|
||||
from ...bs4 import BeautifulSoup
|
||||
from ...bs4.builder import (
|
||||
builder_registry as registry,
|
||||
HTMLParserTreeBuilder,
|
||||
TreeBuilderRegistry,
|
||||
)
|
||||
|
||||
try:
|
||||
from bs4.builder import HTML5TreeBuilder
|
||||
from ...bs4.builder import HTML5TreeBuilder
|
||||
HTML5LIB_PRESENT = True
|
||||
except ImportError:
|
||||
HTML5LIB_PRESENT = False
|
||||
|
||||
try:
|
||||
from bs4.builder import (
|
||||
from ...bs4.builder import (
|
||||
LXMLTreeBuilderForXML,
|
||||
LXMLTreeBuilder,
|
||||
)
|
||||
|
||||
@ -3,12 +3,12 @@
|
||||
import warnings
|
||||
|
||||
try:
|
||||
from bs4.builder import HTML5TreeBuilder
|
||||
from ...bs4.builder import HTML5TreeBuilder
|
||||
HTML5LIB_PRESENT = True
|
||||
except ImportError as e:
|
||||
HTML5LIB_PRESENT = False
|
||||
from bs4.element import SoupStrainer
|
||||
from bs4.testing import (
|
||||
from ...bs4.element import SoupStrainer
|
||||
from ...bs4.testing import (
|
||||
HTML5TreeBuilderSmokeTest,
|
||||
SoupTest,
|
||||
skipIf,
|
||||
|
||||
@ -3,9 +3,9 @@ trees."""
|
||||
|
||||
from pdb import set_trace
|
||||
import pickle
|
||||
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
|
||||
from bs4.builder import HTMLParserTreeBuilder
|
||||
from bs4.builder._htmlparser import BeautifulSoupHTMLParser
|
||||
from ...bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
|
||||
from ...bs4.builder import HTMLParserTreeBuilder
|
||||
from ...bs4.builder._htmlparser import BeautifulSoupHTMLParser
|
||||
|
||||
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||
|
||||
|
||||
@ -12,16 +12,16 @@ except ImportError as e:
|
||||
LXML_VERSION = (0,)
|
||||
|
||||
if LXML_PRESENT:
|
||||
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
||||
from ...bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
||||
|
||||
from bs4 import (
|
||||
from ...bs4 import (
|
||||
BeautifulSoup,
|
||||
BeautifulStoneSoup,
|
||||
)
|
||||
from bs4.element import Comment, Doctype, SoupStrainer
|
||||
from bs4.testing import skipIf
|
||||
from bs4.tests import test_htmlparser
|
||||
from bs4.testing import (
|
||||
from ...bs4.element import Comment, Doctype, SoupStrainer
|
||||
from ...bs4.testing import skipIf
|
||||
from ...bs4.tests import test_htmlparser
|
||||
from ...bs4.testing import (
|
||||
HTMLTreeBuilderSmokeTest,
|
||||
XMLTreeBuilderSmokeTest,
|
||||
SoupTest,
|
||||
|
||||
@ -7,17 +7,17 @@ import unittest
|
||||
import sys
|
||||
import tempfile
|
||||
|
||||
from bs4 import (
|
||||
from ...bs4 import (
|
||||
BeautifulSoup,
|
||||
BeautifulStoneSoup,
|
||||
GuessedAtParserWarning,
|
||||
MarkupResemblesLocatorWarning,
|
||||
)
|
||||
from bs4.builder import (
|
||||
from ...bs4.builder import (
|
||||
TreeBuilder,
|
||||
ParserRejectedMarkup,
|
||||
)
|
||||
from bs4.element import (
|
||||
from ...bs4.element import (
|
||||
CharsetMetaAttributeValue,
|
||||
Comment,
|
||||
ContentMetaAttributeValue,
|
||||
@ -27,13 +27,13 @@ from bs4.element import (
|
||||
NavigableString,
|
||||
)
|
||||
|
||||
import bs4.dammit
|
||||
from bs4.dammit import (
|
||||
from ...bs4.dammit import *
|
||||
from ...bs4.dammit import (
|
||||
EntitySubstitution,
|
||||
UnicodeDammit,
|
||||
EncodingDetector,
|
||||
)
|
||||
from bs4.testing import (
|
||||
from ...bs4.testing import (
|
||||
default_builder,
|
||||
SoupTest,
|
||||
skipIf,
|
||||
@ -41,7 +41,7 @@ from bs4.testing import (
|
||||
import warnings
|
||||
|
||||
try:
|
||||
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
||||
from ...bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
||||
LXML_PRESENT = True
|
||||
except ImportError as e:
|
||||
LXML_PRESENT = False
|
||||
@ -418,13 +418,13 @@ class TestEncodingConversion(SoupTest):
|
||||
def test_ascii_in_unicode_out(self):
|
||||
# ASCII input is converted to Unicode. The original_encoding
|
||||
# attribute is set to 'utf-8', a superset of ASCII.
|
||||
chardet = bs4.dammit.chardet_dammit
|
||||
chardet = chardet_dammit
|
||||
logging.disable(logging.WARNING)
|
||||
try:
|
||||
def noop(str):
|
||||
return None
|
||||
# Disable chardet, which will realize that the ASCII is ASCII.
|
||||
bs4.dammit.chardet_dammit = noop
|
||||
chardet_dammit = noop
|
||||
ascii = b"<foo>a</foo>"
|
||||
soup_from_ascii = self.soup(ascii)
|
||||
unicode_output = soup_from_ascii.decode()
|
||||
@ -433,7 +433,7 @@ class TestEncodingConversion(SoupTest):
|
||||
self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
|
||||
finally:
|
||||
logging.disable(logging.NOTSET)
|
||||
bs4.dammit.chardet_dammit = chardet
|
||||
chardet_dammit = chardet
|
||||
|
||||
def test_unicode_in_unicode_out(self):
|
||||
# Unicode input is left alone. The original_encoding attribute
|
||||
@ -574,12 +574,12 @@ class TestUnicodeDammit(unittest.TestCase):
|
||||
doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
|
||||
<html><b>\330\250\330\252\330\261</b>
|
||||
<i>\310\322\321\220\312\321\355\344</i></html>"""
|
||||
chardet = bs4.dammit.chardet_dammit
|
||||
chardet = chardet_dammit
|
||||
logging.disable(logging.WARNING)
|
||||
try:
|
||||
def noop(str):
|
||||
return None
|
||||
bs4.dammit.chardet_dammit = noop
|
||||
chardet_dammit = noop
|
||||
dammit = UnicodeDammit(doc)
|
||||
self.assertEqual(True, dammit.contains_replacement_characters)
|
||||
self.assertTrue("\ufffd" in dammit.unicode_markup)
|
||||
@ -588,7 +588,7 @@ class TestUnicodeDammit(unittest.TestCase):
|
||||
self.assertTrue(soup.contains_replacement_characters)
|
||||
finally:
|
||||
logging.disable(logging.NOTSET)
|
||||
bs4.dammit.chardet_dammit = chardet
|
||||
chardet_dammit = chardet
|
||||
|
||||
def test_byte_order_mark_removed(self):
|
||||
# A document written in UTF-16LE will have its byte order marker stripped.
|
||||
|
||||
@ -14,12 +14,12 @@ import copy
|
||||
import pickle
|
||||
import re
|
||||
import warnings
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4.builder import (
|
||||
from ...bs4 import BeautifulSoup
|
||||
from ...bs4.builder import (
|
||||
builder_registry,
|
||||
HTMLParserTreeBuilder,
|
||||
)
|
||||
from bs4.element import (
|
||||
from ...bs4.element import (
|
||||
PY3K,
|
||||
CData,
|
||||
Comment,
|
||||
@ -33,11 +33,11 @@ from bs4.element import (
|
||||
Tag,
|
||||
TemplateString,
|
||||
)
|
||||
from bs4.testing import (
|
||||
from ...bs4.testing import (
|
||||
SoupTest,
|
||||
skipIf,
|
||||
)
|
||||
from soupsieve import SelectorSyntaxError
|
||||
from ...soupsieve import SelectorSyntaxError
|
||||
|
||||
XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None)
|
||||
LXML_PRESENT = (builder_registry.lookup("lxml") is not None)
|
||||
|
||||
Reference in New Issue
Block a user