Made bs4 and soupsieve standalone in this project

This commit is contained in:
2025-04-24 19:26:20 +02:00
parent aefb27614f
commit 3dd7b5a18d
17 changed files with 68 additions and 62 deletions

View File

@@ -149,7 +149,6 @@ class MarkdownLivePreviewListener(sublime_plugin.EventListener):
if self.file_name is None:
total_region = sublime.Region(0, markdown_view.size())
self.content = markdown_view.substr(total_region)
markdown_view.erase(edit, total_region)
else:
self.content = None

View File

@@ -4,7 +4,7 @@ __license__ = "MIT"
from collections import defaultdict
import itertools
import sys
from ...bs4.element import (
from ..element import (
CharsetMetaAttributeValue,
ContentMetaAttributeValue,
Stylesheet,

View File

@@ -7,13 +7,13 @@ __all__ = [
import warnings
import re
from ...bs4.builder import (
from . import (
PERMISSIVE,
HTML,
HTML_5,
HTMLTreeBuilder,
)
from ...bs4.element import (
from ..element import (
NamespacedAttribute,
nonwhitespace_re,
)
@@ -22,7 +22,7 @@ from html5lib.constants import (
namespaces,
prefixes,
)
from ...bs4.element import (
from ..element import (
Comment,
Doctype,
NavigableString,
@@ -120,7 +120,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
if soup:
self.soup = soup
else:
from ...bs4 import BeautifulSoup
from .. import BeautifulSoup
# TODO: Why is the parser 'html.parser' here? To avoid an
# infinite loop?
self.soup = BeautifulSoup(
@@ -166,7 +166,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
return TextNode(Comment(data), self.soup)
def fragmentClass(self):
from ...bs4 import BeautifulSoup
from .. import BeautifulSoup
# TODO: Why is the parser 'html.parser' here? To avoid an
# infinite loop?
self.soup = BeautifulSoup("", "html.parser")
@@ -184,7 +184,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
return treebuilder_base.TreeBuilder.getFragment(self).element
def testSerializer(self, element):
from ...bs4 import BeautifulSoup
from .. import BeautifulSoup
rv = []
doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')

View File

@@ -34,16 +34,16 @@ CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
from ...bs4.element import (
from ..element import (
CData,
Comment,
Declaration,
Doctype,
ProcessingInstruction,
)
from ...bs4.dammit import EntitySubstitution, UnicodeDammit
from ..dammit import EntitySubstitution, UnicodeDammit
from ...bs4.builder import (
from . import (
HTML,
HTMLTreeBuilder,
STRICT,

View File

@@ -14,14 +14,14 @@ except ImportError as e:
from io import BytesIO
from io import StringIO
from lxml import etree
from ...bs4.element import (
from ..element import (
Comment,
Doctype,
NamespacedAttribute,
ProcessingInstruction,
XMLProcessingInstruction,
)
from ...bs4.builder import (
from . import (
FAST,
HTML,
HTMLTreeBuilder,
@@ -29,7 +29,7 @@ from ...bs4.builder import (
ParserRejectedMarkup,
TreeBuilder,
XML)
from ...bs4.dammit import EncodingDetector
from ..dammit import EncodingDetector
LXML = 'lxml'

View File

@@ -6,9 +6,9 @@ __license__ = "MIT"
import cProfile
from io import StringIO
from html.parser import HTMLParser
from ..bs4 import BeautifulSoup as bs4
from ..bs4 import BeautifulSoup, __version__
from ..bs4.builder import builder_registry
from . import BeautifulSoup as bs4
from . import BeautifulSoup, __version__
from .builder import builder_registry
import os
import pstats

View File

@@ -16,7 +16,7 @@ except ImportError as e:
'The soupsieve package is not installed. CSS selectors cannot be used.'
)
from ..bs4.formatter import (
from .formatter import (
Formatter,
HTMLFormatter,
XMLFormatter,
@@ -380,7 +380,7 @@ class PageElement(object):
and not isinstance(new_child, NavigableString)):
new_child = NavigableString(new_child)
from ..bs4 import BeautifulSoup
from . import BeautifulSoup
if isinstance(new_child, BeautifulSoup):
# We don't want to end up with a situation where one BeautifulSoup
# object contains another. Insert the children one at a time.

View File

@@ -1,4 +1,4 @@
from ..bs4.dammit import EntitySubstitution
from .dammit import EntitySubstitution
class Formatter(EntitySubstitution):
"""Describes a strategy to use when outputting a parse tree to a string.

View File

@@ -9,8 +9,8 @@ import copy
import functools
import unittest
from unittest import TestCase
from ..bs4 import BeautifulSoup
from ..bs4.element import (
from . import BeautifulSoup
from .element import (
CharsetMetaAttributeValue,
Comment,
ContentMetaAttributeValue,
@@ -22,7 +22,7 @@ from ..bs4.element import (
Tag
)
from ..bs4.builder import HTMLParserTreeBuilder
from .builder import HTMLParserTreeBuilder
default_builder = HTMLParserTreeBuilder
BAD_DOCUMENT = """A bare string

View File

@@ -3,21 +3,21 @@
import unittest
import warnings
from ...bs4 import BeautifulSoup
from ...bs4.builder import (
from .. import BeautifulSoup
from ..builder import (
builder_registry as registry,
HTMLParserTreeBuilder,
TreeBuilderRegistry,
)
try:
from ...bs4.builder import HTML5TreeBuilder
from ..builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError:
HTML5LIB_PRESENT = False
try:
from ...bs4.builder import (
from ..builder import (
LXMLTreeBuilderForXML,
LXMLTreeBuilder,
)

View File

@@ -3,12 +3,12 @@
import warnings
try:
from ...bs4.builder import HTML5TreeBuilder
from ..builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True
except ImportError as e:
HTML5LIB_PRESENT = False
from ...bs4.element import SoupStrainer
from ...bs4.testing import (
from ..element import SoupStrainer
from ..testing import (
HTML5TreeBuilderSmokeTest,
SoupTest,
skipIf,

View File

@@ -3,9 +3,9 @@ trees."""
from pdb import set_trace
import pickle
from ...bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
from ...bs4.builder import HTMLParserTreeBuilder
from ...bs4.builder._htmlparser import BeautifulSoupHTMLParser
from ..testing import SoupTest, HTMLTreeBuilderSmokeTest
from ..builder import HTMLParserTreeBuilder
from ..builder._htmlparser import BeautifulSoupHTMLParser
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):

View File

@@ -12,16 +12,16 @@ except ImportError as e:
LXML_VERSION = (0,)
if LXML_PRESENT:
from ...bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
from ..builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
from ...bs4 import (
from .. import (
BeautifulSoup,
BeautifulStoneSoup,
)
from ...bs4.element import Comment, Doctype, SoupStrainer
from ...bs4.testing import skipIf
from ...bs4.tests import test_htmlparser
from ...bs4.testing import (
from ..element import Comment, Doctype, SoupStrainer
from ..testing import skipIf
from . import test_htmlparser
from ..testing import (
HTMLTreeBuilderSmokeTest,
XMLTreeBuilderSmokeTest,
SoupTest,

View File

@@ -7,17 +7,17 @@ import unittest
import sys
import tempfile
from ...bs4 import (
from .. import (
BeautifulSoup,
BeautifulStoneSoup,
GuessedAtParserWarning,
MarkupResemblesLocatorWarning,
)
from ...bs4.builder import (
from ..builder import (
TreeBuilder,
ParserRejectedMarkup,
)
from ...bs4.element import (
from ..element import (
CharsetMetaAttributeValue,
Comment,
ContentMetaAttributeValue,
@@ -27,13 +27,13 @@ from ...bs4.element import (
NavigableString,
)
from ...bs4.dammit import *
from ...bs4.dammit import (
from ..dammit import *
from ..dammit import (
EntitySubstitution,
UnicodeDammit,
EncodingDetector,
)
from ...bs4.testing import (
from ..testing import (
default_builder,
SoupTest,
skipIf,
@@ -41,7 +41,7 @@ from ...bs4.testing import (
import warnings
try:
from ...bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
from ..builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
LXML_PRESENT = True
except ImportError as e:
LXML_PRESENT = False
@@ -120,11 +120,10 @@ class TestConstructor(SoupTest):
def feed(self, *args, **kwargs):
raise ParserRejectedMarkup("Nope.")
def prepare_markup(self, *args, **kwargs):
def prepare_markup(self, markup, *args, **kwargs):
# We're going to try two different ways of preparing this markup,
# but feed() will reject both of them.
yield markup, None, None, False
yield markup, None, None, False
import re
self.assertRaisesRegex(
@@ -613,7 +612,7 @@ class TestUnicodeDammit(unittest.TestCase):
self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")
# Unicode, Dammit thinks the whole document is Windows-1252,
# and decodes it into "☃☃☃Hi, I like Windows!☃☃☃"
# and decodes it into "☃☃☃"Hi, I like Windows!"☃☃☃"
# But if we run it through fix_embedded_windows_1252, it's fixed:

View File

@@ -14,12 +14,12 @@ import copy
import pickle
import re
import warnings
from ...bs4 import BeautifulSoup
from ...bs4.builder import (
from .. import BeautifulSoup
from ..builder import (
builder_registry,
HTMLParserTreeBuilder,
)
from ...bs4.element import (
from ..element import (
PY3K,
CData,
Comment,
@@ -33,7 +33,7 @@ from ...bs4.element import (
Tag,
TemplateString,
)
from ...bs4.testing import (
from ..testing import (
SoupTest,
skipIf,
)

View File

@@ -6,11 +6,13 @@ from .import css_types as ct
import unicodedata
from collections.abc import Sequence
from ..bs4 import *
from ..bs4.element import *
from ..bs4 import BeautifulSoup
from ..bs4.element import (
Tag, NavigableString, Comment, Declaration, CData, ProcessingInstruction, Doctype
)
# Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
RE_NOT_EMPTY = re.compile(r'[^ \t\r\n\f]')
RE_NOT_WS = re.compile('[^ \t\r\n\f]+')

View File

@@ -194,13 +194,19 @@ def get_image_size(fhandle, pathlike):
fhandle.seek(size, 1)
byte = fhandle.read(1)
if byte == b"":
fhandle = end
byte = fhandle.read(1)
# Reached end of file unexpectedly, break the loop
break
while ord(byte) == 0xFF:
byte = fhandle.read(1)
if byte == b"": # Check EOF in inner loop too
break
if byte == b"": # Break outer loop if inner loop hit EOF
break
ftype = ord(byte)
size = struct.unpack(">H", fhandle.read(2))[0] - 2
# Check if the loop exited because of a break (EOF) before finding the marker
if not (0xC0 <= ftype <= 0xCF):
return "unknown format {!r}".format(format_)
# We are at a SOFn block
fhandle.seek(1, 1) # Skip `precision' byte.
height, width = struct.unpack(">HH", fhandle.read(4))