Make bs4 and soupsieve standalone in this project (use package-relative imports inside bs4 and explicit imports in soupsieve)

This commit is contained in:
2025-04-24 19:26:20 +02:00
parent aefb27614f
commit 3dd7b5a18d
17 changed files with 68 additions and 62 deletions

View File

@ -149,7 +149,6 @@ class MarkdownLivePreviewListener(sublime_plugin.EventListener):
if self.file_name is None: if self.file_name is None:
total_region = sublime.Region(0, markdown_view.size()) total_region = sublime.Region(0, markdown_view.size())
self.content = markdown_view.substr(total_region) self.content = markdown_view.substr(total_region)
markdown_view.erase(edit, total_region)
else: else:
self.content = None self.content = None

View File

@ -4,7 +4,7 @@ __license__ = "MIT"
from collections import defaultdict from collections import defaultdict
import itertools import itertools
import sys import sys
from ...bs4.element import ( from ..element import (
CharsetMetaAttributeValue, CharsetMetaAttributeValue,
ContentMetaAttributeValue, ContentMetaAttributeValue,
Stylesheet, Stylesheet,

View File

@ -7,13 +7,13 @@ __all__ = [
import warnings import warnings
import re import re
from ...bs4.builder import ( from . import (
PERMISSIVE, PERMISSIVE,
HTML, HTML,
HTML_5, HTML_5,
HTMLTreeBuilder, HTMLTreeBuilder,
) )
from ...bs4.element import ( from ..element import (
NamespacedAttribute, NamespacedAttribute,
nonwhitespace_re, nonwhitespace_re,
) )
@ -22,7 +22,7 @@ from html5lib.constants import (
namespaces, namespaces,
prefixes, prefixes,
) )
from ...bs4.element import ( from ..element import (
Comment, Comment,
Doctype, Doctype,
NavigableString, NavigableString,
@ -120,7 +120,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
if soup: if soup:
self.soup = soup self.soup = soup
else: else:
from ...bs4 import BeautifulSoup from .. import BeautifulSoup
# TODO: Why is the parser 'html.parser' here? To avoid an # TODO: Why is the parser 'html.parser' here? To avoid an
# infinite loop? # infinite loop?
self.soup = BeautifulSoup( self.soup = BeautifulSoup(
@ -166,7 +166,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
return TextNode(Comment(data), self.soup) return TextNode(Comment(data), self.soup)
def fragmentClass(self): def fragmentClass(self):
from ...bs4 import BeautifulSoup from .. import BeautifulSoup
# TODO: Why is the parser 'html.parser' here? To avoid an # TODO: Why is the parser 'html.parser' here? To avoid an
# infinite loop? # infinite loop?
self.soup = BeautifulSoup("", "html.parser") self.soup = BeautifulSoup("", "html.parser")
@ -184,7 +184,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
return treebuilder_base.TreeBuilder.getFragment(self).element return treebuilder_base.TreeBuilder.getFragment(self).element
def testSerializer(self, element): def testSerializer(self, element):
from ...bs4 import BeautifulSoup from .. import BeautifulSoup
rv = [] rv = []
doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$') doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')

View File

@ -34,16 +34,16 @@ CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4 CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
from ...bs4.element import ( from ..element import (
CData, CData,
Comment, Comment,
Declaration, Declaration,
Doctype, Doctype,
ProcessingInstruction, ProcessingInstruction,
) )
from ...bs4.dammit import EntitySubstitution, UnicodeDammit from ..dammit import EntitySubstitution, UnicodeDammit
from ...bs4.builder import ( from . import (
HTML, HTML,
HTMLTreeBuilder, HTMLTreeBuilder,
STRICT, STRICT,

View File

@ -14,14 +14,14 @@ except ImportError as e:
from io import BytesIO from io import BytesIO
from io import StringIO from io import StringIO
from lxml import etree from lxml import etree
from ...bs4.element import ( from ..element import (
Comment, Comment,
Doctype, Doctype,
NamespacedAttribute, NamespacedAttribute,
ProcessingInstruction, ProcessingInstruction,
XMLProcessingInstruction, XMLProcessingInstruction,
) )
from ...bs4.builder import ( from . import (
FAST, FAST,
HTML, HTML,
HTMLTreeBuilder, HTMLTreeBuilder,
@ -29,7 +29,7 @@ from ...bs4.builder import (
ParserRejectedMarkup, ParserRejectedMarkup,
TreeBuilder, TreeBuilder,
XML) XML)
from ...bs4.dammit import EncodingDetector from ..dammit import EncodingDetector
LXML = 'lxml' LXML = 'lxml'

View File

@ -6,9 +6,9 @@ __license__ = "MIT"
import cProfile import cProfile
from io import StringIO from io import StringIO
from html.parser import HTMLParser from html.parser import HTMLParser
from ..bs4 import BeautifulSoup as bs4 from . import BeautifulSoup as bs4
from ..bs4 import BeautifulSoup, __version__ from . import BeautifulSoup, __version__
from ..bs4.builder import builder_registry from .builder import builder_registry
import os import os
import pstats import pstats

View File

@ -16,7 +16,7 @@ except ImportError as e:
'The soupsieve package is not installed. CSS selectors cannot be used.' 'The soupsieve package is not installed. CSS selectors cannot be used.'
) )
from ..bs4.formatter import ( from .formatter import (
Formatter, Formatter,
HTMLFormatter, HTMLFormatter,
XMLFormatter, XMLFormatter,
@ -380,7 +380,7 @@ class PageElement(object):
and not isinstance(new_child, NavigableString)): and not isinstance(new_child, NavigableString)):
new_child = NavigableString(new_child) new_child = NavigableString(new_child)
from ..bs4 import BeautifulSoup from . import BeautifulSoup
if isinstance(new_child, BeautifulSoup): if isinstance(new_child, BeautifulSoup):
# We don't want to end up with a situation where one BeautifulSoup # We don't want to end up with a situation where one BeautifulSoup
# object contains another. Insert the children one at a time. # object contains another. Insert the children one at a time.

View File

@ -1,4 +1,4 @@
from ..bs4.dammit import EntitySubstitution from .dammit import EntitySubstitution
class Formatter(EntitySubstitution): class Formatter(EntitySubstitution):
"""Describes a strategy to use when outputting a parse tree to a string. """Describes a strategy to use when outputting a parse tree to a string.

View File

@ -9,8 +9,8 @@ import copy
import functools import functools
import unittest import unittest
from unittest import TestCase from unittest import TestCase
from ..bs4 import BeautifulSoup from . import BeautifulSoup
from ..bs4.element import ( from .element import (
CharsetMetaAttributeValue, CharsetMetaAttributeValue,
Comment, Comment,
ContentMetaAttributeValue, ContentMetaAttributeValue,
@ -22,7 +22,7 @@ from ..bs4.element import (
Tag Tag
) )
from ..bs4.builder import HTMLParserTreeBuilder from .builder import HTMLParserTreeBuilder
default_builder = HTMLParserTreeBuilder default_builder = HTMLParserTreeBuilder
BAD_DOCUMENT = """A bare string BAD_DOCUMENT = """A bare string

View File

@ -3,21 +3,21 @@
import unittest import unittest
import warnings import warnings
from ...bs4 import BeautifulSoup from .. import BeautifulSoup
from ...bs4.builder import ( from ..builder import (
builder_registry as registry, builder_registry as registry,
HTMLParserTreeBuilder, HTMLParserTreeBuilder,
TreeBuilderRegistry, TreeBuilderRegistry,
) )
try: try:
from ...bs4.builder import HTML5TreeBuilder from ..builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True HTML5LIB_PRESENT = True
except ImportError: except ImportError:
HTML5LIB_PRESENT = False HTML5LIB_PRESENT = False
try: try:
from ...bs4.builder import ( from ..builder import (
LXMLTreeBuilderForXML, LXMLTreeBuilderForXML,
LXMLTreeBuilder, LXMLTreeBuilder,
) )

View File

@ -3,12 +3,12 @@
import warnings import warnings
try: try:
from ...bs4.builder import HTML5TreeBuilder from ..builder import HTML5TreeBuilder
HTML5LIB_PRESENT = True HTML5LIB_PRESENT = True
except ImportError as e: except ImportError as e:
HTML5LIB_PRESENT = False HTML5LIB_PRESENT = False
from ...bs4.element import SoupStrainer from ..element import SoupStrainer
from ...bs4.testing import ( from ..testing import (
HTML5TreeBuilderSmokeTest, HTML5TreeBuilderSmokeTest,
SoupTest, SoupTest,
skipIf, skipIf,

View File

@ -3,9 +3,9 @@ trees."""
from pdb import set_trace from pdb import set_trace
import pickle import pickle
from ...bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest from ..testing import SoupTest, HTMLTreeBuilderSmokeTest
from ...bs4.builder import HTMLParserTreeBuilder from ..builder import HTMLParserTreeBuilder
from ...bs4.builder._htmlparser import BeautifulSoupHTMLParser from ..builder._htmlparser import BeautifulSoupHTMLParser
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):

View File

@ -12,16 +12,16 @@ except ImportError as e:
LXML_VERSION = (0,) LXML_VERSION = (0,)
if LXML_PRESENT: if LXML_PRESENT:
from ...bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML from ..builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
from ...bs4 import ( from .. import (
BeautifulSoup, BeautifulSoup,
BeautifulStoneSoup, BeautifulStoneSoup,
) )
from ...bs4.element import Comment, Doctype, SoupStrainer from ..element import Comment, Doctype, SoupStrainer
from ...bs4.testing import skipIf from ..testing import skipIf
from ...bs4.tests import test_htmlparser from . import test_htmlparser
from ...bs4.testing import ( from ..testing import (
HTMLTreeBuilderSmokeTest, HTMLTreeBuilderSmokeTest,
XMLTreeBuilderSmokeTest, XMLTreeBuilderSmokeTest,
SoupTest, SoupTest,

View File

@ -7,17 +7,17 @@ import unittest
import sys import sys
import tempfile import tempfile
from ...bs4 import ( from .. import (
BeautifulSoup, BeautifulSoup,
BeautifulStoneSoup, BeautifulStoneSoup,
GuessedAtParserWarning, GuessedAtParserWarning,
MarkupResemblesLocatorWarning, MarkupResemblesLocatorWarning,
) )
from ...bs4.builder import ( from ..builder import (
TreeBuilder, TreeBuilder,
ParserRejectedMarkup, ParserRejectedMarkup,
) )
from ...bs4.element import ( from ..element import (
CharsetMetaAttributeValue, CharsetMetaAttributeValue,
Comment, Comment,
ContentMetaAttributeValue, ContentMetaAttributeValue,
@ -27,13 +27,13 @@ from ...bs4.element import (
NavigableString, NavigableString,
) )
from ...bs4.dammit import * from ..dammit import *
from ...bs4.dammit import ( from ..dammit import (
EntitySubstitution, EntitySubstitution,
UnicodeDammit, UnicodeDammit,
EncodingDetector, EncodingDetector,
) )
from ...bs4.testing import ( from ..testing import (
default_builder, default_builder,
SoupTest, SoupTest,
skipIf, skipIf,
@ -41,7 +41,7 @@ from ...bs4.testing import (
import warnings import warnings
try: try:
from ...bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML from ..builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
LXML_PRESENT = True LXML_PRESENT = True
except ImportError as e: except ImportError as e:
LXML_PRESENT = False LXML_PRESENT = False
@ -120,11 +120,10 @@ class TestConstructor(SoupTest):
def feed(self, *args, **kwargs): def feed(self, *args, **kwargs):
raise ParserRejectedMarkup("Nope.") raise ParserRejectedMarkup("Nope.")
def prepare_markup(self, *args, **kwargs): def prepare_markup(self, markup, *args, **kwargs):
# We're going to try two different ways of preparing this markup, # We're going to try two different ways of preparing this markup,
# but feed() will reject both of them. # but feed() will reject both of them.
yield markup, None, None, False yield markup, None, None, False
yield markup, None, None, False
import re import re
self.assertRaisesRegex( self.assertRaisesRegex(
@ -613,7 +612,7 @@ class TestUnicodeDammit(unittest.TestCase):
self.assertRaises(UnicodeDecodeError, doc.decode, "utf8") self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")
# Unicode, Dammit thinks the whole document is Windows-1252, # Unicode, Dammit thinks the whole document is Windows-1252,
# and decodes it into "☃☃☃Hi, I like Windows!☃☃☃" # and decodes it into "☃☃☃"Hi, I like Windows!"☃☃☃"
# But if we run it through fix_embedded_windows_1252, it's fixed: # But if we run it through fix_embedded_windows_1252, it's fixed:

View File

@ -14,12 +14,12 @@ import copy
import pickle import pickle
import re import re
import warnings import warnings
from ...bs4 import BeautifulSoup from .. import BeautifulSoup
from ...bs4.builder import ( from ..builder import (
builder_registry, builder_registry,
HTMLParserTreeBuilder, HTMLParserTreeBuilder,
) )
from ...bs4.element import ( from ..element import (
PY3K, PY3K,
CData, CData,
Comment, Comment,
@ -33,7 +33,7 @@ from ...bs4.element import (
Tag, Tag,
TemplateString, TemplateString,
) )
from ...bs4.testing import ( from ..testing import (
SoupTest, SoupTest,
skipIf, skipIf,
) )

View File

@ -6,11 +6,13 @@ from .import css_types as ct
import unicodedata import unicodedata
from collections.abc import Sequence from collections.abc import Sequence
from ..bs4 import * from ..bs4 import BeautifulSoup
from ..bs4.element import * from ..bs4.element import (
Tag, NavigableString, Comment, Declaration, CData, ProcessingInstruction, Doctype
)
# Empty tag pattern (whitespace okay) # Empty tag pattern (whitespace okay)
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]') RE_NOT_EMPTY = re.compile(r'[^ \t\r\n\f]')
RE_NOT_WS = re.compile('[^ \t\r\n\f]+') RE_NOT_WS = re.compile('[^ \t\r\n\f]+')

View File

@ -194,13 +194,19 @@ def get_image_size(fhandle, pathlike):
fhandle.seek(size, 1) fhandle.seek(size, 1)
byte = fhandle.read(1) byte = fhandle.read(1)
if byte == b"": if byte == b"":
fhandle = end # Reached end of file unexpectedly, break the loop
byte = fhandle.read(1) break
while ord(byte) == 0xFF: while ord(byte) == 0xFF:
byte = fhandle.read(1) byte = fhandle.read(1)
if byte == b"": # Check EOF in inner loop too
break
if byte == b"": # Break outer loop if inner loop hit EOF
break
ftype = ord(byte) ftype = ord(byte)
size = struct.unpack(">H", fhandle.read(2))[0] - 2 size = struct.unpack(">H", fhandle.read(2))[0] - 2
# Check if the loop exited because of a break (EOF) before finding the marker
if not (0xC0 <= ftype <= 0xCF):
return "unknown format {!r}".format(format_)
# We are at a SOFn block # We are at a SOFn block
fhandle.seek(1, 1) # Skip `precision' byte. fhandle.seek(1, 1) # Skip `precision' byte.
height, width = struct.unpack(">HH", fhandle.read(4)) height, width = struct.unpack(">HH", fhandle.read(4))