Make bs4 and soupsieve fully standalone within the project
This commit is contained in:
@ -4,7 +4,7 @@ __license__ = "MIT"
|
|||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
import itertools
|
import itertools
|
||||||
import sys
|
import sys
|
||||||
from bs4.element import (
|
from ...bs4.element import (
|
||||||
CharsetMetaAttributeValue,
|
CharsetMetaAttributeValue,
|
||||||
ContentMetaAttributeValue,
|
ContentMetaAttributeValue,
|
||||||
Stylesheet,
|
Stylesheet,
|
||||||
|
|||||||
@ -7,13 +7,13 @@ __all__ = [
|
|||||||
|
|
||||||
import warnings
|
import warnings
|
||||||
import re
|
import re
|
||||||
from bs4.builder import (
|
from ...bs4.builder import (
|
||||||
PERMISSIVE,
|
PERMISSIVE,
|
||||||
HTML,
|
HTML,
|
||||||
HTML_5,
|
HTML_5,
|
||||||
HTMLTreeBuilder,
|
HTMLTreeBuilder,
|
||||||
)
|
)
|
||||||
from bs4.element import (
|
from ...bs4.element import (
|
||||||
NamespacedAttribute,
|
NamespacedAttribute,
|
||||||
nonwhitespace_re,
|
nonwhitespace_re,
|
||||||
)
|
)
|
||||||
@ -22,7 +22,7 @@ from html5lib.constants import (
|
|||||||
namespaces,
|
namespaces,
|
||||||
prefixes,
|
prefixes,
|
||||||
)
|
)
|
||||||
from bs4.element import (
|
from ...bs4.element import (
|
||||||
Comment,
|
Comment,
|
||||||
Doctype,
|
Doctype,
|
||||||
NavigableString,
|
NavigableString,
|
||||||
@ -120,7 +120,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
|
|||||||
if soup:
|
if soup:
|
||||||
self.soup = soup
|
self.soup = soup
|
||||||
else:
|
else:
|
||||||
from bs4 import BeautifulSoup
|
from ...bs4 import BeautifulSoup
|
||||||
# TODO: Why is the parser 'html.parser' here? To avoid an
|
# TODO: Why is the parser 'html.parser' here? To avoid an
|
||||||
# infinite loop?
|
# infinite loop?
|
||||||
self.soup = BeautifulSoup(
|
self.soup = BeautifulSoup(
|
||||||
@ -166,7 +166,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
|
|||||||
return TextNode(Comment(data), self.soup)
|
return TextNode(Comment(data), self.soup)
|
||||||
|
|
||||||
def fragmentClass(self):
|
def fragmentClass(self):
|
||||||
from bs4 import BeautifulSoup
|
from ...bs4 import BeautifulSoup
|
||||||
# TODO: Why is the parser 'html.parser' here? To avoid an
|
# TODO: Why is the parser 'html.parser' here? To avoid an
|
||||||
# infinite loop?
|
# infinite loop?
|
||||||
self.soup = BeautifulSoup("", "html.parser")
|
self.soup = BeautifulSoup("", "html.parser")
|
||||||
@ -184,7 +184,7 @@ class TreeBuilderForHtml5lib(treebuilder_base.TreeBuilder):
|
|||||||
return treebuilder_base.TreeBuilder.getFragment(self).element
|
return treebuilder_base.TreeBuilder.getFragment(self).element
|
||||||
|
|
||||||
def testSerializer(self, element):
|
def testSerializer(self, element):
|
||||||
from bs4 import BeautifulSoup
|
from ...bs4 import BeautifulSoup
|
||||||
rv = []
|
rv = []
|
||||||
doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')
|
doctype_re = re.compile(r'^(.*?)(?: PUBLIC "(.*?)"(?: "(.*?)")?| SYSTEM "(.*?)")?$')
|
||||||
|
|
||||||
|
|||||||
@ -34,16 +34,16 @@ CONSTRUCTOR_STRICT_IS_DEPRECATED = major == 3 and minor == 3
|
|||||||
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
|
CONSTRUCTOR_TAKES_CONVERT_CHARREFS = major == 3 and minor >= 4
|
||||||
|
|
||||||
|
|
||||||
from bs4.element import (
|
from ...bs4.element import (
|
||||||
CData,
|
CData,
|
||||||
Comment,
|
Comment,
|
||||||
Declaration,
|
Declaration,
|
||||||
Doctype,
|
Doctype,
|
||||||
ProcessingInstruction,
|
ProcessingInstruction,
|
||||||
)
|
)
|
||||||
from bs4.dammit import EntitySubstitution, UnicodeDammit
|
from ...bs4.dammit import EntitySubstitution, UnicodeDammit
|
||||||
|
|
||||||
from bs4.builder import (
|
from ...bs4.builder import (
|
||||||
HTML,
|
HTML,
|
||||||
HTMLTreeBuilder,
|
HTMLTreeBuilder,
|
||||||
STRICT,
|
STRICT,
|
||||||
|
|||||||
@ -14,14 +14,14 @@ except ImportError as e:
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
from bs4.element import (
|
from ...bs4.element import (
|
||||||
Comment,
|
Comment,
|
||||||
Doctype,
|
Doctype,
|
||||||
NamespacedAttribute,
|
NamespacedAttribute,
|
||||||
ProcessingInstruction,
|
ProcessingInstruction,
|
||||||
XMLProcessingInstruction,
|
XMLProcessingInstruction,
|
||||||
)
|
)
|
||||||
from bs4.builder import (
|
from ...bs4.builder import (
|
||||||
FAST,
|
FAST,
|
||||||
HTML,
|
HTML,
|
||||||
HTMLTreeBuilder,
|
HTMLTreeBuilder,
|
||||||
@ -29,7 +29,7 @@ from bs4.builder import (
|
|||||||
ParserRejectedMarkup,
|
ParserRejectedMarkup,
|
||||||
TreeBuilder,
|
TreeBuilder,
|
||||||
XML)
|
XML)
|
||||||
from bs4.dammit import EncodingDetector
|
from ...bs4.dammit import EncodingDetector
|
||||||
|
|
||||||
LXML = 'lxml'
|
LXML = 'lxml'
|
||||||
|
|
||||||
|
|||||||
@ -6,9 +6,9 @@ __license__ = "MIT"
|
|||||||
import cProfile
|
import cProfile
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
from html.parser import HTMLParser
|
from html.parser import HTMLParser
|
||||||
import bs4
|
from ..bs4 import BeautifulSoup as bs4
|
||||||
from bs4 import BeautifulSoup, __version__
|
from ..bs4 import BeautifulSoup, __version__
|
||||||
from bs4.builder import builder_registry
|
from ..bs4.builder import builder_registry
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import pstats
|
import pstats
|
||||||
|
|||||||
@ -9,14 +9,14 @@ import re
|
|||||||
import sys
|
import sys
|
||||||
import warnings
|
import warnings
|
||||||
try:
|
try:
|
||||||
import soupsieve
|
from ..soupsieve import *
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
soupsieve = None
|
soupsieve = None
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
'The soupsieve package is not installed. CSS selectors cannot be used.'
|
'The soupsieve package is not installed. CSS selectors cannot be used.'
|
||||||
)
|
)
|
||||||
|
|
||||||
from bs4.formatter import (
|
from ..bs4.formatter import (
|
||||||
Formatter,
|
Formatter,
|
||||||
HTMLFormatter,
|
HTMLFormatter,
|
||||||
XMLFormatter,
|
XMLFormatter,
|
||||||
@ -380,7 +380,7 @@ class PageElement(object):
|
|||||||
and not isinstance(new_child, NavigableString)):
|
and not isinstance(new_child, NavigableString)):
|
||||||
new_child = NavigableString(new_child)
|
new_child = NavigableString(new_child)
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from ..bs4 import BeautifulSoup
|
||||||
if isinstance(new_child, BeautifulSoup):
|
if isinstance(new_child, BeautifulSoup):
|
||||||
# We don't want to end up with a situation where one BeautifulSoup
|
# We don't want to end up with a situation where one BeautifulSoup
|
||||||
# object contains another. Insert the children one at a time.
|
# object contains another. Insert the children one at a time.
|
||||||
|
|||||||
@ -1,4 +1,4 @@
|
|||||||
from bs4.dammit import EntitySubstitution
|
from ..bs4.dammit import EntitySubstitution
|
||||||
|
|
||||||
class Formatter(EntitySubstitution):
|
class Formatter(EntitySubstitution):
|
||||||
"""Describes a strategy to use when outputting a parse tree to a string.
|
"""Describes a strategy to use when outputting a parse tree to a string.
|
||||||
|
|||||||
@ -9,8 +9,8 @@ import copy
|
|||||||
import functools
|
import functools
|
||||||
import unittest
|
import unittest
|
||||||
from unittest import TestCase
|
from unittest import TestCase
|
||||||
from bs4 import BeautifulSoup
|
from ..bs4 import BeautifulSoup
|
||||||
from bs4.element import (
|
from ..bs4.element import (
|
||||||
CharsetMetaAttributeValue,
|
CharsetMetaAttributeValue,
|
||||||
Comment,
|
Comment,
|
||||||
ContentMetaAttributeValue,
|
ContentMetaAttributeValue,
|
||||||
@ -22,7 +22,7 @@ from bs4.element import (
|
|||||||
Tag
|
Tag
|
||||||
)
|
)
|
||||||
|
|
||||||
from bs4.builder import HTMLParserTreeBuilder
|
from ..bs4.builder import HTMLParserTreeBuilder
|
||||||
default_builder = HTMLParserTreeBuilder
|
default_builder = HTMLParserTreeBuilder
|
||||||
|
|
||||||
BAD_DOCUMENT = """A bare string
|
BAD_DOCUMENT = """A bare string
|
||||||
|
|||||||
@ -3,21 +3,21 @@
|
|||||||
import unittest
|
import unittest
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from ...bs4 import BeautifulSoup
|
||||||
from bs4.builder import (
|
from ...bs4.builder import (
|
||||||
builder_registry as registry,
|
builder_registry as registry,
|
||||||
HTMLParserTreeBuilder,
|
HTMLParserTreeBuilder,
|
||||||
TreeBuilderRegistry,
|
TreeBuilderRegistry,
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from bs4.builder import HTML5TreeBuilder
|
from ...bs4.builder import HTML5TreeBuilder
|
||||||
HTML5LIB_PRESENT = True
|
HTML5LIB_PRESENT = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
HTML5LIB_PRESENT = False
|
HTML5LIB_PRESENT = False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from bs4.builder import (
|
from ...bs4.builder import (
|
||||||
LXMLTreeBuilderForXML,
|
LXMLTreeBuilderForXML,
|
||||||
LXMLTreeBuilder,
|
LXMLTreeBuilder,
|
||||||
)
|
)
|
||||||
|
|||||||
@ -3,12 +3,12 @@
|
|||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from bs4.builder import HTML5TreeBuilder
|
from ...bs4.builder import HTML5TreeBuilder
|
||||||
HTML5LIB_PRESENT = True
|
HTML5LIB_PRESENT = True
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
HTML5LIB_PRESENT = False
|
HTML5LIB_PRESENT = False
|
||||||
from bs4.element import SoupStrainer
|
from ...bs4.element import SoupStrainer
|
||||||
from bs4.testing import (
|
from ...bs4.testing import (
|
||||||
HTML5TreeBuilderSmokeTest,
|
HTML5TreeBuilderSmokeTest,
|
||||||
SoupTest,
|
SoupTest,
|
||||||
skipIf,
|
skipIf,
|
||||||
|
|||||||
@ -3,9 +3,9 @@ trees."""
|
|||||||
|
|
||||||
from pdb import set_trace
|
from pdb import set_trace
|
||||||
import pickle
|
import pickle
|
||||||
from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
|
from ...bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
|
||||||
from bs4.builder import HTMLParserTreeBuilder
|
from ...bs4.builder import HTMLParserTreeBuilder
|
||||||
from bs4.builder._htmlparser import BeautifulSoupHTMLParser
|
from ...bs4.builder._htmlparser import BeautifulSoupHTMLParser
|
||||||
|
|
||||||
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
|
class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
|
||||||
|
|
||||||
|
|||||||
@ -12,16 +12,16 @@ except ImportError as e:
|
|||||||
LXML_VERSION = (0,)
|
LXML_VERSION = (0,)
|
||||||
|
|
||||||
if LXML_PRESENT:
|
if LXML_PRESENT:
|
||||||
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
from ...bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
||||||
|
|
||||||
from bs4 import (
|
from ...bs4 import (
|
||||||
BeautifulSoup,
|
BeautifulSoup,
|
||||||
BeautifulStoneSoup,
|
BeautifulStoneSoup,
|
||||||
)
|
)
|
||||||
from bs4.element import Comment, Doctype, SoupStrainer
|
from ...bs4.element import Comment, Doctype, SoupStrainer
|
||||||
from bs4.testing import skipIf
|
from ...bs4.testing import skipIf
|
||||||
from bs4.tests import test_htmlparser
|
from ...bs4.tests import test_htmlparser
|
||||||
from bs4.testing import (
|
from ...bs4.testing import (
|
||||||
HTMLTreeBuilderSmokeTest,
|
HTMLTreeBuilderSmokeTest,
|
||||||
XMLTreeBuilderSmokeTest,
|
XMLTreeBuilderSmokeTest,
|
||||||
SoupTest,
|
SoupTest,
|
||||||
|
|||||||
@ -7,17 +7,17 @@ import unittest
|
|||||||
import sys
|
import sys
|
||||||
import tempfile
|
import tempfile
|
||||||
|
|
||||||
from bs4 import (
|
from ...bs4 import (
|
||||||
BeautifulSoup,
|
BeautifulSoup,
|
||||||
BeautifulStoneSoup,
|
BeautifulStoneSoup,
|
||||||
GuessedAtParserWarning,
|
GuessedAtParserWarning,
|
||||||
MarkupResemblesLocatorWarning,
|
MarkupResemblesLocatorWarning,
|
||||||
)
|
)
|
||||||
from bs4.builder import (
|
from ...bs4.builder import (
|
||||||
TreeBuilder,
|
TreeBuilder,
|
||||||
ParserRejectedMarkup,
|
ParserRejectedMarkup,
|
||||||
)
|
)
|
||||||
from bs4.element import (
|
from ...bs4.element import (
|
||||||
CharsetMetaAttributeValue,
|
CharsetMetaAttributeValue,
|
||||||
Comment,
|
Comment,
|
||||||
ContentMetaAttributeValue,
|
ContentMetaAttributeValue,
|
||||||
@ -27,13 +27,13 @@ from bs4.element import (
|
|||||||
NavigableString,
|
NavigableString,
|
||||||
)
|
)
|
||||||
|
|
||||||
import bs4.dammit
|
from ...bs4.dammit import *
|
||||||
from bs4.dammit import (
|
from ...bs4.dammit import (
|
||||||
EntitySubstitution,
|
EntitySubstitution,
|
||||||
UnicodeDammit,
|
UnicodeDammit,
|
||||||
EncodingDetector,
|
EncodingDetector,
|
||||||
)
|
)
|
||||||
from bs4.testing import (
|
from ...bs4.testing import (
|
||||||
default_builder,
|
default_builder,
|
||||||
SoupTest,
|
SoupTest,
|
||||||
skipIf,
|
skipIf,
|
||||||
@ -41,7 +41,7 @@ from bs4.testing import (
|
|||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
from ...bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
|
||||||
LXML_PRESENT = True
|
LXML_PRESENT = True
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
LXML_PRESENT = False
|
LXML_PRESENT = False
|
||||||
@ -418,13 +418,13 @@ class TestEncodingConversion(SoupTest):
|
|||||||
def test_ascii_in_unicode_out(self):
|
def test_ascii_in_unicode_out(self):
|
||||||
# ASCII input is converted to Unicode. The original_encoding
|
# ASCII input is converted to Unicode. The original_encoding
|
||||||
# attribute is set to 'utf-8', a superset of ASCII.
|
# attribute is set to 'utf-8', a superset of ASCII.
|
||||||
chardet = bs4.dammit.chardet_dammit
|
chardet = chardet_dammit
|
||||||
logging.disable(logging.WARNING)
|
logging.disable(logging.WARNING)
|
||||||
try:
|
try:
|
||||||
def noop(str):
|
def noop(str):
|
||||||
return None
|
return None
|
||||||
# Disable chardet, which will realize that the ASCII is ASCII.
|
# Disable chardet, which will realize that the ASCII is ASCII.
|
||||||
bs4.dammit.chardet_dammit = noop
|
chardet_dammit = noop
|
||||||
ascii = b"<foo>a</foo>"
|
ascii = b"<foo>a</foo>"
|
||||||
soup_from_ascii = self.soup(ascii)
|
soup_from_ascii = self.soup(ascii)
|
||||||
unicode_output = soup_from_ascii.decode()
|
unicode_output = soup_from_ascii.decode()
|
||||||
@ -433,7 +433,7 @@ class TestEncodingConversion(SoupTest):
|
|||||||
self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
|
self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
|
||||||
finally:
|
finally:
|
||||||
logging.disable(logging.NOTSET)
|
logging.disable(logging.NOTSET)
|
||||||
bs4.dammit.chardet_dammit = chardet
|
chardet_dammit = chardet
|
||||||
|
|
||||||
def test_unicode_in_unicode_out(self):
|
def test_unicode_in_unicode_out(self):
|
||||||
# Unicode input is left alone. The original_encoding attribute
|
# Unicode input is left alone. The original_encoding attribute
|
||||||
@ -574,12 +574,12 @@ class TestUnicodeDammit(unittest.TestCase):
|
|||||||
doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
|
doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
|
||||||
<html><b>\330\250\330\252\330\261</b>
|
<html><b>\330\250\330\252\330\261</b>
|
||||||
<i>\310\322\321\220\312\321\355\344</i></html>"""
|
<i>\310\322\321\220\312\321\355\344</i></html>"""
|
||||||
chardet = bs4.dammit.chardet_dammit
|
chardet = chardet_dammit
|
||||||
logging.disable(logging.WARNING)
|
logging.disable(logging.WARNING)
|
||||||
try:
|
try:
|
||||||
def noop(str):
|
def noop(str):
|
||||||
return None
|
return None
|
||||||
bs4.dammit.chardet_dammit = noop
|
chardet_dammit = noop
|
||||||
dammit = UnicodeDammit(doc)
|
dammit = UnicodeDammit(doc)
|
||||||
self.assertEqual(True, dammit.contains_replacement_characters)
|
self.assertEqual(True, dammit.contains_replacement_characters)
|
||||||
self.assertTrue("\ufffd" in dammit.unicode_markup)
|
self.assertTrue("\ufffd" in dammit.unicode_markup)
|
||||||
@ -588,7 +588,7 @@ class TestUnicodeDammit(unittest.TestCase):
|
|||||||
self.assertTrue(soup.contains_replacement_characters)
|
self.assertTrue(soup.contains_replacement_characters)
|
||||||
finally:
|
finally:
|
||||||
logging.disable(logging.NOTSET)
|
logging.disable(logging.NOTSET)
|
||||||
bs4.dammit.chardet_dammit = chardet
|
chardet_dammit = chardet
|
||||||
|
|
||||||
def test_byte_order_mark_removed(self):
|
def test_byte_order_mark_removed(self):
|
||||||
# A document written in UTF-16LE will have its byte order marker stripped.
|
# A document written in UTF-16LE will have its byte order marker stripped.
|
||||||
|
|||||||
@ -14,12 +14,12 @@ import copy
|
|||||||
import pickle
|
import pickle
|
||||||
import re
|
import re
|
||||||
import warnings
|
import warnings
|
||||||
from bs4 import BeautifulSoup
|
from ...bs4 import BeautifulSoup
|
||||||
from bs4.builder import (
|
from ...bs4.builder import (
|
||||||
builder_registry,
|
builder_registry,
|
||||||
HTMLParserTreeBuilder,
|
HTMLParserTreeBuilder,
|
||||||
)
|
)
|
||||||
from bs4.element import (
|
from ...bs4.element import (
|
||||||
PY3K,
|
PY3K,
|
||||||
CData,
|
CData,
|
||||||
Comment,
|
Comment,
|
||||||
@ -33,11 +33,11 @@ from bs4.element import (
|
|||||||
Tag,
|
Tag,
|
||||||
TemplateString,
|
TemplateString,
|
||||||
)
|
)
|
||||||
from bs4.testing import (
|
from ...bs4.testing import (
|
||||||
SoupTest,
|
SoupTest,
|
||||||
skipIf,
|
skipIf,
|
||||||
)
|
)
|
||||||
from soupsieve import SelectorSyntaxError
|
from ...soupsieve import SelectorSyntaxError
|
||||||
|
|
||||||
XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None)
|
XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None)
|
||||||
LXML_PRESENT = (builder_registry.lookup("lxml") is not None)
|
LXML_PRESENT = (builder_registry.lookup("lxml") is not None)
|
||||||
|
|||||||
@ -6,7 +6,8 @@ from .import css_types as ct
|
|||||||
import unicodedata
|
import unicodedata
|
||||||
from collections.abc import Sequence
|
from collections.abc import Sequence
|
||||||
|
|
||||||
import bs4
|
from ..bs4 import *
|
||||||
|
from ..bs4.element import *
|
||||||
|
|
||||||
# Empty tag pattern (whitespace okay)
|
# Empty tag pattern (whitespace okay)
|
||||||
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
|
RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]')
|
||||||
@ -90,37 +91,37 @@ class _DocumentNav(object):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def is_doc(obj):
|
def is_doc(obj):
|
||||||
"""Is `BeautifulSoup` object."""
|
"""Is `BeautifulSoup` object."""
|
||||||
return isinstance(obj, bs4.BeautifulSoup)
|
return isinstance(obj, BeautifulSoup)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_tag(obj):
|
def is_tag(obj):
|
||||||
"""Is tag."""
|
"""Is tag."""
|
||||||
return isinstance(obj, bs4.Tag)
|
return isinstance(obj, Tag)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_declaration(obj): # pragma: no cover
|
def is_declaration(obj): # pragma: no cover
|
||||||
"""Is declaration."""
|
"""Is declaration."""
|
||||||
return isinstance(obj, bs4.Declaration)
|
return isinstance(obj, Declaration)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_cdata(obj):
|
def is_cdata(obj):
|
||||||
"""Is CDATA."""
|
"""Is CDATA."""
|
||||||
return isinstance(obj, bs4.CData)
|
return isinstance(obj, CData)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_processing_instruction(obj): # pragma: no cover
|
def is_processing_instruction(obj): # pragma: no cover
|
||||||
"""Is processing instruction."""
|
"""Is processing instruction."""
|
||||||
return isinstance(obj, bs4.ProcessingInstruction)
|
return isinstance(obj, ProcessingInstruction)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_navigable_string(obj):
|
def is_navigable_string(obj):
|
||||||
"""Is navigable string."""
|
"""Is navigable string."""
|
||||||
return isinstance(obj, bs4.NavigableString)
|
return isinstance(obj, NavigableString)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def is_special_string(obj):
|
def is_special_string(obj):
|
||||||
"""Is special string."""
|
"""Is special string."""
|
||||||
return isinstance(obj, (bs4.Comment, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction, bs4.Doctype))
|
return isinstance(obj, (Comment, Declaration, CData, ProcessingInstruction, Doctype))
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def is_content_string(cls, obj):
|
def is_content_string(cls, obj):
|
||||||
|
|||||||
@ -11,18 +11,14 @@ import os.path
|
|||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import urllib.request
|
import urllib.request
|
||||||
import base64
|
import base64
|
||||||
|
from .lib.bs4 import BeautifulSoup as bs4
|
||||||
# --- Bundled library imports ---
|
|
||||||
# Explicitly import from the 'lib' directory, now that the package root is in sys.path
|
|
||||||
from lib import bs4
|
|
||||||
from lib.markdown2 import Markdown
|
|
||||||
# --- End bundled library imports ---
|
|
||||||
|
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
|
from .lib.markdown2 import Markdown
|
||||||
|
|
||||||
__all__ = ("markdown2html",)
|
__all__ = ("markdown2html",)
|
||||||
|
|
||||||
# Use the imported module name
|
|
||||||
markdowner = Markdown(extras=["fenced-code-blocks", "cuddled-lists"])
|
markdowner = Markdown(extras=["fenced-code-blocks", "cuddled-lists"])
|
||||||
|
|
||||||
# FIXME: how do I choose how many workers I want? Does thread pool reuse threads or
|
# FIXME: how do I choose how many workers I want? Does thread pool reuse threads or
|
||||||
@ -37,7 +33,6 @@ def markdown2html(markdown, basepath, re_render, resources, viewport_width, font
|
|||||||
"""
|
"""
|
||||||
html = markdowner.convert(markdown)
|
html = markdowner.convert(markdown)
|
||||||
|
|
||||||
# Use the imported module name
|
|
||||||
soup = bs4.BeautifulSoup(html, "html.parser")
|
soup = bs4.BeautifulSoup(html, "html.parser")
|
||||||
for img_element in soup.find_all("img"):
|
for img_element in soup.find_all("img"):
|
||||||
src = img_element["src"]
|
src = img_element["src"]
|
||||||
@ -57,15 +52,14 @@ def markdown2html(markdown, basepath, re_render, resources, viewport_width, font
|
|||||||
# realpath: simplify that paths so that we don't have duplicated caches
|
# realpath: simplify that paths so that we don't have duplicated caches
|
||||||
path = os.path.realpath(os.path.expanduser(os.path.join(basepath, src)))
|
path = os.path.realpath(os.path.expanduser(os.path.join(basepath, src)))
|
||||||
|
|
||||||
base64_img, (width, height) = get_base64_image(path, re_render, resources) # Renamed local var to avoid conflict
|
base64, (width, height) = get_base64_image(path, re_render, resources)
|
||||||
|
|
||||||
img_element["src"] = base64_img
|
img_element["src"] = base64
|
||||||
if width > viewport_width:
|
if width > viewport_width:
|
||||||
img_element["width"] = viewport_width
|
img_element["width"] = viewport_width
|
||||||
img_element["height"] = viewport_width * (height / width)
|
img_element["height"] = viewport_width * (height / width)
|
||||||
|
|
||||||
# remove comments, because they pollute the console with error messages
|
# remove comments, because they pollute the console with error messages
|
||||||
# Use the imported module name
|
|
||||||
for comment_element in soup.find_all(
|
for comment_element in soup.find_all(
|
||||||
text=lambda text: isinstance(text, bs4.Comment)
|
text=lambda text: isinstance(text, bs4.Comment)
|
||||||
):
|
):
|
||||||
@ -84,7 +78,7 @@ def markdown2html(markdown, basepath, re_render, resources, viewport_width, font
|
|||||||
.replace(" ", '<i class="space">.</i>')
|
.replace(" ", '<i class="space">.</i>')
|
||||||
.replace("\n", "<br />")
|
.replace("\n", "<br />")
|
||||||
)
|
)
|
||||||
# Use the imported module name
|
|
||||||
code_element.replace_with(bs4.BeautifulSoup(fixed_pre, "html.parser"))
|
code_element.replace_with(bs4.BeautifulSoup(fixed_pre, "html.parser"))
|
||||||
|
|
||||||
# FIXME: highlight the code using Sublime's syntax
|
# FIXME: highlight the code using Sublime's syntax
|
||||||
|
|||||||
Reference in New Issue
Block a user