Merge remote-tracking branch 'update/master'
This is a weird merge. I had rewritten everything in a separate repository, and basically wanted that new repo (update) to become the new master of this repository, while preserving the history of the update repo. Here's what I did:

$ git remote add update <path to update>
$ git fetch update
$ git merge -X theirs --allow-unrelated-histories update/master
$ # remove extra files
$ git commit --amend
markdown2html.py (new file, 119 lines)
@@ -0,0 +1,119 @@
import copy
import os.path
import concurrent.futures
import urllib.request
import base64
import bs4

from functools import lru_cache, partial

from .lib.markdown2 import Markdown

__all__ = ('markdown2html', )

markdowner = Markdown(extras=['fenced-code-blocks'])

# FIXME: how do I choose how many workers I want? Does the thread pool reuse threads
# or does it stupidly throw them out? (we could implement something of our own)
executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
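# NOTE: the concurrent.futures docs answer this: ThreadPoolExecutor spawns its
# threads lazily (at most max_workers of them) and reuses them for subsequent
# tasks until shutdown, so submitting work does not keep creating new threads.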

images_cache = {}


class LoadingError(Exception):
    pass


def markdown2html(markdown, basepath, re_render, resources):
    """ Converts the markdown to HTML, loads the images, and inlines them as base64
    data URIs so that Sublime Text understands them correctly. That means that we
    are responsible for loading the images from the internet. Hence, we take in
    re_render, a function we call when an image has finished loading, to retrigger
    a render (see #90).
    """
    html = markdowner.convert(markdown)

    soup = bs4.BeautifulSoup(html, "html.parser")
    for img_element in soup.find_all('img'):
        src = img_element['src']

        # already in base64, or something of the like
        # FIXME: what other types are possible? Are they handled by ST? If not,
        # could we convert them into base64? Is it worth the effort?
        if src.startswith('data:image/'):
            continue

        if src.startswith('http://') or src.startswith('https://'):
            path = src
        elif src.startswith('file://'):
            path = src[len('file://'):]
        else:
            # expanduser: ~ -> /home/math2001
            # realpath: simplify the path so that we don't end up with duplicated
            # cache entries for the same file
            path = os.path.realpath(os.path.expanduser(os.path.join(basepath, src)))

        try:
            base64_image = get_base64_image(path, re_render)
        except FileNotFoundError:
            base64_image = resources['base64_404_image']
        except LoadingError:
            base64_image = resources['base64_loading_image']

        img_element['src'] = base64_image
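
        # Flow for remote images: the first pass raises LoadingError, so the
        # loading placeholder is shown; once the worker thread has fetched the
        # image, its callback fills images_cache and calls re_render, and the
        # next pass picks the real image up from the cache.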

    # remove comments, because they pollute the console with error messages
    for comment_element in soup.find_all(text=lambda text: isinstance(text, bs4.Comment)):
        comment_element.extract()

    # FIXME: how do tables look? should we use ascii tables?

    # <pre> blocks aren't handled by ST3. They require manual adjustment
    for pre_element in soup.find_all('pre'):
        # select the first child, <code>
        code_element = next(pre_element.children)

        # FIXME: this method sucks, but can we do better?
        fixed_pre = str(code_element) \
            .replace(' ', '<i class="space">.</i>') \
            .replace('\n', '<br />')

        code_element.replace_with(bs4.BeautifulSoup(fixed_pre, "html.parser"))
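
        # (The per-space <i class="space"> elements and the explicit <br /> are
        # there because minihtml collapses whitespace and ignores newlines inside
        # <pre>; the plugin's stylesheet presumably styles .space back into a blank.)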

        # FIXME: highlight the code using Sublime's syntax

    # FIXME: report that ST doesn't support <br/> but does work with <br />... WTF?
    return "<style>\n{}\n</style>\n\n{}".format(resources['stylesheet'], soup).replace('<br/>', '<br />')


def get_base64_image(path, re_render):

    def callback(url, future):
        # Note: add_done_callback runs this either in the worker thread that
        # completed the future, or immediately in the submitting thread if the
        # future had already finished. A single dict assignment is atomic under
        # the GIL, so storing the result here is safe:
        # > Added callables are called in the order that they were added and are always
        # > called in a thread belonging to the process that added them
        # > --- Python docs
        images_cache[url] = future.result()
        # we render, which means this function will be called again, but this time,
        # we will read from the cache
        re_render()

    if path.startswith('http://') or path.startswith('https://'):
        if path in images_cache:
            return images_cache[path]
        executor.submit(load_image, path).add_done_callback(partial(callback, path))
        raise LoadingError()

    # FIXME: use some kind of cache for this as well, because it decodes on every
    # keystroke here...
    with open(path, 'rb') as fp:
        return 'data:image/png;base64,' + base64.b64encode(fp.read()).decode('utf-8')


# FIXME: wait what the hell? Why do I have two caches? (lru and images_cache)
# FIXME: This is an in-memory cache. 16 seems like a fair bit of images... Should it
# be bigger? Should the user be allowed to choose? There definitely should be a limit
# because we don't want to use too much memory, we're a simple markdown preview plugin
# NOTE: > The LRU feature performs best when maxsize is a power-of-two. --- python docs
@lru_cache(maxsize=2 ** 4)
def load_image(url):
    with urllib.request.urlopen(url, timeout=60) as conn:
        content_type = conn.info().get_content_type()
        if 'image' not in content_type:
            raise ValueError("{!r} doesn't point to an image, but to a {!r}".format(url, content_type))
        return 'data:image/png;base64,' + base64.b64encode(conn.read()).decode('utf-8')
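
For reference, a minimal sketch of how a caller could drive this module. It assumes it runs inside this package (markdown2html uses a relative import of .lib.markdown2), and update_preview, the resource values, and the markdown source are hypothetical stand-ins rather than the plugin's actual wiring; the point is that re_render is just a zero-argument callable that re-runs the conversion once an image has finished loading:

resources = {
    'stylesheet': 'body { font-family: sans-serif }',            # hypothetical styles
    'base64_404_image': 'data:image/png;base64,iVBORw...',       # truncated placeholder
    'base64_loading_image': 'data:image/png;base64,iVBORw...',   # truncated placeholder
}

def update_preview():
    # Re-running the conversion is what replaces the loading placeholders:
    # by the time re_render fires, the worker thread has filled images_cache.
    html = markdown2html('![logo](images/logo.png)', '/path/to/project',
                         update_preview, resources)
    # ... hand `html` off to the view (e.g. a Sublime Text phantom or popup)

update_preview()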