Merge remote-tracking branch 'update/master'

This is a weird merge. I had rewritten everything in a separate
repository, and basically wanted that new repo (update) to become the
new master of this repository, whilst preserving the history of the
update repo.

Here's what I did:

$ git remote add update <path to update>
$ git fetch update
$ git merge -X theirs --allow-unrelated-histories update/master
$ # remove extra files
$ git commit --amend
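
(For the record: --allow-unrelated-histories is required because the two
repos share no common ancestor, -X theirs resolves every conflict in
favour of update/master, and the amend folds the manual cleanup into the
merge commit itself.)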
Mathieu PATUREL
2019-11-16 09:18:24 +11:00

46 changed files with 670 additions and 1609 deletions

markdown2html.py (new file, 119 lines)

@@ -0,0 +1,119 @@
import copy
import os.path
import concurrent.futures
import urllib.request
import base64

import bs4
from functools import lru_cache, partial

from .lib.markdown2 import Markdown

__all__ = ('markdown2html', )

markdowner = Markdown(extras=['fenced-code-blocks'])

# FIXME: how do I choose how many workers I want? Does the thread pool reuse threads,
#        or does it stupidly throw them out? (we could implement something of our own)
executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)

images_cache = {}


class LoadingError(Exception):
    pass


def markdown2html(markdown, basepath, re_render, resources):
    """ Converts the markdown to HTML, loads the images, and inlines them as base64
    so that Sublime Text renders them correctly. That means we are responsible for
    loading the images from the internet. Hence we take in re_render, a function we
    call whenever an image has finished loading, to retrigger a render (see #90).
    """
    html = markdowner.convert(markdown)
    soup = bs4.BeautifulSoup(html, "html.parser")
    for img_element in soup.find_all('img'):
        src = img_element['src']

        # already in base64, or something of the like
        # FIXME: what other types are possible? Are they handled by ST? If not, could
        #        we convert them to base64? Is it worth the effort?
        if src.startswith('data:image/'):
            continue

        if src.startswith('http://') or src.startswith('https://'):
            path = src
        elif src.startswith('file://'):
            path = src[len('file://'):]
        else:
            # expanduser: ~ -> /home/math2001
            # realpath: normalize the path so that we don't end up with duplicate
            #           cache entries for the same image
            path = os.path.realpath(os.path.expanduser(os.path.join(basepath, src)))

        try:
            base64_image = get_base64_image(path, re_render)
        except FileNotFoundError:
            base64_image = resources['base64_404_image']
        except LoadingError:
            base64_image = resources['base64_loading_image']

        img_element['src'] = base64_image
    # remove comments, because they pollute the console with error messages
    for comment_element in soup.find_all(text=lambda text: isinstance(text, bs4.Comment)):
        comment_element.extract()

    # FIXME: how do tables look? Should we use ASCII tables?

    # <pre> elements aren't handled by ST3; they require manual adjustment
    for pre_element in soup.find_all('pre'):
        # select the first child, <code>
        code_element = next(pre_element.children)
        # FIXME: this method sucks, but can we do better?
        fixed_pre = str(code_element) \
            .replace(' ', '<i class="space">.</i>') \
            .replace('\n', '<br />')
        code_element.replace_with(bs4.BeautifulSoup(fixed_pre, "html.parser"))

    # FIXME: highlight the code using Sublime's syntax
    # FIXME: report that ST doesn't support <br/> but does work with <br />... WTF?
    return "<style>\n{}\n</style>\n\n{}".format(
        resources['stylesheet'], soup).replace('<br/>', '<br />')
def get_base64_image(path, re_render):
    def callback(url, future):
        # this is "safe" to do because the callback is called in the same thread as
        # add_done_callback:
        # > Added callables are called in the order that they were added and are always
        # > called in a thread belonging to the process that added them
        # > --- Python docs
        images_cache[url] = future.result()
        # we re-render, which means this function will be called again, but this time
        # we will read from the cache
        re_render()
    if path.startswith('http://') or path.startswith('https://'):
        if path in images_cache:
            return images_cache[path]
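
        # not in the cache yet: download in the background and raise LoadingError so
        # that markdown2html shows the loading placeholder; once the download is done,
        # callback fills images_cache and re_render triggers another pass that hits
        # the cache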
        executor.submit(load_image, path).add_done_callback(partial(callback, path))
        raise LoadingError()

    # FIXME: use some kind of cache for this as well, because right now it re-encodes
    #        the file on every keystroke...
    with open(path, 'rb') as fp:
        return 'data:image/png;base64,' + base64.b64encode(fp.read()).decode('utf-8')


# FIXME: wait, what the hell? Why do I have two caches? (lru_cache and images_cache)
# FIXME: this is an in-memory cache. 16 seems like a fair number of images... Should it
#        be bigger? Should the user be allowed to choose? There should definitely be a
#        limit, because we don't wanna use too much memory; we're a simple markdown
#        preview plugin
# NOTE: > The LRU feature performs best when maxsize is a power-of-two. --- Python docs
@lru_cache(maxsize=2 ** 4)
def load_image(url):
    with urllib.request.urlopen(url, timeout=60) as conn:
        content_type = conn.info().get_content_type()
        if 'image' not in content_type:
            raise ValueError("{!r} doesn't point to an image, but to a {!r}".format(
                url, content_type))
        return 'data:image/png;base64,' + base64.b64encode(conn.read()).decode('utf-8')