improve caching of images

First, we used two caches. Turns out that lru_cache wasn't needed; the dict works perfectly fine on its own. Second, we now also cache local images, so that we don't have to read them off the filesystem and convert them to base64 on every keystroke. Maybe there should be a maximum size on that cache dict, but I doubt anyone would actually run into any trouble with this cache taking too much RAM.
Format everything with black
2019-11-16 10:16:12 +11:00 · 2019-11-16 09:57:45 +11:00 · 2019-11-16 09:41:00 +11:00
6 changed files with 904 additions and 566 deletions
--- a/MarkdownLivePreview.py
+++ b/MarkdownLivePreview.py
@ -10,22 +10,26 @@ from .utils import *
 MARKDOWN_VIEW_INFOS = "markdown_view_infos"
 PREVIEW_VIEW_INFOS = "preview_view_infos"
 # FIXME: put this as a setting for the user to choose?
-DELAY = 100 # ms
+DELAY = 100  # ms
+

 def get_resource(resource):
-    path = 'Packages/MarkdownLivePreview/resources/' + resource
-    abs_path = os.path.join(sublime.packages_path(), '..', path)
+    path = "Packages/MarkdownLivePreview/resources/" + resource
+    abs_path = os.path.join(sublime.packages_path(), "..", path)
    if os.path.isfile(abs_path):
-        with open(abs_path, 'r') as fp:
+        with open(abs_path, "r") as fp:
            return fp.read()
    return sublime.load_resource(path)

+
 resources = {}

+
 def plugin_loaded():
-    resources["base64_loading_image"] = get_resource('loading.base64')
-    resources["base64_404_image"] = get_resource('404.base64')
-    resources["stylesheet"] = get_resource('stylesheet.css')
+    resources["base64_404_image"] = get_resource("404.base64")
+    resources["base64_loading_image"] = get_resource("loading.base64")
+    resources["stylesheet"] = get_resource("stylesheet.css")
+

 # try to reload the resources if we save this file
 try:
@ -40,13 +44,13 @@ except OSError:
 # original_window: the regular window
 # preview_window: the window with the markdown file and the preview

-class MdlpInsertCommand(sublime_plugin.TextCommand):

+class MdlpInsertCommand(sublime_plugin.TextCommand):
    def run(self, edit, point, string):
        self.view.insert(edit, point, string)

-class OpenMarkdownPreviewCommand(sublime_plugin.TextCommand):

+class OpenMarkdownPreviewCommand(sublime_plugin.TextCommand):
    def run(self, edit):

        """ If the file is saved exists on disk, we close it, and reopen it in a new
@ -57,7 +61,7 @@ class OpenMarkdownPreviewCommand(sublime_plugin.TextCommand):
        original_window_id = original_view.window().id()
        file_name = original_view.file_name()

-        syntax_file = original_view.settings().get('syntax')
+        syntax_file = original_view.settings().get("syntax")

        if file_name:
            original_view.close()
@ -70,41 +74,44 @@ class OpenMarkdownPreviewCommand(sublime_plugin.TextCommand):
            # FIXME: save the document to a temporary file, so that if we crash,
            #        the user doesn't lose what he wrote

-        sublime.run_command('new_window')
+        sublime.run_command("new_window")
        preview_window = sublime.active_window()

-        preview_window.run_command('set_layout', {
-            'cols': [0.0, 0.5, 1.0],
-            'rows': [0.0, 1.0],
-            'cells': [[0, 0, 1, 1], [1, 0, 2, 1]]
-        })
+        preview_window.run_command(
+            "set_layout",
+            {
+                "cols": [0.0, 0.5, 1.0],
+                "rows": [0.0, 1.0],
+                "cells": [[0, 0, 1, 1], [1, 0, 2, 1]],
+            },
+        )

        preview_window.focus_group(1)
        preview_view = preview_window.new_file()
        preview_view.set_scratch(True)
        preview_view.settings().set(PREVIEW_VIEW_INFOS, {})
-        preview_view.set_name('Preview')
-
+        preview_view.set_name("Preview")

        preview_window.focus_group(0)
        if file_name:
            markdown_view = preview_window.open_file(file_name)
        else:
            markdown_view = preview_window.new_file()
-            markdown_view.run_command('mdlp_insert', {'point': 0, 'string': content})
+            markdown_view.run_command("mdlp_insert", {"point": 0, "string": content})
            markdown_view.set_scratch(True)

        markdown_view.set_syntax_file(syntax_file)
-        markdown_view.settings().set(MARKDOWN_VIEW_INFOS, {
-            "original_window_id": original_window_id
-        })
+        markdown_view.settings().set(
+            MARKDOWN_VIEW_INFOS, {"original_window_id": original_window_id}
+        )

    def is_enabled(self):
        # FIXME: is this the best way there is to check if the current syntax is markdown?
        #        should we only support default markdown?
        #        what about "md"?
        # FIXME: what about other languages, where markdown preview roughly works?
-        return 'markdown' in self.view.settings().get('syntax').lower()
+        return "markdown" in self.view.settings().get("syntax").lower()
+

 class MarkdownLivePreviewListener(sublime_plugin.EventListener):

@ -153,30 +160,36 @@ class MarkdownLivePreviewListener(sublime_plugin.EventListener):
        if not infos:
            return

-        assert markdown_view.id() == self.markdown_view.id(), \
-        "pre_close view.id() != close view.id()"
+        assert (
+            markdown_view.id() == self.markdown_view.id()
+        ), "pre_close view.id() != close view.id()"

        del self.phantom_sets[markdown_view.id()]

-        self.preview_window.run_command('close_window')
+        self.preview_window.run_command("close_window")

        # find the window with the right id
-        original_window = next(window for window in sublime.windows() \
-                               if window.id() == infos['original_window_id'])
+        original_window = next(
+            window
+            for window in sublime.windows()
+            if window.id() == infos["original_window_id"]
+        )
        if self.file_name:
            original_window.open_file(self.file_name)
        else:
-            assert markdown_view.is_scratch(), "markdown view of an unsaved file should " \
-            "be a scratch"
+            assert markdown_view.is_scratch(), (
+                "markdown view of an unsaved file should " "be a scratch"
+            )
            # note here that this is called original_view, because it's what semantically
            # makes sense, but this original_view.id() will be different than the one
            # that we closed first to reopen in the preview window
            # shouldn't cause any trouble though
            original_view = original_window.new_file()
-            original_view.run_command('mdlp_insert', {'point': 0, 'string': self.content})
-
-            original_view.set_syntax_file(markdown_view.settings().get('syntax'))
+            original_view.run_command(
+                "mdlp_insert", {"point": 0, "string": self.content}
+            )

+            original_view.set_syntax_file(markdown_view.settings().get("syntax"))

    # here, views are NOT treated independently, which is theoretically wrong
    # but in practice, you can only edit one markdown file at a time, so it doesn't really
@ -188,7 +201,7 @@ class MarkdownLivePreviewListener(sublime_plugin.EventListener):
        if not infos:
            return

-        # we schedule an update, which won't run if an 
+        # we schedule an update, which won't run if an
        sublime.set_timeout(partial(self._update_preview, markdown_view), DELAY)

    def _update_preview(self, markdown_view):
@ -209,15 +222,16 @@ class MarkdownLivePreviewListener(sublime_plugin.EventListener):

        basepath = os.path.dirname(markdown_view.file_name())
        html = markdown2html(
-            markdown,
-            basepath,
-            partial(self._update_preview, markdown_view),
-            resources
+            markdown, basepath, partial(self._update_preview, markdown_view), resources,
        )

-        self.phantom_sets[markdown_view.id()].update([
-            sublime.Phantom(sublime.Region(0), html, sublime.LAYOUT_BLOCK,
-                lambda href: sublime.run_command('open_url', {'url': href}))
-            ])
-
-        
+        self.phantom_sets[markdown_view.id()].update(
+            [
+                sublime.Phantom(
+                    sublime.Region(0),
+                    html,
+                    sublime.LAYOUT_BLOCK,
+                    lambda href: sublime.run_command("open_url", {"url": href}),
+                )
+            ]
+        )
--- a/README.md
+++ b/README.md
@ -9,10 +9,18 @@ It's available on package control!

 ## How to contribute

+If you know what feature you want to implement, or what bug you wanna fix, then
+go ahead and hack! But if you wanna contribute just to say thanks, and don't
+really know what you could be working on, then there are a bunch of `FIXME`s
+in `MarkdownLivePreview.py` and `markdown2html.py` (GitHub only shows the top
+2 results if you try to search using their interface :slightly_frowning_face:).
+
+### Hack it!
+
 1. Fork this repo
 2. Make your own branch (the name of the branch should be the feature you are
   implementing eg. `improve-tables`, `fix-crash-on-multiple-preview`
 3. All your code should be formated by black.
 4. Send a PR!

-
+FIXME: add a git hook to format using black (can the git hook be added on github?)
--- a/lib/markdown2.py
+++ b/lib/markdown2.py
--- a/markdown2html.py
+++ b/markdown2html.py
@ -1,17 +1,23 @@
-import copy
+""" Notice how this file is completely independent of sublime text
+
+I think it should be kept this way, just because it gives a bit more organisation,
+and makes it a lot easier to think about and, for anyone who would want to, to test,
+since markdown2html is just a pure function
+"""
+
 import os.path
 import concurrent.futures
 import urllib.request
 import base64
 import bs4

-from functools import lru_cache, partial
+from functools import partial

 from .lib.markdown2 import Markdown

-__all__ = ('markdown2html', )
+__all__ = ("markdown2html",)

-markdowner = Markdown(extras=['fenced-code-blocks'])
+markdowner = Markdown(extras=["fenced-code-blocks"])

 # FIXME: how do I choose how many workers I want? Does thread pool reuse threads or
 #        does it stupidly throw them out? (we could implement something of our own)
@ -19,9 +25,11 @@ executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)

 images_cache = {}

+
 class LoadingError(Exception):
    pass

+
 def markdown2html(markdown, basepath, re_render, resources):
    """ converts the markdown to html, loads the images and puts in base64 for sublime
    to understand them correctly. That means that we are responsible for loading the
@ -31,19 +39,19 @@ def markdown2html(markdown, basepath, re_render, resources):
    html = markdowner.convert(markdown)

    soup = bs4.BeautifulSoup(html, "html.parser")
-    for img_element in soup.find_all('img'):
-        src = img_element['src']
+    for img_element in soup.find_all("img"):
+        src = img_element["src"]

        # already in base64, or something of the like
        # FIXME: what other types are possible? Are they handled by ST? If not, could we
        #        convert it into base64? is it worth the effort?
-        if src.startswith('data:image/'):
+        if src.startswith("data:image/"):
            continue

-        if src.startswith('http://') or src.startswith('https://'):
+        if src.startswith("http://") or src.startswith("https://"):
            path = src
-        elif src.startswith('file://'):
-            path = src[len('file://'):]
+        elif src.startswith("file://"):
+            path = src[len("file://") :]
        else:
            # expanduser: ~ -> /home/math2001
            # realpath: simplify that paths so that we don't have duplicated caches
@ -52,68 +60,79 @@ def markdown2html(markdown, basepath, re_render, resources):
        try:
            base64 = get_base64_image(path, re_render)
        except FileNotFoundError as e:
-            base64 = resources['base64_404_image']
+            base64 = resources["base64_404_image"]
        except LoadingError:
-            base64 = resources['base64_loading_image']
+            base64 = resources["base64_loading_image"]

-        img_element['src'] = base64
+        img_element["src"] = base64

    # remove comments, because they pollute the console with error messages
-    for comment_element in soup.find_all(text=lambda text: isinstance(text, bs4.Comment)):
+    for comment_element in soup.find_all(
+        text=lambda text: isinstance(text, bs4.Comment)
+    ):
        comment_element.extract()

    # FIXME: how do tables look? should we use ascii tables?

    # pre aren't handled by ST3. The require manual adjustment
-    for pre_element in soup.find_all('pre'):
+    for pre_element in soup.find_all("pre"):
        # select the first child, <code>
        code_element = next(pre_element.children)

        # FIXME: this method sucks, but can we do better?
-        fixed_pre = str(code_element) \
-            .replace(' ', '<i class="space">.</i>') \
-            .replace('\n', '<br />')
+        fixed_pre = (
+            str(code_element)
+            .replace(" ", '<i class="space">.</i>')
+            .replace("\n", "<br />")
+        )

        code_element.replace_with(bs4.BeautifulSoup(fixed_pre, "html.parser"))

    # FIXME: highlight the code using Sublime's syntax

    # FIXME: report that ST doesn't support <br/> but does work with <br />... WTF?
-    return "<style>\n{}\n</style>\n\n{}".format(resources['stylesheet'], soup).replace('<br/>', '<br />')
+    return "<style>\n{}\n</style>\n\n{}".format(resources["stylesheet"], soup).replace(
+        "<br/>", "<br />"
+    )
+

 def get_base64_image(path, re_render):
+    """ Gets the base64 for the image (local and remote images). re_render is a
+    callback which is called when we finish loading an image from the internet
+    to trigger an update of the preview (the image will then be loaded from the cache)
+    """

-    def callback(url, future):
-        # this is "safe" to do because callback is called in the same thread as 
-        # add_done_callback:
+    def callback(path, future):
+        # altering images_cache is "safe" to do because callback is called in the same
+        # thread as add_done_callback:
        # > Added callables are called in the order that they were added and are always
        # > called in a thread belonging to the process that added them
        # > --- Python docs
-        images_cache[url] = future.result()
+        images_cache[path] = future.result()
        # we render, which means this function will be called again, but this time, we
        # will read from the cache
        re_render()

-    if path.startswith('http://') or path.startswith('https://'):
-        if path in images_cache:
-            return images_cache[path]
+    if path in images_cache:
+        return images_cache[path]
+
+    if path.startswith("http://") or path.startswith("https://"):
        executor.submit(load_image, path).add_done_callback(partial(callback, path))
        raise LoadingError()

-    # FIXME: use some kind of cache for this as well, because it decodes on every
-    #        keystroke here...
-    with open(path, 'rb') as fp:
-        return 'data:image/png;base64,' + base64.b64encode(fp.read()).decode('utf-8')
+    with open(path, "rb") as fp:
+        image = "data:image/png;base64," + base64.b64encode(fp.read()).decode("utf-8")
+        images_cache[path] = image
+        return image
+

-# FIXME: wait what the hell? Why do I have two caches? (lru and images_cache)
-# FIXME: This is an in memory cache. 20 seems like a fair bit of images... Should it be
-#        bigger? Should the user be allowed to chose? There definitely should be a limit
-#        because we don't wanna use to much memory, we're a simple markdown preview plugin
-# NOTE: > The LRU feature performs best when maxsize is a power-of-two. --- python docs
-@lru_cache(maxsize=2 ** 4)
 def load_image(url):
    with urllib.request.urlopen(url, timeout=60) as conn:
        content_type = conn.info().get_content_type()
-        if 'image' not in content_type:
-            raise ValueError("{!r} doesn't point to an image, but to a {!r}".format(url, content_type))
-        return 'data:image/png;base64,' + base64.b64encode(conn.read()).decode('utf-8')
+        if "image" not in content_type:
+            raise ValueError(
+                "{!r} doesn't point to an image, but to a {!r}".format(
+                    url, content_type
+                )
+            )
+        return "data:image/png;base64," + base64.b64encode(conn.read()).decode("utf-8")
--- a/resources/convertresources.py
+++ b/resources/convertresources.py
@ -2,8 +2,8 @@

 from base64 import b64encode

-with open('404.png', 'rb') as png, open('404.base64', 'wb') as base64:
+with open("404.png", "rb") as png, open("404.base64", "wb") as base64:
    base64.write(b64encode(png.read()))

-with open('loading.png', 'rb') as png, open('loading.base64', 'wb') as base64:
+with open("loading.png", "rb") as png, open("loading.base64", "wb") as base64:
    base64.write(b64encode(png.read()))
--- a/utils.py
+++ b/utils.py
@ -1,9 +1,11 @@
 # import sublime
 import time

+
 def get_settings():
    return sublime.get_settings("MarkdownLivePreview.sublime-settings")

+
 def min_time_between_call(timeout, on_block=lambda *args, **kwargs: None):
    """ Enforces a timeout between each call to the function
    timeout is in seconds
@ -19,5 +21,7 @@ def min_time_between_call(timeout, on_block=lambda *args, **kwargs: None):

            last_call = time.time()
            return func(*args, **kwargs)
+
        return wrapper
+
    return outer