Load images from the internet using an in-memory cache

As soon as the plugin is reloaded or the editor is restarted, the images must be loaded again. Maybe we could use a file cache...
2019-11-14 19:30:26 +11:00
parent bae26fc452
commit ef9b2daf6d
3 changed files with 42 additions and 11 deletions
--- a/MarkdownLivePreview.py
+++ b/MarkdownLivePreview.py
@ -150,7 +150,7 @@ class MarkdownLivePreviewListener(sublime_plugin.EventListener):
            original_view.set_syntax_file(markdown_view.settings().get('syntax'))
-    # here, views are NOT treated independently, which is theoratically wrong
+    # here, views are NOT treated independently, which is theoretically wrong
    # but in practice, you can only edit one markdown file at a time, so it doesn't really
    # matter.
    # @min_time_between_call(.5)
--- a/live-testing/test.md
+++ b/live-testing/test.md
@ -14,7 +14,6 @@ This is the first image from the local file system, *relative* path!
 This is the first image from the internet!
-
+![some paysage](https://4.bp.blogspot.com/-RHTlwmd9EBw/Tn84-JEW8NI/AAAAAAAAAD0/6ugjklUMbtY/s1600/acapulco-8-704125.jpg)
 ![The sublime text logo!](https://www.sublimehq.com/images/sublime_text.png)
 [prev]: https://github.com/math2001/MarkdownLivePreview/tree/d4c477749ce7e77b8e9fc85464a2488f003c45bc
--- a/markdown2html.py
+++ b/markdown2html.py
@ -1,17 +1,26 @@
 import concurrent.futures
 import urllib.request
 import base64
 import os.path
-from functools import lru_cache
+
-from .lib.markdown2 import Markdown
+from functools import lru_cache, partial
 from bs4 import BeautifulSoup
 from .lib.markdown2 import Markdown
 # Public API of this module: only markdown2html is exported.
 __all__ = ('markdown2html', )
 # Module-level Markdown instance, created once at import time.
 markdowner = Markdown()
 # FIXME: how do I choose how many workers I want? Does thread pool reuse threads or
 #        does it stupidly throw them out? (we could implement something of our own)
 # Thread pool that get_base64_image submits load_image jobs to.
 executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
 # FIXME: put a nice picture please :^)
 # Placeholder values (plain text for now, see the FIXME above) used in place of image data.
 BASE64_LOADING_IMAGE = 'loading image!'
 BASE64_404_IMAGE = '404 not found :-('
 # Maps an image URL to the value to render for it; written by the done-callback in
 # get_base64_image. Lives in memory only, so it is empty again after a plugin reload.
 images_cache = {}
 # Exception type declared by this module.
 # NOTE(review): nothing in the code visible here raises or catches it -- confirm it is
 # still needed.
 class LoadingError(Exception):
    pass
@ -26,6 +35,7 @@ def markdown2html(markdown, basepath, re_render):
    soup = BeautifulSoup(html, "html.parser")
    for img_element in soup.find_all('img'):
        src = img_element['src']
        # already in base64, or something of the like
        # FIXME: what other types are possible? Are they handled by ST? If not, could we
        #        convert it into base64? is it worth the effort?
@ -42,7 +52,7 @@ def markdown2html(markdown, basepath, re_render):
            path = os.path.realpath(os.path.expanduser(os.path.join(basepath, src)))
        try:
-            base64 = get_base64_image(path)
+            base64 = get_base64_image(path, re_render)
        except FileNotFoundError as e:
            print("{!r} not found {!r}".format(path, e))
            base64 = BASE64_404_IMAGE
@ -56,14 +66,36 @@ def markdown2html(markdown, basepath, re_render):
    return str(soup)
-# FIXME: This is an in memory cache. 20 seems like a fair bit of images... Should it be
def get_base64_image(path, re_render):
    """Return the value to use as the ``src`` of the image located at ``path``.

    Local files are read synchronously and returned as a base64 ``data:`` URI.

    Remote images (``http://`` / ``https://``) are downloaded in the background by
    ``executor``. Until the download finishes, the loading placeholder is returned; once
    it finishes, the result is stored in ``images_cache`` and ``re_render`` is called so
    that the next rendering picks the image up from the cache.

    :param path: absolute path of a local file, or an http(s) URL
    :param re_render: callable taking no arguments, invoked when a download completes
                      (successfully or not)
    :raises FileNotFoundError: if ``path`` is a local path which doesn't exist
    """

    def callback(url, future):
        # The docs only guarantee that done-callbacks are called "in a thread belonging to
        # the process that added them", NOT in the thread that called add_done_callback
        # (in practice it is usually the worker which ran the job). Storing one item in a
        # dict is atomic in CPython, so writing to the cache from here is fine.
        # NOTE(review): re_render is invoked from that same thread -- confirm it copes.
        try:
            image = future.result()
        except Exception as e:
            # Without this, the exception would be swallowed by concurrent.futures, the
            # preview would never be refreshed and every rendering would retry the
            # download. Mirror what is done for missing local files instead. The failure
            # stays cached until the plugin is reloaded.
            print("{!r} couldn't be loaded {!r}".format(url, e))
            image = BASE64_404_IMAGE
        images_cache[url] = image
        # we render, which means this function will be called again, but this time, we
        # will read from the cache
        re_render()

    if path.startswith(('http://', 'https://')):
        if path in images_cache:
            return images_cache[path]
        # Mark the URL as being loaded *before* submitting the job: renderings which happen
        # while the download is in flight then hit the cache instead of submitting the
        # same download again, and the callback can't be overwritten by the placeholder.
        images_cache[path] = BASE64_LOADING_IMAGE
        executor.submit(load_image, path).add_done_callback(partial(callback, path))
        return BASE64_LOADING_IMAGE

    with open(path, 'rb') as fp:
        return 'data:image/png;base64,' + base64.b64encode(fp.read()).decode('utf-8')
 # FIXME: This is an in-memory cache. 16 seems like a fair number of images... Should it be
 #        bigger? Should the user be allowed to choose? There definitely should be a limit
 #        because we don't want to use too much memory, we're a simple markdown preview plugin
 # NOTE: > The LRU feature performs best when maxsize is a power-of-two. --- python docs
@lru_cache(maxsize=2 ** 4)
def load_image(url):
    """Download the image at ``url`` and return it as a base64 ``data:`` URI.

    Successful results are memoized by ``lru_cache`` (calls which raise are not cached).

    :param url: the URL to fetch; the request times out after 60 seconds
    :returns: ``'data:image/png;base64,'`` followed by the base64 encoded response body
    :raises ValueError: if the response's content type isn't an ``image/*`` type
    :raises OSError: whatever ``urllib.request.urlopen`` raises when the request fails
                     (``urllib.error.URLError`` is a subclass of ``OSError``)
    """
    with urllib.request.urlopen(url, timeout=60) as conn:
        headers = conn.info()
        # Compare the main type rather than searching for the substring 'image', which
        # would also accept things like 'application/x-iso9660-image'.
        if headers.get_content_maintype() != 'image':
            raise ValueError("{!r} doesn't point to an image, but to a {!r}".format(
                url, headers.get_content_type()))
        # NOTE(review): the prefix always says image/png, whatever the real image type
        # is -- confirm the renderer detects the actual format from the data.
        return 'data:image/png;base64,' + base64.b64encode(conn.read()).decode('utf-8')