Load images from the internet using an in-memory cache

As soon as the plugin is reloaded or the editor is restarted, the images
have to be fetched again. Maybe we could use a file cache...
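A file cache could look roughly like the sketch below. This is only an illustration of the idea, not code from this commit; the cache directory, the hashing scheme and the helper names are all assumptions.

import hashlib
import os

# hypothetical location; the real plugin would more likely use sublime.cache_path()
CACHE_DIR = os.path.expanduser('~/.cache/MarkdownLivePreview/images')

def file_cache_get(url):
    # key the file on a hash of the URL so the name is filesystem safe
    key = hashlib.sha1(url.encode('utf-8')).hexdigest()
    path = os.path.join(CACHE_DIR, key)
    if os.path.exists(path):
        with open(path) as fp:
            return fp.read()
    return None

def file_cache_set(url, base64_data):
    os.makedirs(CACHE_DIR, exist_ok=True)
    key = hashlib.sha1(url.encode('utf-8')).hexdigest()
    with open(os.path.join(CACHE_DIR, key), 'w') as fp:
        fp.write(base64_data)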
Mathieu PATUREL
2019-11-14 19:30:26 +11:00
parent bae26fc452
commit ef9b2daf6d
3 changed files with 42 additions and 11 deletions


@@ -150,7 +150,7 @@ class MarkdownLivePreviewListener(sublime_plugin.EventListener):
original_view.set_syntax_file(markdown_view.settings().get('syntax'))
# here, views are NOT treated independently, which is theoratically wrong
# here, views are NOT treated independently, which is theoretically wrong
# but in practice, you can only edit one markdown file at a time, so it doesn't really
# matter.
# @min_time_between_call(.5)


@@ -14,7 +14,6 @@ This is the first image from the local file system, *relative* path!
This is the first image from the internet!
![The sublime text logo!](https://www.sublimehq.com/images/sublime_text.png)
![some paysage](https://4.bp.blogspot.com/-RHTlwmd9EBw/Tn84-JEW8NI/AAAAAAAAAD0/6ugjklUMbtY/s1600/acapulco-8-704125.jpg)
[prev]: https://github.com/math2001/MarkdownLivePreview/tree/d4c477749ce7e77b8e9fc85464a2488f003c45bc


@@ -1,17 +1,26 @@
import concurrent.futures
import urllib.request
import base64
import os.path
from functools import lru_cache
from .lib.markdown2 import Markdown
from functools import lru_cache, partial
from bs4 import BeautifulSoup
from .lib.markdown2 import Markdown
__all__ = ('markdown2html', )
markdowner = Markdown()
# FIXME: how do I choose how many workers I want? Does thread pool reuse threads or
# does it stupidly throw them out? (we could implement something of our own)
executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
# FIXME: put a nice picture please :^)
BASE64_LOADING_IMAGE = 'loading image!'
BASE64_404_IMAGE = '404 not found :-('
images_cache = {}
class LoadingError(Exception):
    pass
@@ -26,6 +35,7 @@ def markdown2html(markdown, basepath, re_render):
    soup = BeautifulSoup(html, "html.parser")
    for img_element in soup.find_all('img'):
        src = img_element['src']
        # already in base64, or something of the like
        # FIXME: what other types are possible? Are they handled by ST? If not, could we
        # convert it into base64? is it worth the effort?
@@ -42,7 +52,7 @@ def markdown2html(markdown, basepath, re_render):
            path = os.path.realpath(os.path.expanduser(os.path.join(basepath, src)))
        try:
            base64 = get_base64_image(path)
            base64 = get_base64_image(path, re_render)
        except FileNotFoundError as e:
            print("{!r} not found {!r}".format(path, e))
            base64 = BASE64_404_IMAGE
@@ -56,14 +66,36 @@ def markdown2html(markdown, basepath, re_render):
    return str(soup)
# FIXME: This is an in memory cache. 20 seems like a fair bit of images... Should it be
# bigger? Should the user be allowed to choose? There definitely should be a limit
# because we don't wanna use too much memory, we're a simple markdown preview plugin
@lru_cache(maxsize=20)
def get_base64_image(path):
def get_base64_image(path, re_render):
    def callback(url, future):
        # this is "safe" to do because callback is called in the same thread as
        # add_done_callback:
        # > Added callables are called in the order that they were added and are always
        # > called in a thread belonging to the process that added them
        # > --- Python docs
        images_cache[url] = future.result()
        # we render, which means this function will be called again, but this time, we
        # will read from the cache
        re_render()
    if path.startswith('http://') or path.startswith('https://'):
        if path in images_cache:
            return images_cache[path]
        executor.submit(load_image, path).add_done_callback(partial(callback, path))
        return 'loading of the internet!'
    with open(path, 'rb') as fp:
        return 'data:image/png;base64,' + base64.b64encode(fp.read()).decode('utf-8')
# FIXME: This is an in memory cache. 20 seems like a fair bit of images... Should it be
# bigger? Should the user be allowed to choose? There definitely should be a limit
# because we don't wanna use too much memory, we're a simple markdown preview plugin
# NOTE: > The LRU feature performs best when maxsize is a power-of-two. --- python docs
@lru_cache(maxsize=2 ** 4)
def load_image(url):
    with urllib.request.urlopen(url, timeout=60) as conn:
        content_type = conn.info().get_content_type()
        if 'image' not in content_type:
            raise ValueError("{!r} doesn't point to an image, but to a {!r}".format(url, content_type))
        return 'data:image/png;base64,' + base64.b64encode(conn.read()).decode('utf-8')
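Stripped of the plugin specifics, the loading pattern introduced above boils down to the standalone sketch below; fetch, on_done and the surrounding scaffolding are illustrative names, not part of the plugin. Note that a done-callback runs in the worker thread that completed the future (or immediately in the calling thread if the future was already finished), so the shared dict is only written from whichever thread produced the result.

import concurrent.futures
from functools import partial

cache = {}
executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)

def fetch(url):
    # stand-in for load_image(): pretend to download and base64-encode an image
    return 'data:image/png;base64,...'

def re_render():
    print('re-render requested; cache now holds', len(cache), 'image(s)')

def on_done(url, future):
    # store the result, then ask for a re-render; the second render finds the
    # image in the cache and returns it directly instead of the placeholder
    cache[url] = future.result()
    re_render()

url = 'https://www.sublimehq.com/images/sublime_text.png'
if url not in cache:
    executor.submit(fetch, url).add_done_callback(partial(on_done, url))
executor.shutdown(wait=True)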