Load images from the internet using an in-memory cache

As soon as the plugin is reloaded or the editor is restarted, the images
have to be fetched again. Maybe we could use a file cache...
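A file cache could look roughly like the sketch below. This is only an illustration of the idea, not code from this commit; the cache directory, the hashing scheme and the helper names are all assumptions.

import hashlib
import os

# hypothetical location; the real plugin would more likely use sublime.cache_path()
CACHE_DIR = os.path.expanduser('~/.cache/MarkdownLivePreview/images')

def file_cache_get(url):
    # key the file on a hash of the URL so the name is filesystem safe
    key = hashlib.sha1(url.encode('utf-8')).hexdigest()
    path = os.path.join(CACHE_DIR, key)
    if os.path.exists(path):
        with open(path) as fp:
            return fp.read()
    return None

def file_cache_set(url, base64_data):
    os.makedirs(CACHE_DIR, exist_ok=True)
    key = hashlib.sha1(url.encode('utf-8')).hexdigest()
    with open(os.path.join(CACHE_DIR, key), 'w') as fp:
        fp.write(base64_data)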
Mathieu PATUREL
2019-11-14 19:30:26 +11:00
parent bae26fc452
commit ef9b2daf6d
3 changed files with 42 additions and 11 deletions


@@ -150,7 +150,7 @@ class MarkdownLivePreviewListener(sublime_plugin.EventListener):
original_view.set_syntax_file(markdown_view.settings().get('syntax'))
# here, views are NOT treated independently, which is theoratically wrong
# here, views are NOT treated independently, which is theoretically wrong
# but in practice, you can only edit one markdown file at a time, so it doesn't really
# matter.
# @min_time_between_call(.5)


@@ -14,7 +14,6 @@ This is the first image from the local file system, *relative* path!
This is the first image from the internet!
![The sublime text logo!](https://www.sublimehq.com/images/sublime_text.png)
![some paysage](https://4.bp.blogspot.com/-RHTlwmd9EBw/Tn84-JEW8NI/AAAAAAAAAD0/6ugjklUMbtY/s1600/acapulco-8-704125.jpg)
[prev]: https://github.com/math2001/MarkdownLivePreview/tree/d4c477749ce7e77b8e9fc85464a2488f003c45bc


@@ -1,17 +1,26 @@
import concurrent.futures
import urllib.request
import base64
import os.path
from functools import lru_cache
from .lib.markdown2 import Markdown
from functools import lru_cache, partial
from bs4 import BeautifulSoup
from .lib.markdown2 import Markdown
__all__ = ('markdown2html', )
markdowner = Markdown()
# FIXME: how do I choose how many workers I want? Does thread pool reuse threads or
# does it stupidly throw them out? (we could implement something of our own)
executor = concurrent.futures.ThreadPoolExecutor(max_workers=5)
# FIXME: put a nice picture please :^)
BASE64_LOADING_IMAGE = 'loading image!'
BASE64_404_IMAGE = '404 not found :-('
images_cache = {}
class LoadingError(Exception):
    pass
@@ -26,6 +35,7 @@ def markdown2html(markdown, basepath, re_render):
    soup = BeautifulSoup(html, "html.parser")
    for img_element in soup.find_all('img'):
        src = img_element['src']
        # already in base64, or something of the like
        # FIXME: what other types are possible? Are they handled by ST? If not, could we
        # convert it into base64? is it worth the effort?
@@ -42,7 +52,7 @@ def markdown2html(markdown, basepath, re_render):
            path = os.path.realpath(os.path.expanduser(os.path.join(basepath, src)))
        try:
            base64 = get_base64_image(path)
            base64 = get_base64_image(path, re_render)
        except FileNotFoundError as e:
            print("{!r} not found {!r}".format(path, e))
            base64 = BASE64_404_IMAGE
@@ -56,14 +66,36 @@ def markdown2html(markdown, basepath, re_render):
    return str(soup)
# FIXME: This is an in memory cache. 20 seems like a fair bit of images... Should it be
# bigger? Should the user be allowed to choose? There definitely should be a limit
# because we don't wanna use too much memory, we're a simple markdown preview plugin
@lru_cache(maxsize=20)
def get_base64_image(path):
def get_base64_image(path, re_render):
    def callback(url, future):
        # this is "safe" to do because callback is called in the same thread as
        # add_done_callback:
        # > Added callables are called in the order that they were added and are always
        # > called in a thread belonging to the process that added them
        # > --- Python docs
        images_cache[url] = future.result()
        # we render, which means this function will be called again, but this time, we
        # will read from the cache
        re_render()
    if path.startswith('http://') or path.startswith('https://'):
        if path in images_cache:
            return images_cache[path]
        executor.submit(load_image, path).add_done_callback(partial(callback, path))
        return 'loading of the internet!'
    with open(path, 'rb') as fp:
        return 'data:image/png;base64,' + base64.b64encode(fp.read()).decode('utf-8')
# FIXME: This is an in memory cache. 20 seems like a fair bit of images... Should it be
# bigger? Should the user be allowed to choose? There definitely should be a limit
# because we don't wanna use too much memory, we're a simple markdown preview plugin
# NOTE: > The LRU feature performs best when maxsize is a power-of-two. --- python docs
@lru_cache(maxsize=2 ** 4)
def load_image(url):
    with urllib.request.urlopen(url, timeout=60) as conn:
        content_type = conn.info().get_content_type()
        if 'image' not in content_type:
            raise ValueError("{!r} doesn't point to an image, but to a {!r}".format(url, content_type))
        return 'data:image/png;base64,' + base64.b64encode(conn.read()).decode('utf-8')
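Stripped of the plugin specifics, the loading pattern introduced above boils down to the standalone sketch below; fetch, on_done and the surrounding scaffolding are illustrative names, not part of the plugin. Note that a done-callback runs in the worker thread that completed the future (or immediately in the calling thread if the future was already finished), so the shared dict is only written from whichever thread produced the result.

import concurrent.futures
from functools import partial

cache = {}
executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)

def fetch(url):
    # stand-in for load_image(): pretend to download and base64-encode an image
    return 'data:image/png;base64,...'

def re_render():
    print('re-render requested; cache now holds', len(cache), 'image(s)')

def on_done(url, future):
    # store the result, then ask for a re-render; the second render finds the
    # image in the cache and returns it directly instead of the placeholder
    cache[url] = future.result()
    re_render()

url = 'https://www.sublimehq.com/images/sublime_text.png'
if url not in cache:
    executor.submit(fetch, url).add_done_callback(partial(on_done, url))
executor.shutdown(wait=True)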