Skip to content

Commit 14fc992

Browse files
cslzchenfelliott
authored andcommitted
Fix PDF renderer with better URL escaping
- Add a helper method for escaping URL which (1) escapes both single and double quote(s) to %27 and %22 respectively and (2) logs whenever an unsafe URL is detected and replaced - Fix all three render actions and update tests
1 parent 78047e0 commit 14fc992

File tree

3 files changed

+101
-51
lines changed

3 files changed

+101
-51
lines changed

mfr/extensions/pdf/render.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
import logging
22
import os
3-
import re
43

54
import furl
65
from mako.lookup import TemplateLookup
76

87
from mfr.core import extension
98
from mfr.extensions.pdf import settings
10-
from mfr.extensions.utils import munge_url_for_localdev
9+
from mfr.extensions.utils import munge_url_for_localdev, escape_url_for_template
1110

1211
logger = logging.getLogger(__name__)
1312

@@ -20,14 +19,15 @@ class PdfRenderer(extension.BaseRenderer):
2019
]).get_template('viewer.mako')
2120

2221
def render(self):
22+
2323
download_url = munge_url_for_localdev(self.metadata.download_url)
2424
logger.debug('extension::{} supported-list::{}'.format(self.metadata.ext,
2525
settings.EXPORT_SUPPORTED))
2626
if self.metadata.ext not in settings.EXPORT_SUPPORTED:
2727
logger.debug('Extension not found in supported list!')
2828
return self.TEMPLATE.render(
2929
base=self.assets_url,
30-
url=re.sub(r'\'', '\\\'', download_url.geturl())
30+
url=escape_url_for_template(download_url.geturl())
3131
)
3232

3333
logger.debug('Extension found in supported list!')
@@ -40,9 +40,16 @@ def render(self):
4040
exported_url.args['format'] = settings.EXPORT_TYPE
4141

4242
self.metrics.add('needs_export', True)
43-
return self.TEMPLATE.render(base=self.assets_url, url=exported_url.url)
43+
return self.TEMPLATE.render(
44+
base=self.assets_url,
45+
url=escape_url_for_template(exported_url.url)
46+
)
4447

45-
return self.TEMPLATE.render(base=self.assets_url, url=download_url.geturl())
48+
# TODO: is this dead code? ``settings.EXPORT_TYPE`` is never None or empty
49+
return self.TEMPLATE.render(
50+
base=self.assets_url,
51+
url=escape_url_for_template(download_url.geturl())
52+
)
4653

4754
@property
4855
def file_required(self):

mfr/extensions/utils.py

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,49 @@
1+
import logging
2+
from typing import Tuple
13
from urllib.parse import parse_qs, urlencode, urlparse
24

35
from mfr.extensions import settings
46

7+
logger = logging.getLogger(__name__)
58

6-
def munge_url_for_localdev(url):
7-
"""
8-
If MFR is being run in a local development environment (i.e. LOCAL_DEVELOPMENT is True), we
9+
10+
def munge_url_for_localdev(url: str) -> Tuple:
11+
"""If MFR is being run in a local development environment (i.e. LOCAL_DEVELOPMENT is True), we
912
need to replace the internal host (the one the backend services communicate on, default:
1013
192.168.168.167) with the external host (the one the user provides, default: "localhost")
1114
e.g. http://192.168.168.167:7777/foo/bar => http://localhost:7777/foo/bar
1215
"""
16+
1317
url_obj = urlparse(url)
14-
if (settings.LOCAL_DEVELOPMENT and url_obj.hostname == settings.DOCKER_LOCAL_HOST):
15-
query_dict = parse_qs(url_obj.query, keep_blank_values=True)
18+
if settings.LOCAL_DEVELOPMENT and url_obj.hostname == settings.DOCKER_LOCAL_HOST:
19+
query_dict = parse_qs(url_obj.query, keep_blank_values=True)
1620

17-
# the 'mode' param will break image downloads from the osf
18-
query_dict.pop('mode', None)
21+
# the 'mode' param will break image downloads from the osf
22+
query_dict.pop('mode', None)
1923

20-
url_obj = url_obj._replace(
21-
query=urlencode(query_dict, doseq=True),
22-
netloc='{}:{}'.format(settings.LOCAL_HOST, url_obj.port)
23-
)
24+
url_obj = url_obj._replace(
25+
query=urlencode(query_dict, doseq=True),
26+
netloc='{}:{}'.format(settings.LOCAL_HOST, url_obj.port)
27+
)
2428

2529
return url_obj
30+
31+
32+
def escape_url_for_template(url: str, logs=False) -> str:
33+
"""Escape (URL Encode) single and double quote(s) for the given URL.
34+
35+
Download and export URLs may end up not properly encoded right before they are about to be sent
36+
to the mako template due to issues including (but not limited to) (1) ``furl`` dropping encoding
37+
for single quote (2) URL (provided by users or constructed by scripts) not having the correct
38+
encoding. This helper method must be called for each render request that sends URL to the mako
39+
template.
40+
41+
:param str url: the URL to be sent to the mako template
42+
:param bool logs: whether to enable warnings, default is `True` and is set to `False` for tests
43+
:return: the properly encoded URL
44+
"""
45+
46+
safe_url = url.replace('"', '%22').replace("'", '%27')
47+
if url != safe_url and logs:
48+
logger.warning('Unsafe URL containing unescaped single (double) quote(s) has been replaced')
49+
return safe_url
Lines changed: 54 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,53 +1,68 @@
11
import furl
22
import pytest
3-
import re
43

5-
from mfr.extensions.pdf import (settings,
6-
PdfRenderer)
74
from mfr.core.provider import ProviderMetadata
5+
from mfr.extensions.pdf import settings, PdfRenderer
6+
from mfr.extensions.utils import escape_url_for_template
87

98

109
@pytest.fixture
1110
def metadata():
1211
return ProviderMetadata('test', '.pdf', 'text/plain', '1234',
1312
'http://wb.osf.io/file/test.pdf?token=1234')
1413

14+
15+
@pytest.fixture
16+
def metadata_2():
17+
return ProviderMetadata('te\'st', '.pdf', 'text/plain', '1234',
18+
'http://wb.osf.io/file/te\'st.pdf?token=1234')
19+
20+
1521
@pytest.fixture
1622
def tif_metadata():
1723
return ProviderMetadata('test', '.tif', 'text/plain', '1234',
1824
'http://wb.osf.io/file/test.tif?token=1234')
1925

26+
2027
@pytest.fixture
2128
def docx_metadata():
22-
return ProviderMetadata(
23-
'te\'st',
24-
'.docx',
25-
'text/plain',
26-
'1234',
27-
'http://mfr.osf.io/export?url=http://osf.io/file/te\'st.pdf'
28-
)
29+
return ProviderMetadata('te\'st', '.docx', 'text/plain','1234',
30+
'http://mfr.osf.io/export?url=http://osf.io/file/te\'st.pdf')
31+
2932

3033
@pytest.fixture
3134
def file_path():
3235
return '/tmp/test.pdf'
3336

37+
38+
@pytest.fixture
39+
def file_path_2():
40+
return '/tmp/te\'st.pdf'
41+
42+
3443
@pytest.fixture
3544
def tif_file_path():
3645
return '/tmp/test.tif'
3746

47+
3848
@pytest.fixture
3949
def docx_file_path():
4050
return '/tmp/te\'st.docx'
4151

4252

4353
@pytest.fixture
4454
def url():
55+
return 'http://osf.io/file/test.pdf'
56+
57+
58+
@pytest.fixture
59+
def url_2():
4560
return 'http://osf.io/file/te\'st.pdf'
4661

4762

4863
@pytest.fixture
4964
def tif_url():
50-
return 'http://osf.io/file/te\'st.tif'
65+
return 'http://osf.io/file/test.tif'
5166

5267

5368
@pytest.fixture
@@ -62,6 +77,11 @@ def assets_url():
6277

6378
@pytest.fixture
6479
def export_url():
80+
return 'http://mfr.osf.io/export?url=http://osf.io/file/test.pdf'
81+
82+
83+
@pytest.fixture
84+
def export_url_2():
6585
return 'http://mfr.osf.io/export?url=http://osf.io/file/te\'st.pdf'
6686

6787

@@ -71,31 +91,18 @@ def renderer(metadata, file_path, url, assets_url, export_url):
7191

7292

7393
@pytest.fixture
74-
def tif_renderer(
75-
tif_metadata,
76-
tif_file_path,
77-
tif_url,
78-
assets_url,
79-
export_url
80-
):
94+
def renderer_2(metadata_2, file_path_2, url_2, assets_url, export_url_2):
95+
return PdfRenderer(metadata_2, file_path_2, url_2, assets_url, export_url_2)
96+
97+
98+
@pytest.fixture
99+
def tif_renderer(tif_metadata, tif_file_path, tif_url, assets_url, export_url):
81100
return PdfRenderer(tif_metadata, tif_file_path, tif_url, assets_url, export_url)
82101

83102

84103
@pytest.fixture
85-
def docx_renderer(
86-
docx_metadata,
87-
docx_file_path,
88-
docx_url,
89-
assets_url,
90-
export_url
91-
):
92-
return PdfRenderer(
93-
docx_metadata,
94-
docx_file_path,
95-
docx_url,
96-
assets_url,
97-
export_url
98-
)
104+
def docx_renderer(docx_metadata, docx_file_path, docx_url, assets_url, export_url_2):
105+
return PdfRenderer(docx_metadata, docx_file_path, docx_url, assets_url, export_url_2)
99106

100107

101108
class TestPdfRenderer:
@@ -106,6 +113,16 @@ def test_render_pdf(self, renderer, metadata, assets_url):
106113
assert '<div id="viewer" class="pdfViewer"></div>' in body
107114
assert 'DEFAULT_URL = \'{}\''.format(metadata.download_url) in body
108115

116+
def test_render_pdf_with_single_quote_in_name(self, renderer_2, metadata_2, assets_url):
117+
118+
body = renderer_2.render()
119+
safe_download_url = escape_url_for_template(metadata_2.download_url, logs=False)
120+
121+
assert '<base href="{}/{}/web/" target="_blank">'.format(assets_url, 'pdf') in body
122+
assert '<div id="viewer" class="pdfViewer"></div>' in body
123+
assert 'DEFAULT_URL = \'{}\''.format(metadata_2.download_url) not in body
124+
assert 'DEFAULT_URL = \'{}\''.format(safe_download_url) in body
125+
109126
def test_render_tif(self, tif_renderer, assets_url):
110127
exported_url = furl.furl(tif_renderer.export_url)
111128
exported_url.args['format'] = '{}.{}'.format(settings.EXPORT_MAXIMUM_SIZE,
@@ -117,10 +134,12 @@ def test_render_tif(self, tif_renderer, assets_url):
117134
assert 'DEFAULT_URL = \'{}\''.format(exported_url.url) in body
118135

119136
def test_render_docx(self, docx_renderer, assets_url):
120-
exported_url = furl.furl(docx_renderer.export_url)
121137

138+
exported_url = furl.furl(docx_renderer.export_url)
139+
safe_exported_url = escape_url_for_template(exported_url.url, logs=False)
122140
body = docx_renderer.render()
141+
123142
assert '<base href="{}/{}/web/" target="_blank">'.format(assets_url, 'pdf') in body
124143
assert '<div id="viewer" class="pdfViewer"></div>' in body
125-
assert 'DEFAULT_URL = \'{}\''.format(re.sub(r'\'', '\\\'', exported_url.url)) in body
126-
144+
assert 'DEFAULT_URL = \'{}\''.format(exported_url.url) not in body
145+
assert 'DEFAULT_URL = \'{}\''.format(safe_exported_url) in body

0 commit comments

Comments
 (0)