Skip to content

Commit 8764f74

Browse files
committed
Move proxy logic into a separate module
1 parent f7cafbb commit 8764f74

File tree

12 files changed

+192
-188
lines changed

12 files changed

+192
-188
lines changed

scrapinghub/client/activity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import absolute_import
22

3-
from .utils import _Proxy
3+
from .proxy import _Proxy
44
from .utils import parse_job_key
55

66

scrapinghub/client/collections.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55

66
from ..hubstorage.collectionsrt import Collection as _Collection
77

8-
from .utils import (
9-
_Proxy, format_iter_filters, proxy_methods, wrap_kwargs, update_kwargs,
10-
)
8+
from .proxy import _Proxy, proxy_methods, wrap_kwargs, format_iter_filters
9+
from .utils import update_kwargs
1110

1211

1312
class Collections(_Proxy):

scrapinghub/client/frontiers.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
from ..hubstorage.frontier import Frontier as _Frontier
88
from ..hubstorage.utils import urlpathjoin
99

10-
from .utils import _Proxy, update_kwargs
10+
from .proxy import _Proxy
11+
from .utils import update_kwargs
1112

1213

1314
class _HSFrontier(_Frontier):

scrapinghub/client/items.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import absolute_import
22

3-
from .utils import _Proxy
3+
from .proxy import _Proxy
44

55

66
class Items(_Proxy):

scrapinghub/client/jobs.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@
1111
from .requests import Requests
1212
from .samples import Samples
1313
from .exceptions import NotFound, BadRequest, DuplicateJobError
14-
from .utils import (
15-
_MappingProxy, get_tags_for_update, parse_job_key, update_kwargs,
16-
)
14+
from .proxy import _MappingProxy
15+
from .utils import get_tags_for_update, parse_job_key, update_kwargs
1716

1817

1918
class Jobs(object):

scrapinghub/client/logs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from __future__ import absolute_import
22
import json
33

4-
from .utils import _Proxy
4+
from .proxy import _Proxy
55
from .utils import LogLevel
66

77

scrapinghub/client/projects.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@
88
from .collections import Collections
99
from .frontiers import _HSFrontier, Frontiers
1010
from .jobs import Jobs
11+
from .proxy import _MappingProxy
1112
from .spiders import Spiders
12-
from .utils import _MappingProxy, parse_project_id
13+
from .utils import parse_project_id
1314

1415

1516
class Projects(object):

scrapinghub/client/proxy.py

Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
from __future__ import absolute_import
2+
3+
import six
4+
import json
5+
6+
from ..hubstorage.resourcetype import DownloadableResource
7+
from ..hubstorage.resourcetype import ItemsResourceType
8+
from ..hubstorage.collectionsrt import Collections
9+
10+
from .exceptions import wrap_value_too_large
11+
12+
13+
class _Proxy(object):
14+
"""A helper to create a class instance and proxy its methods to origin.
15+
16+
The internal proxy class is useful to link class attributes from its
17+
origin depending on the origin base class as a part of init logic:
18+
19+
- :class:`~scrapinghub.hubstorage.resourcetype.ItemsResourceType` provides
20+
items-based attributes to access items in an arbitrary collection with
21+
get/write/flush/close/stats/iter methods.
22+
23+
- :class:`~scrapinghub.hubstorage.resourcetype.DownloadableResource` provides
24+
download-based attributes to iter through collection with or without
25+
msgpack support.
26+
"""
27+
28+
def __init__(self, cls, client, key):
29+
self.key = key
30+
self._client = client
31+
self._origin = cls(client._hsclient, key)
32+
33+
if issubclass(cls, ItemsResourceType):
34+
self._proxy_methods(['get', 'write', 'flush', 'close',
35+
'stats', ('iter', 'list')])
36+
# redefine write method to wrap hubstorage.ValueTooLarge error
37+
origin_method = getattr(self, 'write')
38+
setattr(self, 'write', wrap_value_too_large(origin_method))
39+
40+
# DType iter_values() has more priority than IType list()
41+
# plus Collections interface doesn't need the iter methods
42+
if issubclass(cls, DownloadableResource) and cls is not Collections:
43+
methods = [('iter', 'iter_values'),
44+
('iter_raw_msgpack', 'iter_msgpack'),
45+
('iter_raw_json', 'iter_json')]
46+
self._proxy_methods(methods)
47+
self._wrap_iter_methods([method[0] for method in methods])
48+
49+
def _proxy_methods(self, methods):
50+
"""A little helper for cleaner interface."""
51+
proxy_methods(self._origin, self, methods)
52+
53+
def _wrap_iter_methods(self, methods):
54+
"""Modify kwargs for all passed self.iter* methods."""
55+
for method in methods:
56+
wrapped = wrap_kwargs(getattr(self, method),
57+
self._modify_iter_params)
58+
setattr(self, method, wrapped)
59+
60+
def _modify_iter_params(self, params):
61+
"""A helper to modify iter() params on-the-fly.
62+
63+
The method is internal and should be redefined in subclasses.
64+
65+
:param params: a dictionary with input parameters.
66+
:return: an updated dictionary with parameters.
67+
:rtype: :class:`dict`
68+
"""
69+
return format_iter_filters(params)
70+
71+
def list(self, *args, **kwargs):
72+
"""Convenient shortcut to list iter results.
73+
74+
Please note that :meth:`list` method can use a lot of memory and for a
75+
large amount of elements it's recommended to iterate through it via
76+
:meth:`iter` method (all params and available filters are same for both
77+
methods).
78+
"""
79+
return list(self.iter(*args, **kwargs))
80+
81+
82+
class _MappingProxy(_Proxy):
    """Proxy with a dict-like get/set/update/delete/iter interface.

    Every operation is delegated to the origin object's ``apiget``,
    ``apipost`` and ``apidelete`` calls.
    """

    def get(self, key):
        """Get element value by key.

        :param key: a string key
        :return: the first item produced by the origin's ``apiget(key)``.
        """
        response = self._origin.apiget(key)
        return next(response)

    def set(self, key, value):
        """Set element value.

        :param key: a string key
        :param value: new value to set for the key; it is serialized with
            :func:`json.dumps` before being posted.
        """
        payload = json.dumps(value)
        self._origin.apipost(key, data=payload, is_idempotent=True)

    def update(self, values):
        """Update multiple elements at once.

        Reads the current data from the origin, merges ``values`` into it
        and posts the result back, leaving out any key listed in the
        origin's ``ignore_fields``.

        :param values: a dictionary with key/values to update.
        :raises TypeError: if ``values`` is not a dict.
        """
        if not isinstance(values, dict):
            raise TypeError("values should be a dict")
        current = next(self._origin.apiget())
        current.update(values)
        payload = {name: item for name, item in six.iteritems(current)
                   if name not in self._origin.ignore_fields}
        self._origin.apipost(jl=payload, is_idempotent=True)

    def delete(self, key):
        """Delete element by key.

        :param key: a string key
        """
        self._origin.apidelete(key)

    def iter(self):
        """Iterate through key/value pairs.

        :return: an iterator over key/value pairs.
        :rtype: :class:`collections.Iterable`
        """
        snapshot = next(self._origin.apiget())
        return six.iteritems(snapshot)
131+
132+
133+
def proxy_methods(origin, successor, methods):
    """Copy attributes from ``origin`` onto ``successor``.

    ``methods`` is a list whose entries are either:

    - a string: the attribute is copied under the same name, or
    - a tuple of 2 strings ``(successor_name, origin_name)``: the origin
      attribute is copied under a different name.

    A name that ``successor`` already has is left untouched.
    """
    for entry in methods:
        pair = entry if isinstance(entry, tuple) else (entry, entry)
        target_name, source_name = pair
        if hasattr(successor, target_name):
            # never override something the successor already provides
            continue
        setattr(successor, target_name, getattr(origin, source_name))
150+
151+
152+
def format_iter_filters(params):
    """Format iter() filter param on-the-fly.

    Support passing multiple filters at once as a list: string elements are
    kept as they are, list/tuple elements are serialized with
    :func:`json.dumps`. Anything other than a non-empty list under the
    ``filter`` key is left untouched.

    :param params: a dictionary with iter() parameters; it is updated in
        place, and only after every filter element has been validated.
    :return: the same ``params`` dictionary.
    :raises ValueError: if a filter element is not a string, tuple or list;
        ``params`` is left unmodified in that case.
    """
    filters = params.get('filter')
    if not filters or not isinstance(filters, list):
        return params

    # Build the formatted list first and assign it only once it is complete,
    # so a validation error does not strip 'filter' from the caller's dict.
    formatted = []
    for condition in filters:
        if isinstance(condition, six.string_types):
            formatted.append(condition)
        elif isinstance(condition, (list, tuple)):
            formatted.append(json.dumps(condition))
        else:
            raise ValueError(
                "Filter condition must be string, tuple or list")
    params['filter'] = formatted
    return params
171+
172+
173+
def wrap_kwargs(fn, kwargs_fn):
    """Return a callable that runs ``fn`` with kwargs processed by ``kwargs_fn``.

    :param fn: the callable to wrap; positional arguments reach it unchanged.
    :param kwargs_fn: a callable taking the keyword-arguments dict and
        returning the dict to actually pass to ``fn``.
    """
    def wrapped(*args, **kwargs):
        return fn(*args, **kwargs_fn(kwargs))
    return wrapped

scrapinghub/client/requests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import absolute_import
22

3-
from .utils import _Proxy
3+
from .proxy import _Proxy
44

55

66
class Requests(_Proxy):

scrapinghub/client/samples.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import absolute_import
22

3-
from .utils import _Proxy
3+
from .proxy import _Proxy
44

55

66
class Samples(_Proxy):

0 commit comments

Comments
 (0)