|
| 1 | +from __future__ import absolute_import |
| 2 | + |
| 3 | +import six |
| 4 | +import json |
| 5 | + |
| 6 | +from ..hubstorage.resourcetype import DownloadableResource |
| 7 | +from ..hubstorage.resourcetype import ItemsResourceType |
| 8 | +from ..hubstorage.collectionsrt import Collections |
| 9 | + |
| 10 | +from .exceptions import wrap_value_too_large |
| 11 | + |
| 12 | + |
| 13 | +class _Proxy(object): |
| 14 | + """A helper to create a class instance and proxy its methods to origin. |
| 15 | +
|
| 16 | + The internal proxy class is useful to link class attributes from its |
| 17 | + origin depending on the origin base class as a part of init logic: |
| 18 | +
|
| 19 | + - :class:`~scrapinghub.hubstorage.resourcetype.ItemsResourceType` provides |
| 20 | + items-based attributes to access items in an arbitrary collection with |
| 21 | + get/write/flush/close/stats/iter methods. |
| 22 | +
|
| 23 | + - :class:`~scrapinghub.hubstorage.resourcetype.DownloadableResource` provides |
| 24 | + download-based attributes to iter through collection with or without |
| 25 | + msgpack support. |
| 26 | + """ |
| 27 | + |
| 28 | + def __init__(self, cls, client, key): |
| 29 | + self.key = key |
| 30 | + self._client = client |
| 31 | + self._origin = cls(client._hsclient, key) |
| 32 | + |
| 33 | + if issubclass(cls, ItemsResourceType): |
| 34 | + self._proxy_methods(['get', 'write', 'flush', 'close', |
| 35 | + 'stats', ('iter', 'list')]) |
| 36 | + # redefine write method to wrap hubstorage.ValueTooLarge error |
| 37 | + origin_method = getattr(self, 'write') |
| 38 | + setattr(self, 'write', wrap_value_too_large(origin_method)) |
| 39 | + |
| 40 | + # DType iter_values() has more priority than IType list() |
| 41 | + # plus Collections interface doesn't need the iter methods |
| 42 | + if issubclass(cls, DownloadableResource) and cls is not Collections: |
| 43 | + methods = [('iter', 'iter_values'), |
| 44 | + ('iter_raw_msgpack', 'iter_msgpack'), |
| 45 | + ('iter_raw_json', 'iter_json')] |
| 46 | + self._proxy_methods(methods) |
| 47 | + self._wrap_iter_methods([method[0] for method in methods]) |
| 48 | + |
| 49 | + def _proxy_methods(self, methods): |
| 50 | + """A little helper for cleaner interface.""" |
| 51 | + proxy_methods(self._origin, self, methods) |
| 52 | + |
| 53 | + def _wrap_iter_methods(self, methods): |
| 54 | + """Modify kwargs for all passed self.iter* methods.""" |
| 55 | + for method in methods: |
| 56 | + wrapped = wrap_kwargs(getattr(self, method), |
| 57 | + self._modify_iter_params) |
| 58 | + setattr(self, method, wrapped) |
| 59 | + |
| 60 | + def _modify_iter_params(self, params): |
| 61 | + """A helper to modify iter() params on-the-fly. |
| 62 | +
|
| 63 | + The method is internal and should be redefined in subclasses. |
| 64 | +
|
| 65 | + :param params: a dictionary with input parameters. |
| 66 | + :return: an updated dictionary with parameters. |
| 67 | + :rtype: :class:`dict` |
| 68 | + """ |
| 69 | + return format_iter_filters(params) |
| 70 | + |
| 71 | + def list(self, *args, **kwargs): |
| 72 | + """Convenient shortcut to list iter results. |
| 73 | +
|
| 74 | + Please note that :meth:`list` method can use a lot of memory and for a |
| 75 | + large amount of elements it's recommended to iterate through it via |
| 76 | + :meth:`iter` method (all params and available filters are same for both |
| 77 | + methods). |
| 78 | + """ |
| 79 | + return list(self.iter(*args, **kwargs)) |
| 80 | + |
| 81 | + |
class _MappingProxy(_Proxy):
    """Proxy exposing a dict-like get/set/update/delete/iter interface
    on top of the origin's ``apiget``/``apipost``/``apidelete`` calls.
    """

    def get(self, key):
        """Return the value stored under ``key``.

        :param key: a string key
        :return: the first element yielded by the origin's ``apiget(key)``.
        """
        response = self._origin.apiget(key)
        return next(response)

    def set(self, key, value):
        """Store ``value`` under ``key``.

        The value is serialized with :func:`json.dumps` before being posted.

        :param key: a string key
        :param value: new value to set for the key
        """
        encoded = json.dumps(value)
        self._origin.apipost(key, data=encoded, is_idempotent=True)

    def update(self, values):
        """Update several elements in a single call.

        The current data is fetched, merged with ``values`` and posted back,
        leaving out keys listed in the origin's ``ignore_fields``.

        :param values: a dictionary with key/values to update.
        :raises TypeError: if ``values`` is not a :class:`dict`.
        """
        if not isinstance(values, dict):
            raise TypeError("values should be a dict")
        merged = next(self._origin.apiget())
        merged.update(values)
        payload = {}
        for name, value in six.iteritems(merged):
            if name in self._origin.ignore_fields:
                continue
            payload[name] = value
        self._origin.apipost(jl=payload, is_idempotent=True)

    def delete(self, key):
        """Remove the element stored under ``key``.

        :param key: a string key
        """
        self._origin.apidelete(key)

    def iter(self):
        """Iterate over the stored key/value pairs.

        :return: an iterator over key/value pairs.
        :rtype: :class:`collections.Iterable`
        """
        snapshot = next(self._origin.apiget())
        return six.iteritems(snapshot)
| 131 | + |
| 132 | + |
def proxy_methods(origin, successor, methods):
    """Copy selected attributes from ``origin`` onto ``successor``.

    ``methods`` is a list whose entries are either:

    - a string: the attribute is copied under the same name;
    - a 2-tuple ``(successor_name, origin_name)``: the origin attribute
      ``origin_name`` is copied under ``successor_name``.

    A name that ``successor`` already has is left untouched.
    """
    for entry in methods:
        if isinstance(entry, tuple):
            target_name, source_name = entry
        else:
            target_name = source_name = entry
        if hasattr(successor, target_name):
            # never override something the successor already provides
            continue
        setattr(successor, target_name, getattr(origin, source_name))
| 150 | + |
| 151 | + |
def format_iter_filters(params):
    """Normalize the ``filter`` entry of iter() parameters in place.

    When ``params['filter']`` is a non-empty list, each element is
    converted: strings are kept as is, lists and tuples are serialized with
    :func:`json.dumps`. Any other ``filter`` value (or a missing one) leaves
    ``params`` untouched.

    :param params: a dictionary with iter() parameters; it is modified
        and returned.
    :return: the same ``params`` dictionary.
    :raises ValueError: if a filter element is neither a string nor a
        list/tuple.
    """
    raw_filters = params.get('filter')
    if not raw_filters or not isinstance(raw_filters, list):
        return params

    encoded = []
    for condition in params.pop('filter'):
        if isinstance(condition, six.string_types):
            encoded.append(condition)
            continue
        if not isinstance(condition, (list, tuple)):
            raise ValueError(
                "Filter condition must be string, tuple or list")
        encoded.append(json.dumps(condition))
    if encoded:
        params['filter'] = encoded
    return params
| 171 | + |
| 172 | + |
def wrap_kwargs(fn, kwargs_fn):
    """Return a wrapper around ``fn`` that pre-processes keyword arguments.

    On every call the keyword arguments are passed as a dictionary to
    ``kwargs_fn`` and whatever it returns is used as the keyword arguments
    for ``fn``; positional arguments are forwarded untouched.

    The wrapper keeps the metadata of ``fn`` (name, docstring, ...), so
    introspection of a wrapped method still describes the original one.

    :param fn: the callable to wrap.
    :param kwargs_fn: a callable taking a dict of keyword arguments and
        returning the dict to call ``fn`` with.
    :return: the wrapping function.
    """
    import functools

    # Copy only the metadata attributes ``fn`` really has: on Python 2
    # functools.wraps() raises AttributeError for callables lacking e.g.
    # ``__name__`` (partials, callable instances).
    assigned = tuple(attr for attr in functools.WRAPPER_ASSIGNMENTS
                     if hasattr(fn, attr))

    @functools.wraps(fn, assigned=assigned)
    def wrapped(*args, **kwargs):
        kwargs = kwargs_fn(kwargs)
        return fn(*args, **kwargs)
    return wrapped
0 commit comments