
Commit 59f4883

Merge pull request #53 from scrapinghub/sc1467-1-fixes
Minor improvements for new python-client
2 parents: af72096 + 18481b6

21 files changed: +646 −468 lines

README.rst

Lines changed: 27 additions & 4 deletions

@@ -100,6 +100,29 @@ Project instance also has the following fields:
 - spiders - access to spiders collection (see ``Spiders`` section)
 
 
+Settings
+--------
+
+To get a list of the project settings::
+
+    >>> project.settings.list()
+    [(u'default_job_units', 2), (u'job_runtime_limit', 24)]
+
+To get a project setting value by name::
+
+    >>> project.settings.get('job_runtime_limit')
+    24
+
+To update a project setting value by name::
+
+    >>> project.settings.set('job_runtime_limit', 20)
+
+Or update a few project settings at once::
+
+    >>> project.settings.update({'default_job_units': 1,
+    ...                          'job_runtime_limit': 20})
+
+
 Spiders
 -------
 
@@ -181,7 +204,7 @@ count
 
 It's also possible to count jobs for a given project/spider::
 
-    >> spider.jobs.count()
+    >>> spider.jobs.count()
     5
 
 Count logic supports different filters, as described for `count endpoint`_.

@@ -314,9 +337,9 @@ Metadata
 
 Job details can be found in jobs metadata and it's scrapystats::
 
-    >>> job.metadata['version']
+    >>> job.metadata.get('version')
     '5123a86-master'
-    >>> job.metadata['scrapystats']
+    >>> job.metadata.get('scrapystats')
     ...
     'downloader/response_count': 104,
     'downloader/response_status_count/200': 104,

@@ -332,7 +355,7 @@ Job details can be found in jobs metadata and it's scrapystats::
 
 Anything can be stored in metadata, here is example how to add tags::
 
-    >>> job.update_metadata({'tags': 'obsolete'})
+    >>> job.metadata.set('tags', ['obsolete'])
 
 Items
 ^^^^^
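
Taken together, the README additions above cover the whole read/write flow for
project settings and job metadata. A minimal end-to-end sketch of that flow,
assuming a project accessor such as ``client.get_project()``; the API key,
project id and job key below are placeholders::

    >>> from scrapinghub import ScrapinghubClient
    >>> client = ScrapinghubClient('APIKEY')            # placeholder API key
    >>> project = client.get_project(123)               # placeholder project id
    >>> project.settings.set('job_runtime_limit', 20)   # write a single setting
    >>> project.settings.get('job_runtime_limit')       # read it back
    20
    >>> job = project.job('123/1/2')                    # placeholder job key
    >>> job.metadata.set('tags', ['obsolete'])          # tags now go through metadata.set()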

scrapinghub/__init__.py

Lines changed: 12 additions & 2 deletions

@@ -1,5 +1,7 @@
-__all__ = ["APIError", "Connection", "HubstorageClient", "ScrapinghubClient"]
-
+__all__ = ["APIError", "Connection", "HubstorageClient",
+           "ScrapinghubClient", "ScrapinghubAPIError",
+           "DuplicateJobError", "BadRequest", "NotFound",
+           "Unauthorized", "ValueTooLarge"]
 
 import pkgutil
 __version__ = pkgutil.get_data(__package__, 'VERSION')

@@ -10,3 +12,11 @@
 from .legacy import *
 from .hubstorage import HubstorageClient
 from .client import ScrapinghubClient
+from .client.exceptions import (
+    ScrapinghubAPIError,
+    DuplicateJobError,
+    BadRequest,
+    NotFound,
+    Unauthorized,
+    ValueTooLarge,
+)
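
With the extended ``__all__`` and the new top-level imports, the client
exceptions can be caught without importing from
``scrapinghub.client.exceptions`` directly. A short sketch, assuming a project
accessor such as ``client.get_project()``; the API key, project id and spider
name are placeholders::

    >>> from scrapinghub import ScrapinghubClient, DuplicateJobError
    >>> client = ScrapinghubClient('APIKEY')       # placeholder API key
    >>> project = client.get_project(123)          # placeholder project id
    >>> try:
    ...     project.jobs.schedule('myspider')      # placeholder spider name
    ... except DuplicateJobError:
    ...     print('this spider already has a scheduled job')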

scrapinghub/client/exceptions.py

Lines changed: 4 additions & 4 deletions

@@ -32,7 +32,7 @@ def __init__(self, message=None, http_error=None):
         super(ScrapinghubAPIError, self).__init__(message)
 
 
-class InvalidUsage(ScrapinghubAPIError):
+class BadRequest(ScrapinghubAPIError):
     pass
 
 

@@ -60,7 +60,7 @@ def wrapped(*args, **kwargs):
        except HTTPError as exc:
            status_code = exc.response.status_code
            if status_code == 400:
-               raise InvalidUsage(http_error=exc)
+               raise BadRequest(http_error=exc)
            elif status_code == 401:
                raise Unauthorized(http_error=exc)
            elif status_code == 404:

@@ -76,8 +76,8 @@ def wrapped(*args, **kwargs):
                raise NotFound(msg)
            elif exc._type == APIError.ERR_VALUE_ERROR:
                raise ValueError(msg)
-           elif exc._type == APIError.ERR_INVALID_USAGE:
-               raise InvalidUsage(msg)
+           elif exc._type == APIError.ERR_BAD_REQUEST:
+               raise BadRequest(msg)
            elif exc._type == APIError.ERR_AUTH_ERROR:
                raise Unauthorized(http_error=exc)
            raise ScrapinghubAPIError(msg)
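
Because ``InvalidUsage`` is renamed rather than aliased, calling code that
handled it has to switch to ``BadRequest``, or 400-level responses will escape
as unhandled exceptions. A migration sketch, assuming a project accessor such
as ``get_project()``; the keys and ids are placeholders, the filter value is
deliberately malformed, and whether a given bad parameter surfaces as HTTP 400
depends on the endpoint::

    >>> from scrapinghub import ScrapinghubClient, BadRequest
    >>> job = ScrapinghubClient('APIKEY').get_project(123).job('123/1/2')  # placeholders
    >>> try:
    ...     list(job.items.iter(filter='not-a-valid-filter'))
    ... except BadRequest as exc:                  # was: except InvalidUsage as exc:
    ...     print('bad request:', exc)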

scrapinghub/client/jobs.py

Lines changed: 47 additions & 35 deletions

@@ -1,7 +1,7 @@
 from __future__ import absolute_import
 import json
 
-from ..hubstorage.job import JobMeta
+from ..hubstorage.job import JobMeta as _JobMeta
 from ..hubstorage.job import Items as _Items
 from ..hubstorage.job import Logs as _Logs
 from ..hubstorage.job import Samples as _Samples

@@ -11,9 +11,8 @@
 from .logs import Logs
 from .requests import Requests
 from .samples import Samples
-from .exceptions import NotFound, InvalidUsage, DuplicateJobError
-from .utils import get_tags_for_update
-from .utils import parse_job_key
+from .exceptions import NotFound, BadRequest, DuplicateJobError
+from .utils import _MappingProxy, get_tags_for_update, parse_job_key
 
 
 class Jobs(object):

@@ -150,7 +149,7 @@ def schedule(self, spidername=None, **params):
         try:
             response = self._client._connection._post(
                 'schedule', 'json', params)
-        except InvalidUsage as exc:
+        except BadRequest as exc:
             if 'already scheduled' in str(exc):
                 raise DuplicateJobError(exc)
             raise

@@ -302,10 +301,10 @@ class Job(object):
         >>> job = project.job('123/1/2')
         >>> job.key
         '123/1/2'
-        >>> job.metadata['state']
+        >>> job.metadata.get('state')
         'finished'
     """
-    def __init__(self, client, jobkey, metadata=None):
+    def __init__(self, client, jobkey):
         self.projectid = parse_job_key(jobkey).projectid
         self.key = jobkey
 

@@ -319,24 +318,7 @@ def __init__(self, client, jobkey, metadata=None):
         self.requests = Requests(_Requests, client, jobkey)
         self.samples = Samples(_Samples, client, jobkey)
 
-        self.metadata = JobMeta(client._hsclient, jobkey, cached=metadata)
-
-    def update_metadata(self, *args, **kwargs):
-        """Update job metadata.
-
-        :param \*\*kwargs: keyword arguments representing job metadata
-
-        Usage:
-
-        - update job outcome::
-
-            >>> job.update_metadata(close_reason='custom reason')
-
-        - change job tags::
-
-            >>> job.update_metadata({'tags': 'obsolete'})
-        """
-        self._job.update_metadata(*args, **kwargs)
+        self.metadata = JobMeta(_JobMeta, client, jobkey)
 
     def update_tags(self, add=None, remove=None):
         """Partially update job tags.

@@ -426,19 +408,49 @@ def cancel(self):
         Usage::
 
             >>> job.cancel()
-            >>> job.metadata['cancelled_by']
+            >>> job.metadata.get('cancelled_by')
             'John'
         """
         self._project.jobq.request_cancel(self)
 
-    def purge(self):
-        """Delete job and expire its local metadata.
 
-        Usage::
+class JobMeta(_MappingProxy):
+    """Class representing job metadata.
 
-            >>> job.purge()
-            >>> job.metadata['state']
-            'deleted'
-        """
-        self.delete()
-        self.metadata.expire()
+    Not a public constructor: use :class:`Job` instance to get a
+    :class:`JobMeta` instance. See :attr:`Job.metadata` attribute.
+
+    Usage::
+
+    - get job metadata instance
+
+        >>> job.metadata
+        <scrapinghub.client.jobs.JobMeta at 0x10494f198>
+
+    - iterate through job metadata
+
+        >>> job.metadata.iter()
+        <dict_itemiterator at 0x104adbd18>
+
+    - list job metadata
+
+        >>> job.metadata.list()
+        [('project', 123), ('units', 1), ('state', 'finished'), ...]
+
+    - get meta field value by name
+
+        >>> job.metadata.get('version')
+        'test'
+
+    - update job meta field value (some meta fields are read-only)
+
+        >>> job.metadata.set('my-meta', 'test')
+
+    - update multiple meta fields at once
+
+        >>> job.metadata.update({'my-meta1': 'test1', 'my-meta2': 'test2'})
+
+    - delete meta field by name
+
+        >>> job.metadata.delete('my-meta')
+    """

scrapinghub/client/projects.py

Lines changed: 54 additions & 3 deletions

@@ -1,15 +1,17 @@
 from __future__ import absolute_import
 
+import six
+
 from ..hubstorage.activity import Activity as _Activity
 from ..hubstorage.collectionsrt import Collections as _Collections
-from ..hubstorage.project import Settings
+from ..hubstorage.project import Settings as _Settings
 
 from .activity import Activity
 from .collections import Collections
 from .frontiers import _HSFrontier, Frontiers
 from .jobs import Jobs
 from .spiders import Spiders
-from .utils import parse_project_id
+from .utils import _MappingProxy, parse_project_id
 
 
 class Projects(object):

@@ -121,4 +123,53 @@ def __init__(self, client, projectid):
         self.activity = Activity(_Activity, client, projectid)
         self.collections = Collections(_Collections, client, projectid)
         self.frontiers = Frontiers(_HSFrontier, client, projectid)
-        self.settings = Settings(client._hsclient, projectid)
+        self.settings = Settings(_Settings, client, projectid)
+
+
+class Settings(_MappingProxy):
+    """Class representing project settings.
+
+    Not a public constructor: use :class:`Project` instance to get a
+    :class:`Settings` instance. See :attr:`Project.settings` attribute.
+
+    Usage::
+
+    - get project settings instance
+
+        >>> project.settings
+        <scrapinghub.client.projects.Settings at 0x10ecf1250>
+
+    - iterate through project settings
+
+        >>> project.settings.iter()
+        <dictionary-itemiterator at 0x10ed11578>
+
+    - list project settings
+
+        >>> project.settings.list()
+        [(u'default_job_units', 2),
+         (u'job_runtime_limit', 20)]
+
+    - get setting value by name
+
+        >>> project.settings.get('default_job_units')
+        2
+
+    - update setting value (some settings are read-only)
+
+        >>> project.settings.set('default_job_units', 2)
+
+    - update multiple settings at once
+
+        >>> project.settings.update({'default_job_units': 1,
+        ...                          'job_runtime_limit': 20})
+
+    - delete project setting by name
+
+        >>> project.settings.delete('job_runtime_limit')
+    """
+    def set(self, key, value):
+        # FIXME drop the method when post-by-key is implemented on server side
+        if not isinstance(key, six.string_types):
+            raise TypeError("key should be a string")
+        self.update({key: value})
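
The ``set()`` override exists because project settings cannot yet be posted
key-by-key on the server side (hence the FIXME), so a single-key write is
routed through ``update()``, which re-reads the whole mapping and posts it
back. Roughly the same effect, sketched with the public API (the setting name
and value are placeholders)::

    >>> current = dict(project.settings.iter())       # fetch all current settings
    >>> current['job_runtime_limit'] = 20             # change one value locally
    >>> project.settings.update(current)              # post the merged mapping back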

scrapinghub/client/utils.py

Lines changed: 31 additions & 7 deletions

@@ -4,9 +4,9 @@
 import json
 import logging
 import binascii
-
 from codecs import decode
-from six import string_types
+
+import six
 
 from ..hubstorage.resourcetype import DownloadableResource
 from ..hubstorage.resourcetype import ItemsResourceType

@@ -46,7 +46,7 @@ def parse_project_id(projectid):
 def parse_job_key(jobkey):
     if isinstance(jobkey, tuple):
         parts = jobkey
-    elif isinstance(jobkey, string_types):
+    elif isinstance(jobkey, six.string_types):
         parts = jobkey.split('/')
     else:
         raise ValueError("Job key should be a string or a tuple")

@@ -125,6 +125,30 @@ def list(self, *args, **kwargs):
         return list(self.iter(*args, **kwargs))
 
 
+class _MappingProxy(_Proxy):
+
+    def get(self, key):
+        return next(self._origin.apiget(key))
+
+    def set(self, key, value):
+        self._origin.apipost(key, data=json.dumps(value), is_idempotent=True)
+
+    def update(self, values):
+        if not isinstance(values, dict):
+            raise TypeError("values should be a dict")
+        data = next(self._origin.apiget())
+        data.update(values)
+        self._origin.apipost(jl={k: v for k, v in six.iteritems(data)
+                                 if k not in self._origin.ignore_fields},
+                             is_idempotent=True)
+
+    def delete(self, key):
+        self._origin.apidelete(key)
+
+    def iter(self):
+        return six.iteritems(next(self._origin.apiget()))
+
+
 def wrap_kwargs(fn, kwargs_fn):
     """Tiny wrapper to prepare modified version of function kwargs"""
     def wrapped(*args, **kwargs):

@@ -160,7 +184,7 @@ def format_iter_filters(params):
     if filters and isinstance(filters, list):
         filter_data = []
         for elem in params.pop('filter'):
-            if isinstance(elem, string_types):
+            if isinstance(elem, six.string_types):
                 filter_data.append(elem)
             elif isinstance(elem, (list, tuple)):
                 filter_data.append(json.dumps(elem))

@@ -195,12 +219,12 @@ def parse_auth(auth):
         return (apikey, '')
 
     if isinstance(auth, tuple):
-        all_strings = all(isinstance(k, string_types) for k in auth)
+        all_strings = all(isinstance(k, six.string_types) for k in auth)
         if len(auth) != 2 or not all_strings:
             raise ValueError("Wrong authentication credentials")
         return auth
 
-    if not isinstance(auth, string_types):
+    if not isinstance(auth, six.string_types):
         raise ValueError("Wrong authentication credentials")
 
     jwt_auth = _search_for_jwt_credentials(auth)

@@ -217,7 +241,7 @@ def _search_for_jwt_credentials(auth):
     except (binascii.Error, TypeError):
         return
     try:
-        if not isinstance(decoded_auth, string_types):
+        if not isinstance(decoded_auth, six.string_types):
            decoded_auth = decoded_auth.decode('ascii')
        login, _, password = decoded_auth.partition(':')
        if password and parse_job_key(login):
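
``_MappingProxy`` exposes ``get()``/``set()``/``update()``/``delete()``/``iter()``
methods rather than dict-style item access, which is why the README examples
switch from ``job.metadata['version']`` to ``job.metadata.get('version')``.
Callers that prefer the old item syntax can wrap the proxy themselves; a
hypothetical adapter, not part of the client::

    >>> class MappingAdapter(object):
    ...     """Dict-style item access on top of a mapping proxy."""
    ...     def __init__(self, proxy):
    ...         self._proxy = proxy
    ...     def __getitem__(self, key):
    ...         return self._proxy.get(key)
    ...     def __setitem__(self, key, value):
    ...         self._proxy.set(key, value)
    ...
    >>> meta = MappingAdapter(job.metadata)
    >>> meta['state']
    'finished'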
