Skip to content

Commit ec5590b

Browse files
authored
Merge pull request #38 from scrapinghub/sc1467-1
SH Python client update
2 parents 1d884bd + 0c280ba commit ec5590b

35 files changed

+4516
-24
lines changed

README.rst

Lines changed: 616 additions & 6 deletions
Large diffs are not rendered by default.

pytest.ini

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[pytest]
2+
addopts = --doctest-glob='scrapinghub/*.py'

scrapinghub/VERSION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
1.9.0
1+
1.10.0.dev1

scrapinghub/__init__.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
1-
__all__ = ["APIError", "Connection", "HubstorageClient"]
2-
1+
__all__ = ["APIError", "Connection", "HubstorageClient",
2+
"ScrapinghubClient", "ScrapinghubAPIError",
3+
"DuplicateJobError", "BadRequest", "NotFound",
4+
"Unauthorized", "ValueTooLarge"]
35

46
import pkgutil
57
__version__ = pkgutil.get_data(__package__, 'VERSION')
@@ -9,3 +11,12 @@
911

1012
from .legacy import *
1113
from .hubstorage import HubstorageClient
14+
from .client import ScrapinghubClient
15+
from .client.exceptions import (
16+
ScrapinghubAPIError,
17+
DuplicateJobError,
18+
BadRequest,
19+
NotFound,
20+
Unauthorized,
21+
ValueTooLarge,
22+
)

scrapinghub/client/__init__.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
from scrapinghub import Connection as _Connection
2+
from scrapinghub import HubstorageClient as _HubstorageClient
3+
4+
from .projects import Projects
5+
from .exceptions import wrap_http_errors
6+
7+
from .utils import parse_auth
8+
from .utils import parse_project_id, parse_job_key
9+
10+
11+
__all__ = ['ScrapinghubClient']
12+
13+
14+
class Connection(_Connection):
    """``scrapinghub.Connection`` subclass with a decorated ``_request``.

    The only thing this subclass adds is the ``wrap_http_errors`` decorator
    (imported from ``.exceptions``) around the parent's ``_request``.
    """

    @wrap_http_errors
    def _request(self, *args, **kwargs):
        # Arguments are forwarded untouched to the parent implementation.
        parent = super(Connection, self)
        return parent._request(*args, **kwargs)
19+
20+
21+
class HubstorageClient(_HubstorageClient):
    """``scrapinghub.HubstorageClient`` subclass with a decorated ``request``.

    The only thing this subclass adds is the ``wrap_http_errors`` decorator
    (imported from ``.exceptions``) around the parent's ``request``.
    """

    @wrap_http_errors
    def request(self, *args, **kwargs):
        # Arguments are forwarded untouched to the parent implementation.
        parent = super(HubstorageClient, self)
        return parent.request(*args, **kwargs)
26+
27+
28+
class ScrapinghubClient(object):
    # NOTE: the docstring is a raw string because it contains ``\*``, which
    # is an invalid escape sequence in a regular string literal.
    r"""Main class to work with Scrapinghub API.

    :param auth: Scrapinghub APIKEY or other SH auth credentials.
    :param dash_endpoint: (optional) Scrapinghub Dash panel url.
    :param \*\*kwargs: (optional) Additional arguments for
        :class:`scrapinghub.hubstorage.HubstorageClient` constructor.

    :ivar projects: projects collection, :class:`Projects` instance.

    Usage::

        >>> from scrapinghub import ScrapinghubClient
        >>> client = ScrapinghubClient('APIKEY')
        >>> client
        <scrapinghub.client.ScrapinghubClient at 0x1047af2e8>
    """

    def __init__(self, auth=None, dash_endpoint=None, **kwargs):
        self.projects = Projects(self)
        # ``parse_auth`` splits the credentials into a (login, password)
        # pair that is shared by both underlying clients.
        login, password = parse_auth(auth)
        self._connection = Connection(apikey=login,
                                      password=password,
                                      url=dash_endpoint)
        self._hsclient = HubstorageClient(auth=(login, password), **kwargs)

    def get_project(self, project_id):
        """Get :class:`Project` instance with a given project id.

        The method is a shortcut for client.projects.get().

        :param project_id: integer or string numeric project id.
        :return: :class:`Project` object.
        :rtype: scrapinghub.client.projects.Project

        Usage::

            >>> project = client.get_project(123)
            >>> project
            <scrapinghub.client.projects.Project at 0x106cdd6a0>
        """
        return self.projects.get(parse_project_id(project_id))

    def get_job(self, job_key):
        """Get Job with a given job key.

        :param job_key: job key string in format 'project_id/spider_id/job_id',
            where all the components are integers.
        :return: :class:`Job` object.
        :rtype: scrapinghub.client.jobs.Job

        Usage::

            >>> job = client.get_job('123/1/1')
            >>> job
            <scrapinghub.client.jobs.Job at 0x10afe2eb1>
        """
        # The job is looked up through the project named in its key.
        project_id = parse_job_key(job_key).project_id
        return self.projects.get(project_id).jobs.get(job_key)

    def close(self, timeout=None):
        """Close client instance.

        Only the underlying Hubstorage client is closed here.

        :param timeout: (optional) float timeout secs to stop gracefully.
        """
        self._hsclient.close(timeout=timeout)

scrapinghub/client/activity.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
from __future__ import absolute_import
2+
3+
from .utils import _Proxy
4+
from .utils import parse_job_key
5+
6+
7+
class Activity(_Proxy):
    """Representation of collection of job activity events.

    Not a public constructor: use :class:`Project` instance to get a
    :class:`Activity` instance. See :attr:`Project.activity` attribute.

    Please note that list() method can use a lot of memory and for a large
    amount of activities it's recommended to iterate through it via iter()
    method (all params and available filters are same for both methods).

    Usage:

    - get all activity from a project::

        >>> project.activity.iter()
        <generator object jldecode at 0x1049ee990>

    - get only last 2 events from a project::

        >>> project.activity.list(count=2)
        [{'event': 'job:completed', 'job': '123/2/3', 'user': 'jobrunner'},
         {'event': 'job:started', 'job': '123/2/3', 'user': 'john'}]

    - post a new event::

        >>> event = {'event': 'job:completed',
        ...          'job': '123/2/4',
        ...          'user': 'jobrunner'}
        >>> project.activity.add(event)

    - post multiple events at once::

        >>> events = [
        ...     {'event': 'job:completed', 'job': '123/2/5', 'user': 'jobrunner'},
        ...     {'event': 'job:cancelled', 'job': '123/2/6', 'user': 'john'},
        ... ]
        >>> project.activity.add(events)

    """

    def __init__(self, *args, **kwargs):
        super(Activity, self).__init__(*args, **kwargs)
        self._proxy_methods([('iter', 'list')])
        self._wrap_iter_methods(['iter'])

    def add(self, values, **kwargs):
        """Add new event to the project activity.

        :param values: a single event or a list of events, where event is
            represented with a dictionary of ('event', 'job', 'user') keys.
            Any other iterable of events is converted to a list.
        :param \\*\\*kwargs: passed through to the underlying ``post`` call.
        :raises ValueError: if an event is not a dictionary, or if an event's
            'job' key refers to a project other than this one.
        """
        if isinstance(values, dict):
            # A single event: wrap it. ``list(values)`` would yield the
            # dictionary keys and make the validation below always fail.
            values = [values]
        elif not isinstance(values, list):
            values = list(values)
        # Validate every event before posting anything.
        for activity in values:
            if not isinstance(activity, dict):
                raise ValueError("Please pass events as dictionaries")
            job_key = activity.get('job')
            if job_key and parse_job_key(job_key).project_id != self.key:
                raise ValueError('Please use same project id')
        self._origin.post(values, **kwargs)

0 commit comments

Comments
 (0)