Skip to content

Commit bd688fb

Browse files
committed
Frontier newcount per slot
1 parent f97e81f commit bd688fb

File tree

1 file changed

+44
-2
lines changed

1 file changed

+44
-2
lines changed

scrapinghub/client.py

Lines changed: 44 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import json
22
import collections
3+
from functools import partial
4+
from collections import defaultdict
35

46
from six import string_types
57
from requests.compat import urljoin
@@ -9,6 +11,7 @@
911

1012
from .hubstorage.resourcetype import DownloadableResource
1113
from .hubstorage.resourcetype import ItemsResourceType
14+
from .hubstorage.utils import urlpathjoin
1215

1316
# scrapinghub.hubstorage classes to use as-is
1417
from .hubstorage.job import JobMeta
@@ -227,7 +230,7 @@ def __init__(self, client, projectid):
227230
# proxied sub-resources
228231
self.activity = Activity(_Activity, client, projectid)
229232
self.collections = Collections(_Collections, client, projectid)
230-
self.frontiers = Frontiers(_Frontier, client, projectid)
233+
self.frontiers = Frontiers(_HSFrontier, client, projectid)
231234
self.settings = Settings(client._hsclient, projectid)
232235

233236

@@ -1051,6 +1054,34 @@ def post(self, _value, **kwargs):
10511054
self._origin.post(_value, **kwargs)
10521055

10531056

1057+
class _HSFrontier(_Frontier):
    """Modified hubstorage Frontier with newcount per slot.

    ``newcount`` is a ``defaultdict(int)`` keyed by ``(frontier, slot)``
    tuples. Each batch writer created by :meth:`_get_writer` reports back
    through :meth:`_writer_callback`, which adds the reported ``newcount``
    to the entry for that writer's key.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the parent frontier, then set up per-slot counters.

        All arguments are forwarded unchanged to ``_Frontier.__init__``.
        """
        super(_HSFrontier, self).__init__(*args, **kwargs)
        # Assigned after the parent initialiser so this mapping is the value
        # that ends up on the instance.
        self.newcount = defaultdict(int)

    def _get_writer(self, frontier, slot):
        """Return the batch writer for ``(frontier, slot)``, creating it once.

        :param frontier: frontier name, used as a URL path segment.
        :param slot: slot name, used as a URL path segment.
        :return: the writer cached in ``self._writers`` for this pair.
        """
        key = (frontier, slot)
        writer = self._writers.get(key)
        if not writer:
            writer = self.client.batchuploader.create_writer(
                url=urlpathjoin(self.url, frontier, 's', slot),
                auth=self.auth,
                size=self.batch_size,
                start=self.batch_start,
                interval=self.batch_interval,
                qsize=self.batch_qsize,
                content_encoding=self.batch_content_encoding,
                # Bind the key so the callback knows which slot to credit.
                callback=partial(self._writer_callback, key),
            )
            self._writers[key] = writer
        return writer

    def _writer_callback(self, key, response):
        """Add the ``newcount`` reported in ``response`` to ``key``'s counter.

        :param key: the ``(frontier, slot)`` tuple bound in ``_get_writer``.
        :param response: object whose ``json()`` returns a mapping with a
            ``"newcount"`` entry (presumably a requests response -- confirm
            against the batch uploader).
        """
        self.newcount[key] += response.json()["newcount"]
1083+
1084+
10541085
class Frontiers(_Proxy):
10551086
"""Frontiers collection for a project.
10561087
@@ -1095,7 +1126,7 @@ def list(self):
10951126

10961127
@property
def newcount(self):
    """Total newcount across every (frontier, slot) pair.

    Sums the values of the ``newcount`` mapping held by ``self._origin``.
    Returns ``0`` when the mapping is empty.
    """
    return sum(self._origin.newcount.values())
10991130

11001131

11011132
class Frontier(object):
@@ -1145,6 +1176,12 @@ def flush(self):
11451176
if fname == self.key:
11461177
writer.flush()
11471178

1179+
@property
def newcount(self):
    """Total newcount for this frontier, summed over its slots.

    Reads the ``(frontier, slot) -> count`` mapping from
    ``self._frontiers._origin.newcount`` and adds up the entries whose
    frontier component equals ``self.key``. Returns ``0`` when no entry
    matches.
    """
    newcount_values = self._frontiers._origin.newcount
    return sum(
        count
        for (frontier, _), count in newcount_values.items()
        if frontier == self.key
    )
1184+
11481185

11491186
class FrontierSlot(object):
11501187
"""Representation of a frontier slot object.
@@ -1211,6 +1248,11 @@ def flush(self):
12111248
if writer:
12121249
writer.flush()
12131250

1251+
@property
def newcount(self):
    """Newcount recorded for this slot of its frontier.

    Looks up ``(self._frontier.key, self.key)`` in the mapping held at
    ``self._frontier._frontiers._origin.newcount``. Uses ``.get`` so a
    slot with no recorded count yields ``0`` without inserting a key
    into the mapping.
    """
    newcount_values = self._frontier._frontiers._origin.newcount
    return newcount_values.get((self._frontier.key, self.key), 0)
1255+
12141256

12151257
class FrontierSlotFingerprints(object):
12161258

0 commit comments

Comments
 (0)