11import json
22import collections
3+ from functools import partial
4+ from collections import defaultdict
35
46from six import string_types
57from requests .compat import urljoin
911
1012from .hubstorage .resourcetype import DownloadableResource
1113from .hubstorage .resourcetype import ItemsResourceType
14+ from .hubstorage .utils import urlpathjoin
1215
1316# scrapinghub.hubstorage classes to use as-is
1417from .hubstorage .job import JobMeta
@@ -227,7 +230,7 @@ def __init__(self, client, projectid):
227230 # proxied sub-resources
228231 self .activity = Activity (_Activity , client , projectid )
229232 self .collections = Collections (_Collections , client , projectid )
230- self .frontiers = Frontiers (_Frontier , client , projectid )
233+ self .frontiers = Frontiers (_HSFrontier , client , projectid )
231234 self .settings = Settings (client ._hsclient , projectid )
232235
233236
@@ -1051,6 +1054,34 @@ def post(self, _value, **kwargs):
10511054 self ._origin .post (_value , ** kwargs )
10521055
10531056
1057+ class _HSFrontier (_Frontier ):
1058+ """Modified hubstorage Frontier with newcount per slot."""
1059+
1060+ def __init__ (self , * args , ** kwargs ):
1061+ super (_HSFrontier , self ).__init__ (* args , ** kwargs )
1062+ self .newcount = defaultdict (int )
1063+
1064+ def _get_writer (self , frontier , slot ):
1065+ key = (frontier , slot )
1066+ writer = self ._writers .get (key )
1067+ if not writer :
1068+ writer = self .client .batchuploader .create_writer (
1069+ url = urlpathjoin (self .url , frontier , 's' , slot ),
1070+ auth = self .auth ,
1071+ size = self .batch_size ,
1072+ start = self .batch_start ,
1073+ interval = self .batch_interval ,
1074+ qsize = self .batch_qsize ,
1075+ content_encoding = self .batch_content_encoding ,
1076+ callback = partial (self ._writer_callback , key ),
1077+ )
1078+ self ._writers [key ] = writer
1079+ return writer
1080+
1081+ def _writer_callback (self , key , response ):
1082+ self .newcount [key ] += response .json ()["newcount" ]
1083+
1084+
10541085class Frontiers (_Proxy ):
10551086 """Frontiers collection for a project.
10561087
@@ -1074,6 +1105,10 @@ class Frontiers(_Proxy):
10741105 - flush data of all frontiers of a project
10751106 >>> project.frontiers.flush()
10761107
1108+ - show amount of new requests added for all frontiers
1109+ >>> project.frontiers.newcount
1110+ 3
1111+
10771112 - close batch writers of all frontiers of a project
10781113 >>> project.frontiers.close()
10791114 """
@@ -1095,7 +1130,7 @@ def list(self):
10951130
10961131 @property
10971132 def newcount (self ):
1098- return self ._origin .newcount
1133+ return sum ( self ._origin .newcount . values ())
10991134
11001135
11011136class Frontier (object ):
@@ -1120,6 +1155,10 @@ class Frontier(object):
11201155
11211156 - flush frontier data
11221157 >>> frontier.flush()
1158+
1159+ - show amount of new requests added to frontier
1160+ >>> frontier.newcount
1161+ 3
11231162 """
11241163 def __init__ (self , client , frontiers , name ):
11251164 self .key = name
@@ -1145,6 +1184,12 @@ def flush(self):
11451184 if fname == self .key :
11461185 writer .flush ()
11471186
1187+ @property
1188+ def newcount (self ):
1189+ newcount_values = self ._frontiers ._origin .newcount
1190+ return sum (v for (frontier , _ ), v in newcount_values .items ()
1191+ if frontier == self .key )
1192+
11481193
11491194class FrontierSlot (object ):
11501195 """Representation of a frontier slot object.
@@ -1164,6 +1209,10 @@ class FrontierSlot(object):
11641209 - flush data for a slot
11651210 >>> slot.flush()
11661211
1212+ - show amount of new requests added to a slot
1213+ >>> slot.newcount
1214+ 2
1215+
11671216 - read requests from a slot
11681217 >>> slot.q.iter()
11691218 <generator object jldecode at 0x1049aa9e8>
@@ -1202,7 +1251,8 @@ def q(self):
12021251 def delete (self ):
12031252 """Delete the slot."""
12041253 origin = self ._frontier ._frontiers ._origin
1205- return origin .delete_slot (self ._frontier .key , self .key )
1254+ origin .delete_slot (self ._frontier .key , self .key )
1255+ origin .newcount .pop ((self ._frontier .key , self .key ), None )
12061256
12071257 def flush (self ):
12081258 """Flush data for the slot."""
@@ -1211,6 +1261,11 @@ def flush(self):
12111261 if writer :
12121262 writer .flush ()
12131263
1264+ @property
1265+ def newcount (self ):
1266+ newcount_values = self ._frontier ._frontiers ._origin .newcount
1267+ return newcount_values .get ((self ._frontier .key , self .key ), 0 )
1268+
12141269
12151270class FrontierSlotFingerprints (object ):
12161271
0 commit comments