From c37c0aec5f07bf98d00cb3110a59ddf6b92a4a45 Mon Sep 17 00:00:00 2001
From: Justin Tracey
Date: Thu, 23 Jan 2025 20:57:41 -0500
Subject: [PATCH 1/2] more robust parsing of user counts

The latest userstats-relay-country.csv file has a row with scientific
notation. Parsing as a float first prevents failing on that.
---
 tornettools/stage.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tornettools/stage.py b/tornettools/stage.py
index 186cd12..35cfae7 100644
--- a/tornettools/stage.py
+++ b/tornettools/stage.py
@@ -60,7 +60,7 @@ def stage_users(args, min_unix_time, max_unix_time):
 
             date = str(parts[0]) # like '2019-01-01'
             country_code = str(parts[1]) # like 'us'
-            user_count = int(parts[2]) # like '14714'
+            user_count = int(float(parts[2])) # like '14714' or '2e+05'
 
             dt = datetime.strptime(date, "%Y-%m-%d").replace(tzinfo=timezone.utc)
             unix_time = int(dt.strftime("%s")) # returns stamp like 1548910800

From de56193c7d406a04289f1c2de62a83c98dd54e18 Mon Sep 17 00:00:00 2001
From: Justin Tracey
Date: Wed, 29 Jan 2025 16:11:20 -0500
Subject: [PATCH 2/2] additional comment in stage.py

---
 tornettools/stage.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tornettools/stage.py b/tornettools/stage.py
index 35cfae7..150561a 100644
--- a/tornettools/stage.py
+++ b/tornettools/stage.py
@@ -60,6 +60,8 @@ def stage_users(args, min_unix_time, max_unix_time):
 
             date = str(parts[0]) # like '2019-01-01'
             country_code = str(parts[1]) # like 'us'
+            # At least one float has been observed in the file:
+            #
             user_count = int(float(parts[2])) # like '14714' or '2e+05'
 
             dt = datetime.strptime(date, "%Y-%m-%d").replace(tzinfo=timezone.utc)