From 70e94cf30bb76ea064ae8b755eae4024028d9d67 Mon Sep 17 00:00:00 2001 From: Alexey Vazhnov Date: Mon, 16 Sep 2024 02:14:38 +0200 Subject: [PATCH 1/7] Add method `UserHistory(UserId, TimeFrom) which returns all changesets for a user Another ways: * Web UI: https://www.openstreetmap.org/user/Alexey%20Vazhnov/history * Manual API request: https://api.openstreetmap.org/api/0.6/changesets?display_name=Alexey%20Vazhnov Possible improvements: * add possibility to use `user=#uid` as alternative to current `display_name=UserId` * URL encoding for `UserId`, for example replace ` ` with %20 * if keep using `return`, maybe use `OrderedDict` to save the order of records * use `yield` (but then resulting structure will be different) --- osmapi/OsmApi.py | 57 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/osmapi/OsmApi.py b/osmapi/OsmApi.py index b0d1c9e..d3d8f8d 100644 --- a/osmapi/OsmApi.py +++ b/osmapi/OsmApi.py @@ -25,6 +25,8 @@ """ +import time +import datetime import xml.dom.minidom import xml.parsers.expat import urllib.parse @@ -1166,6 +1168,61 @@ def RelationsGet(self, RelationIdList): result[data["id"]] = data return result + ################################################## + # User # + ################################################## + + def UserHistory(self, UserId, TimeFrom="1990-01-01"): + """ + Returns a dict of dicts of changesets for user. + Structure example: + + #!python + { + 3325270: {'changes_count': '28', + 'closed_at': datetime.datetime(2009, 12, 8, 14, 39, 50), + 'comments_count': 0, + 'created_at': datetime.datetime(2009, 12, 8, 14, 39, 47), + 'discussion': [], + 'id': 3325270, + 'max_lat': '54.3280590', + 'max_lon': '59.3791874', + 'min_lat': '54.3241120', + 'min_lon': '59.3739293', + 'open': False, + 'tag': {'comment': 'text', + 'created_by': 'text'}, + 'uid': 91771, + 'user': 'Alexey Vazhnov'}}, + { + ... + }, + } + """ + newest_time_from = datetime.datetime.fromisoformat(TimeFrom) + result = {} + need_fetch = True + while need_fetch: + need_fetch = False + time_str_ruby = newest_time_from.isoformat() + uri = f"/api/0.6/changesets?from={time_str_ruby}&order=oldest&display_name={UserId}" + data = self._session._get(uri) + changes = dom.OsmResponseToDom(data, tag="changeset") + for change in changes: + data = dom.DomParseChangeset(change, include_discussion=True) + change_id = data["id"] + logger.debug("id: %s", change_id) + if change_id in result: + logger.debug("Ignoring id %s (already in result)", change_id) + else: + result[change_id] = data + created_at = data["created_at"] + if created_at > newest_time_from: + newest_time_from = created_at + need_fetch = True + time.sleep(0.2) + return result + ################################################## # Changeset # ################################################## From 39e8c58ac103b2a5c172f669eb739202043f830d Mon Sep 17 00:00:00 2001 From: Alexey Vazhnov Date: Sat, 5 Oct 2024 00:01:57 +0200 Subject: [PATCH 2/7] UserHistory: add `limit` argument, add type hints --- osmapi/OsmApi.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/osmapi/OsmApi.py b/osmapi/OsmApi.py index d3d8f8d..eb16834 100644 --- a/osmapi/OsmApi.py +++ b/osmapi/OsmApi.py @@ -1172,9 +1172,17 @@ def RelationsGet(self, RelationIdList): # User # ################################################## - def UserHistory(self, UserId, TimeFrom="1990-01-01"): + def UserHistory(self, + UserId: str, + TimeFrom: str = "1990-01-01", + limit: int = 0) -> dict[dict]: """ Returns a dict of dicts of changesets for user. + + Limits and defaults (changesets `maximum_elements` and changesets + `default_query_limit`) can be received by `capabilities()` method from + `/api/capabilities`. + Structure example: #!python @@ -1220,6 +1228,8 @@ def UserHistory(self, UserId, TimeFrom="1990-01-01"): if created_at > newest_time_from: newest_time_from = created_at need_fetch = True + if limit and len(result) >= limit: + need_fetch = False time.sleep(0.2) return result From fd4e3a10b23950b334bbacb4d2c6499de465c47f Mon Sep 17 00:00:00 2001 From: Alexey Vazhnov Date: Sat, 5 Oct 2024 01:54:13 +0200 Subject: [PATCH 3/7] UserHistory(TimeFrom): use native `datetime.datetime`. The default date is 2005-01-01 because of https://www.openstreetmap.org/changeset/1 which is 2005-04-09, so probably there are no changes earlier in the OSM DB. --- osmapi/OsmApi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/osmapi/OsmApi.py b/osmapi/OsmApi.py index eb16834..9fe0953 100644 --- a/osmapi/OsmApi.py +++ b/osmapi/OsmApi.py @@ -1174,7 +1174,7 @@ def RelationsGet(self, RelationIdList): def UserHistory(self, UserId: str, - TimeFrom: str = "1990-01-01", + TimeFrom: datetime.datetime = datetime.datetime(2005, 1, 1, 0, 0, 0), limit: int = 0) -> dict[dict]: """ Returns a dict of dicts of changesets for user. @@ -1207,7 +1207,7 @@ def UserHistory(self, }, } """ - newest_time_from = datetime.datetime.fromisoformat(TimeFrom) + newest_time_from = TimeFrom result = {} need_fetch = True while need_fetch: From f0c224cc4c0f583dd4889e6ad0da89144a092b9c Mon Sep 17 00:00:00 2001 From: Alexey Vazhnov Date: Sat, 5 Oct 2024 01:56:54 +0200 Subject: [PATCH 4/7] Add `examples/fetch_user_history.py` --- examples/fetch_user_history.py | 91 ++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 examples/fetch_user_history.py diff --git a/examples/fetch_user_history.py b/examples/fetch_user_history.py new file mode 100644 index 0000000..2f8ea41 --- /dev/null +++ b/examples/fetch_user_history.py @@ -0,0 +1,91 @@ +""" +Fetch user history of changes from OpenStreetMap. + +If there are more than 100 changes, `osmapi` will repeat request till all +changes will be fetched. + +See +https://wiki.openstreetmap.org/wiki/API_v0.6#Query:_GET_/api/0.6/changesets +and +https://wiki.openstreetmap.org/wiki/API_v0.6#Capabilities:_GET_/api/capabilities +for more details. +""" + +import datetime +import argparse +import logging +import json +import osmapi + + +def parse_args(): + parser = argparse.ArgumentParser( + description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument( + "--username", + required=True, + help="OpenStreetMap username. Should be url-encoded if has special characters.", + ) + parser.add_argument("--filename", help="JSON file to store.") + parser.add_argument( + "--api", + default="https://api.openstreetmap.org/api/0.6/", + help="Set OpenStreetMap API URL. Use https://master.apis.dev.openstreetmap.org/api/0.6/ for experiments.", + ) + parser.add_argument( + "--start", + metavar="YYYY-MM-DD", + # Can't use `datetime.date` here because of error in `osmapi.UserHistory`: + # TypeError: can't compare datetime.datetime to datetime.date + type=datetime.datetime.fromisoformat, + help="History start date (by default, fetch everything)", + ) + parser.add_argument( + "--limit", + default=0, + metavar="INT", + type=int, + help="0 is unlimited. The result will probably be more than limit", + ) + parser.add_argument( + "--loglevel", + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="Set logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL", + ) + args = parser.parse_args() + logging.basicConfig( + level=getattr(logging, args.loglevel), + format="%(levelname)s:%(funcName)s:%(message)s", + ) + logging.debug("All parsed arguments:") + for arg, value in sorted(vars(args).items()): + logging.debug("Argument %s: %s", arg, value) + return args + + +def main(): + config = parse_args() + logging.debug("Script started!") + api = osmapi.OsmApi() + logging.warning("Limits (capabilities): %s", api.Capabilities()["changesets"]) + if config.start: + history = api.UserHistory( + UserId=config.username, limit=config.limit, TimeFrom=config.start + ) + else: + history = api.UserHistory(UserId=config.username, limit=config.limit) + if config.filename: + with open(config.filename, encoding="utf-8", mode="w") as f: + # `default=str` — to avoid an error + # "Object of type datetime is not JSON serializable" + json.dump(history, f, indent=4, default=str) + else: + for k, v in history.items(): + print(k, ":", v) + logging.debug("Script finished!") + + +if __name__ == "__main__": + main() From e361009a9cfd9bcbfbb015991932de0de38a9ddd Mon Sep 17 00:00:00 2001 From: Alexey Vazhnov Date: Sat, 5 Oct 2024 02:25:50 +0200 Subject: [PATCH 5/7] Add support to store .pickle files --- examples/fetch_user_history.py | 36 +++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/examples/fetch_user_history.py b/examples/fetch_user_history.py index 2f8ea41..7e4065b 100644 --- a/examples/fetch_user_history.py +++ b/examples/fetch_user_history.py @@ -15,6 +15,7 @@ import argparse import logging import json +import pickle import osmapi @@ -27,7 +28,10 @@ def parse_args(): required=True, help="OpenStreetMap username. Should be url-encoded if has special characters.", ) - parser.add_argument("--filename", help="JSON file to store.") + parser.add_argument( + "--filename", + help="File to store, supported formats: JSON and pickle (selected by extension).", + ) parser.add_argument( "--api", default="https://api.openstreetmap.org/api/0.6/", @@ -65,6 +69,27 @@ def parse_args(): return args +def save_file(filename: str, history: dict[dict]): + """ + Handle storing a file in different formats, depending on filename extension. + If no filename provided, print to STDOUT. + """ + logging.info("Items in history: %s", len(history)) + if not filename: + for k, v in history.items(): + print(k, ":", v) + elif filename.endswith(".json"): + with open(filename, encoding="utf-8", mode="w") as f: + # `default=str` — to avoid an error + # "Object of type datetime is not JSON serializable" + json.dump(history, f, indent=4, default=str) + elif filename.endswith(".pickle"): + with open(filename, mode="wb") as f: + pickle.dump(history, f) + else: + logging.error("Use known file extension to save the file.") + + def main(): config = parse_args() logging.debug("Script started!") @@ -76,14 +101,7 @@ def main(): ) else: history = api.UserHistory(UserId=config.username, limit=config.limit) - if config.filename: - with open(config.filename, encoding="utf-8", mode="w") as f: - # `default=str` — to avoid an error - # "Object of type datetime is not JSON serializable" - json.dump(history, f, indent=4, default=str) - else: - for k, v in history.items(): - print(k, ":", v) + save_file(config.filename, history) logging.debug("Script finished!") From 238fb46100109ee5c6e18a6efa8ba59b7740886f Mon Sep 17 00:00:00 2001 From: Alexey Vazhnov Date: Sat, 5 Oct 2024 12:50:38 +0200 Subject: [PATCH 6/7] Add support to save CSV files and to load Python "pickle" file, which is good for experiments --- examples/fetch_user_history.py | 61 +++++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 9 deletions(-) diff --git a/examples/fetch_user_history.py b/examples/fetch_user_history.py index 7e4065b..ed273a6 100644 --- a/examples/fetch_user_history.py +++ b/examples/fetch_user_history.py @@ -2,7 +2,15 @@ Fetch user history of changes from OpenStreetMap. If there are more than 100 changes, `osmapi` will repeat request till all -changes will be fetched. +changes are be fetched. + +Also script can store the history data into "pickle" file format, so it is +possible to load it again without fetching OSM API. + +Example of storing the history data into different formats: +python3 ./examples/fetch_user_history.py --username 'My%20user' --loglevel INFO --filename /tmp/MyHistory.pickle +python3 ./examples/fetch_user_history.py --username 'My%20user' --loglevel INFO --load-pickle /tmp/MyHistory.pickle --filename /tmp/1.csv +python3 ./examples/fetch_user_history.py --username 'My%20user' --loglevel INFO --load-pickle /tmp/MyHistory.pickle --filename /tmp/1.json See https://wiki.openstreetmap.org/wiki/API_v0.6#Query:_GET_/api/0.6/changesets @@ -14,6 +22,7 @@ import datetime import argparse import logging +import csv import json import pickle import osmapi @@ -30,7 +39,13 @@ def parse_args(): ) parser.add_argument( "--filename", - help="File to store, supported formats: JSON and pickle (selected by extension).", + metavar="FILENAME", + help="File to store, supported formats: JSON, CSV or pickle (selected by extension).", + ) + parser.add_argument( + "--load-pickle", + metavar="FILENAME", + help="Instead of fetching the history data from OSM API, use previously stored 'pickle' file.", ) parser.add_argument( "--api", @@ -69,6 +84,15 @@ def parse_args(): return args +def load_pickle(filename: str): + """ + Load "pickle" file, stored beforehand by `save_file`. + To have possibility to play with data without stressing out OSM API server. + """ + with open(filename, "rb") as f: + return pickle.load(f) + + def save_file(filename: str, history: dict[dict]): """ Handle storing a file in different formats, depending on filename extension. @@ -86,21 +110,40 @@ def save_file(filename: str, history: dict[dict]): elif filename.endswith(".pickle"): with open(filename, mode="wb") as f: pickle.dump(history, f) + elif filename.endswith(".csv"): + with open(filename, encoding="utf-8", mode="w", newline="") as f: + fieldnames = ["id", "created_at", "tag"] + writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore") + writer.writeheader() + for i in history: + writer.writerow(history[i]) else: - logging.error("Use known file extension to save the file.") + logging.error("Use known file extension to save the file: .json, .csv or .pickle") + + +def convert_data(history): + # PUT YOUR CODE HERE IF NEEDED + pass def main(): config = parse_args() logging.debug("Script started!") - api = osmapi.OsmApi() - logging.warning("Limits (capabilities): %s", api.Capabilities()["changesets"]) - if config.start: - history = api.UserHistory( - UserId=config.username, limit=config.limit, TimeFrom=config.start + if config.load_pickle: + logging.info( + "Instead of using OSM API, loading 'pickle' file '%s'", config.load_pickle ) + history = load_pickle(config.load_pickle) else: - history = api.UserHistory(UserId=config.username, limit=config.limit) + api = osmapi.OsmApi() + logging.info("Limits (capabilities): %s", api.Capabilities()["changesets"]) + if config.start: + history = api.UserHistory( + UserId=config.username, limit=config.limit, TimeFrom=config.start + ) + else: + history = api.UserHistory(UserId=config.username, limit=config.limit) + convert_data(history) save_file(config.filename, history) logging.debug("Script finished!") From 78e4a95dd37108c511a1c310e6bcf1b28eed1808 Mon Sep 17 00:00:00 2001 From: Alexey Vazhnov Date: Sun, 6 Oct 2024 12:52:44 +0200 Subject: [PATCH 7/7] UserHistory: remove `time.sleep`, it was used for early experiments --- osmapi/OsmApi.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/osmapi/OsmApi.py b/osmapi/OsmApi.py index 9fe0953..83790fa 100644 --- a/osmapi/OsmApi.py +++ b/osmapi/OsmApi.py @@ -25,7 +25,6 @@ """ -import time import datetime import xml.dom.minidom import xml.parsers.expat @@ -1230,7 +1229,6 @@ def UserHistory(self, need_fetch = True if limit and len(result) >= limit: need_fetch = False - time.sleep(0.2) return result ##################################################