# File: examples/fetch_user_history.py (new file introduced by this patch)
"""
Fetch user history of changes from OpenStreetMap.

If there are more than 100 changes, `osmapi` will repeat the request until all
changes are fetched.

Also script can store the history data into "pickle" file format, so it is
possible to load it again without fetching OSM API.

Example of storing the history data into different formats:
python3 ./examples/fetch_user_history.py --username 'My%20user' --loglevel INFO --filename /tmp/MyHistory.pickle
python3 ./examples/fetch_user_history.py --username 'My%20user' --loglevel INFO --load-pickle /tmp/MyHistory.pickle --filename /tmp/1.csv
python3 ./examples/fetch_user_history.py --username 'My%20user' --loglevel INFO --load-pickle /tmp/MyHistory.pickle --filename /tmp/1.json

See
https://wiki.openstreetmap.org/wiki/API_v0.6#Query:_GET_/api/0.6/changesets
and
https://wiki.openstreetmap.org/wiki/API_v0.6#Capabilities:_GET_/api/capabilities
for more details.
"""

import datetime
import argparse
import logging
import csv
import json
import pickle

import osmapi


def parse_args():
    """
    Parse command line arguments and configure logging.

    Returns the `argparse.Namespace` with attributes `username`, `filename`,
    `load_pickle`, `api`, `start`, `limit` and `loglevel`.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "--username",
        required=True,
        help="OpenStreetMap username. Should be url-encoded if has special characters.",
    )
    parser.add_argument(
        "--filename",
        metavar="FILENAME",
        help="File to store, supported formats: JSON, CSV or pickle (selected by extension).",
    )
    parser.add_argument(
        "--load-pickle",
        metavar="FILENAME",
        help="Instead of fetching the history data from OSM API, use previously stored 'pickle' file.",
    )
    parser.add_argument(
        "--api",
        default="https://api.openstreetmap.org/api/0.6/",
        help="Set OpenStreetMap API URL. Use https://master.apis.dev.openstreetmap.org/api/0.6/ for experiments.",
    )
    parser.add_argument(
        "--start",
        metavar="YYYY-MM-DD",
        # Can't use `datetime.date` here because of error in `osmapi.UserHistory`:
        # TypeError: can't compare datetime.datetime to datetime.date
        type=datetime.datetime.fromisoformat,
        help="History start date (by default, fetch everything)",
    )
    parser.add_argument(
        "--limit",
        default=0,
        metavar="INT",
        type=int,
        help="0 is unlimited. The result will probably be more than limit",
    )
    parser.add_argument(
        "--loglevel",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Set logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL",
    )
    args = parser.parse_args()
    # Logging is configured here so that the argument dump below is already
    # subject to the requested level.
    logging.basicConfig(
        level=getattr(logging, args.loglevel),
        format="%(levelname)s:%(funcName)s:%(message)s",
    )
    logging.debug("All parsed arguments:")
    for arg, value in sorted(vars(args).items()):
        logging.debug("Argument %s: %s", arg, value)
    return args


def load_pickle(filename: str):
    """
    Load "pickle" file, stored beforehand by `save_file`.
    To have possibility to play with data without stressing out OSM API server.

    WARNING: unpickling can execute arbitrary code. Only load files that this
    script (or another trusted source) has written.
    """
    with open(filename, "rb") as f:
        return pickle.load(f)


def save_file(filename: str, history: dict[int, dict]):
    """
    Handle storing a file in different formats, depending on filename extension.
    If no filename provided, print to STDOUT.

    Supported extensions are `.json`, `.pickle` and `.csv`; for any other
    extension an error is logged and nothing is written.
    """
    logging.info("Items in history: %s", len(history))
    if not filename:
        for k, v in history.items():
            print(k, ":", v)
    elif filename.endswith(".json"):
        with open(filename, encoding="utf-8", mode="w") as f:
            # `default=str` — to avoid an error
            # "Object of type datetime is not JSON serializable"
            json.dump(history, f, indent=4, default=str)
    elif filename.endswith(".pickle"):
        with open(filename, mode="wb") as f:
            pickle.dump(history, f)
    elif filename.endswith(".csv"):
        # `newline=""` is required by the csv module to avoid doubled line
        # endings on some platforms.
        with open(filename, encoding="utf-8", mode="w", newline="") as f:
            fieldnames = ["id", "created_at", "tag"]
            # Changesets carry more keys than we export; drop the rest silently.
            writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
            writer.writeheader()
            writer.writerows(history.values())
    else:
        logging.error("Use known file extension to save the file: .json, .csv or .pickle")


def convert_data(history):
    """Hook for custom post-processing of `history` before it is saved."""
    # PUT YOUR CODE HERE IF NEEDED
    pass


def _api_base_url(url: str) -> str:
    """
    Reduce an API URL such as `https://host/api/0.6/` to the bare `https://host`.

    The request paths issued by `UserHistory` already start with `/api/0.6/`,
    so the versioned suffix must not be part of the base URL.
    """
    base = url.rstrip("/")
    suffix = "/api/0.6"
    if base.endswith(suffix):
        base = base[: -len(suffix)]
    return base


def main():
    """Fetch (or load) the user history, post-process it and store it."""
    config = parse_args()
    logging.debug("Script started!")
    if config.load_pickle:
        logging.info(
            "Instead of using OSM API, loading 'pickle' file '%s'", config.load_pickle
        )
        history = load_pickle(config.load_pickle)
    else:
        # Honour `--api`; previously the option was parsed but never used, so
        # every run went to the library's default server.
        api = osmapi.OsmApi(api=_api_base_url(config.api))
        logging.info("Limits (capabilities): %s", api.Capabilities()["changesets"])
        query = {"UserId": config.username, "limit": config.limit}
        if config.start:
            # Only override the library default when a start date was given.
            query["TimeFrom"] = config.start
        history = api.UserHistory(**query)
    convert_data(history)
    save_file(config.filename, history)
    logging.debug("Script finished!")


if __name__ == "__main__":
    main()


# ---------------------------------------------------------------------------
# Remainder of this patch: osmapi/OsmApi.py
#   * adds `import datetime` to the module imports (before
#     `import xml.dom.minidom`)
#   * inserts the "User" section that follows, right after `RelationsGet`
# ---------------------------------------------------------------------------

##################################################
#                      User                      #
##################################################

def UserHistory(
    self,
    UserId: str,
    TimeFrom: datetime.date = datetime.datetime(2005, 1, 1, 0, 0, 0),
    limit: int = 0,
) -> dict[int, dict]:
    """
    Returns a dict of dicts of changesets for user, keyed by changeset id.

    The changesets are requested oldest first, page by page: after each
    page the query is repeated starting from the newest `created_at` seen
    so far, until a page brings no newer changeset.

    Limits and defaults (changesets `maximum_elements` and changesets
    `default_query_limit`) can be received by `Capabilities()` method from
    `/api/capabilities`.

    Parameters:

    * `UserId` - despite its name, this is sent as the `display_name`
      query parameter. It is inserted into the URL verbatim, so it must
      already be URL-encoded if it has special characters.
    * `TimeFrom` - fetch changesets starting from this moment. A plain
      `datetime.date` is accepted too and means midnight of that day.
      The value is compared with the parsed `created_at` of each
      changeset; the example below shows those as naive datetimes, so a
      timezone-aware value presumably cannot be compared (TODO confirm).
    * `limit` - stop fetching further pages once at least this many
      changesets are collected; `0` means unlimited. The check runs once
      per page, so the result can hold more than `limit` items.

    Structure example:

        #!python
        {
            3325270: {'changes_count': '28',
                      'closed_at': datetime.datetime(2009, 12, 8, 14, 39, 50),
                      'comments_count': 0,
                      'created_at': datetime.datetime(2009, 12, 8, 14, 39, 47),
                      'discussion': [],
                      'id': 3325270,
                      'max_lat': '54.3280590',
                      'max_lon': '59.3791874',
                      'min_lat': '54.3241120',
                      'min_lon': '59.3739293',
                      'open': False,
                      'tag': {'comment': 'text',
                              'created_by': 'text'},
                      'uid': 91771,
                      'user': 'Alexey Vazhnov'},
            ...
        }
    """
    # `datetime.datetime` is a subclass of `datetime.date`, so test for the
    # subclass: anything else is a plain date and gets promoted to midnight.
    # Without this, comparing it with `created_at` below raises TypeError.
    if not isinstance(TimeFrom, datetime.datetime):
        TimeFrom = datetime.datetime.combine(TimeFrom, datetime.time.min)

    newest_time_from = TimeFrom
    result = {}
    need_fetch = True
    while need_fetch:
        # Another round is needed only if this page moves the time forward.
        need_fetch = False
        time_str = newest_time_from.isoformat()
        uri = f"/api/0.6/changesets?from={time_str}&order=oldest&display_name={UserId}"
        response = self._session._get(uri)
        for node in dom.OsmResponseToDom(response, tag="changeset"):
            changeset = dom.DomParseChangeset(node, include_discussion=True)
            change_id = changeset["id"]
            logger.debug("id: %s", change_id)
            if change_id in result:
                # Consecutive pages can return the same changeset again,
                # since each query restarts at the newest timestamp seen.
                logger.debug("Ignoring id %s (already in result)", change_id)
            else:
                result[change_id] = changeset
            created_at = changeset["created_at"]
            if created_at > newest_time_from:
                newest_time_from = created_at
                need_fetch = True
        # The limit overrides the "newer data seen" signal, but only between
        # pages: a page is always consumed completely.
        if limit and len(result) >= limit:
            need_fetch = False
    return result


##################################################
#                   Changeset                    #
##################################################