Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 152 additions & 0 deletions examples/fetch_user_history.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""
Fetch user history of changes from OpenStreetMap.

If there are more than 100 changes, `osmapi` will repeat the request until all
changes are fetched.

The script can also store the history data in "pickle" file format, so it can
be loaded again later without querying the OSM API.

Example of storing the history data into different formats:
python3 ./examples/fetch_user_history.py --username 'My%20user' --loglevel INFO --filename /tmp/MyHistory.pickle
python3 ./examples/fetch_user_history.py --username 'My%20user' --loglevel INFO --load-pickle /tmp/MyHistory.pickle --filename /tmp/1.csv
python3 ./examples/fetch_user_history.py --username 'My%20user' --loglevel INFO --load-pickle /tmp/MyHistory.pickle --filename /tmp/1.json

See
https://wiki.openstreetmap.org/wiki/API_v0.6#Query:_GET_/api/0.6/changesets
and
https://wiki.openstreetmap.org/wiki/API_v0.6#Capabilities:_GET_/api/capabilities
for more details.
"""

import datetime
import argparse
import logging
import csv
import json
import pickle
import osmapi


def parse_args(argv=None):
    """
    Parse the command-line options and configure logging.

    Args:
        argv: Optional list of argument strings to parse. When `None`
            (the default), argparse reads the arguments from `sys.argv[1:]`,
            which is what the script itself relies on.

    Returns:
        The `argparse.Namespace` with the attributes `username`, `filename`,
        `load_pickle`, `api`, `start`, `limit` and `loglevel`.

    Side effects:
        Calls `logging.basicConfig` with the level selected by `--loglevel`
        and logs every parsed argument at DEBUG level.
        On invalid arguments argparse prints the usage text and exits.
    """
    parser = argparse.ArgumentParser(
        description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
    )
    parser.add_argument(
        "--username",
        required=True,
        help="OpenStreetMap username. Should be url-encoded if has special characters.",
    )
    parser.add_argument(
        "--filename",
        metavar="FILENAME",
        help="File to store, supported formats: JSON, CSV or pickle (selected by extension).",
    )
    parser.add_argument(
        "--load-pickle",
        metavar="FILENAME",
        help="Instead of fetching the history data from OSM API, use previously stored 'pickle' file.",
    )
    parser.add_argument(
        "--api",
        default="https://api.openstreetmap.org/api/0.6/",
        help="Set OpenStreetMap API URL. Use https://master.apis.dev.openstreetmap.org/api/0.6/ for experiments.",
    )
    parser.add_argument(
        "--start",
        metavar="YYYY-MM-DD",
        # Can't use `datetime.date` here because of error in `osmapi.UserHistory`:
        # TypeError: can't compare datetime.datetime to datetime.date
        type=datetime.datetime.fromisoformat,
        help="History start date (by default, fetch everything)",
    )
    parser.add_argument(
        "--limit",
        default=0,
        metavar="INT",
        type=int,
        help="0 is unlimited. The result will probably be more than limit",
    )
    parser.add_argument(
        "--loglevel",
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
        help="Set logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL",
    )
    # Passing `None` keeps the original behaviour of parsing `sys.argv[1:]`.
    args = parser.parse_args(argv)
    logging.basicConfig(
        # `choices` above guarantees the name is a valid `logging` level attribute.
        level=getattr(logging, args.loglevel),
        format="%(levelname)s:%(funcName)s:%(message)s",
    )
    logging.debug("All parsed arguments:")
    for arg, value in sorted(vars(args).items()):
        logging.debug("Argument %s: %s", arg, value)
    return args


def load_pickle(filename: str):
    """
    Read back an object that was written in "pickle" format by `save_file`.

    This lets you experiment with the history data repeatedly without
    querying the OSM API server again.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        Whatever object the pickle file contains.
    """
    # NOTE: unpickling can execute arbitrary code, so only load files that
    # you created yourself with this script.
    with open(filename, mode="rb") as stream:
        restored = pickle.load(stream)
    return restored


def save_file(filename: str, history: dict[dict]):
    """
    Store the history in a format chosen by the filename extension.

    Supported extensions (matched case-insensitively): `.json`, `.pickle`
    and `.csv`. If no filename is provided, every item is printed to STDOUT.
    For any other extension an error is logged and nothing is written.

    Args:
        filename: Target path, or a falsy value to print to STDOUT.
        history: Mapping of changeset id to the changeset dict.
    """
    logging.info("Items in history: %s", len(history))
    if not filename:
        for k, v in history.items():
            print(k, ":", v)
        return

    # Compare against a lowered copy so that e.g. "OUT.JSON" is accepted too;
    # the file itself is still created under the name given by the caller.
    lowered = filename.lower()
    if lowered.endswith(".json"):
        with open(filename, encoding="utf-8", mode="w") as f:
            # `default=str` — to avoid an error
            # "Object of type datetime is not JSON serializable"
            json.dump(history, f, indent=4, default=str)
    elif lowered.endswith(".pickle"):
        with open(filename, mode="wb") as f:
            pickle.dump(history, f)
    elif lowered.endswith(".csv"):
        # `newline=""` is what the csv module expects for files it writes to.
        with open(filename, encoding="utf-8", mode="w", newline="") as f:
            fieldnames = ["id", "created_at", "tag"]
            # Only the columns above are exported; other keys are skipped.
            writer = csv.DictWriter(f, fieldnames=fieldnames, extrasaction="ignore")
            writer.writeheader()
            writer.writerows(history.values())
    else:
        logging.error("Use known file extension to save the file: .json, .csv or .pickle")


def convert_data(history):
    """
    Hook for custom post-processing of the fetched history.

    Does nothing by default and returns `None`; `history` is left untouched.
    """
    # PUT YOUR CODE HERE IF NEEDED
    return None


def main():
    """
    Script entry point: obtain the user history, then store or print it.

    The history is either loaded from a previously saved pickle file
    (`--load-pickle`) or fetched from the OSM API server selected by `--api`.
    It is then handed to `convert_data` (whose return value is not used) and
    to `save_file`.
    """
    config = parse_args()
    logging.debug("Script started!")
    if config.load_pickle:
        logging.info(
            "Instead of using OSM API, loading 'pickle' file '%s'", config.load_pickle
        )
        history = load_pickle(config.load_pickle)
    else:
        # `--api` holds the versioned endpoint (".../api/0.6/"), while osmapi
        # builds request paths that already start with "/api/0.6/", so only
        # the server root is handed over. Previously the option was parsed
        # but never used, so the library's default server was always queried.
        server = config.api.rstrip("/").removesuffix("/api/0.6")
        api = osmapi.OsmApi(api=server)
        logging.info("Limits (capabilities): %s", api.Capabilities()["changesets"])
        query = {"UserId": config.username, "limit": config.limit}
        if config.start:
            # Only override the library's default start time when asked to.
            query["TimeFrom"] = config.start
        history = api.UserHistory(**query)
    convert_data(history)
    save_file(config.filename, history)
    logging.debug("Script finished!")


if __name__ == "__main__":
    # Run the script only when executed directly, not when imported.
    main()
65 changes: 65 additions & 0 deletions osmapi/OsmApi.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

"""

import datetime
import xml.dom.minidom
import xml.parsers.expat
import urllib.parse
Expand Down Expand Up @@ -1166,6 +1167,70 @@ def RelationsGet(self, RelationIdList):
result[data["id"]] = data
return result

##################################################
# User #
##################################################

def UserHistory(
    self,
    UserId: str,
    TimeFrom: datetime.datetime = datetime.datetime(2005, 1, 1, 0, 0, 0),
    limit: int = 0,
) -> dict[dict]:
    """
    Returns a dict of dicts with the changesets of a user, keyed by
    changeset id.

    The changesets are requested oldest-first, starting at `TimeFrom`.
    The request is repeated, starting from the newest `created_at` seen so
    far, until a response brings no changeset newer than that.

    `UserId` is inserted into the `display_name` query parameter as is.

    `limit` stops further requests once at least that many changesets
    have been collected (0 means no limit). It is only checked after a
    whole response has been processed, so the result can hold more items
    than `limit`.

    Limits and defaults (changesets `maximum_elements` and changesets
    `default_query_limit`) can be received by the `Capabilities()` method
    from `/api/capabilities`.

    Structure example:

        #!python
        {
            3325270: {'changes_count': '28',
                      'closed_at': datetime.datetime(2009, 12, 8, 14, 39, 50),
                      'comments_count': 0,
                      'created_at': datetime.datetime(2009, 12, 8, 14, 39, 47),
                      'discussion': [],
                      'id': 3325270,
                      'max_lat': '54.3280590',
                      'max_lon': '59.3791874',
                      'min_lat': '54.3241120',
                      'min_lon': '59.3739293',
                      'open': False,
                      'tag': {'comment': 'text',
                              'created_by': 'text'},
                      'uid': 91771,
                      'user': 'Alexey Vazhnov'},
            ...
        }
    """
    window_start = TimeFrom
    result = {}
    while True:
        # Becomes True once this response contains a changeset newer than
        # anything seen before, i.e. there may be more to fetch.
        advanced = False
        from_iso = window_start.isoformat()
        uri = f"/api/0.6/changesets?from={from_iso}&order=oldest&display_name={UserId}"
        response = self._session._get(uri)
        for node in dom.OsmResponseToDom(response, tag="changeset"):
            changeset = dom.DomParseChangeset(node, include_discussion=True)
            change_id = changeset["id"]
            logger.debug("id: %s", change_id)
            if change_id in result:
                # Consecutive requests overlap at the boundary timestamp.
                logger.debug("Ignoring id %s (already in result)", change_id)
                continue
            result[change_id] = changeset
            created_at = changeset["created_at"]
            if created_at > window_start:
                window_start = created_at
                advanced = True
        if not advanced or (limit and len(result) >= limit):
            break
    return result

##################################################
# Changeset #
##################################################
Expand Down