diff --git a/CHANGELOG.md b/CHANGELOG.md index bdc66c8..2e08f9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,7 @@ # Changelog +## v1.5.0 11/19/24 +- Added cloudLibrary client + ## v1.4.0 9/23/24 - Added SFTP client diff --git a/README.md b/README.md index f695d8b..70446ab 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,7 @@ This package contains common Python utility classes and functions. * Connecting to and querying a PostgreSQL database using a connection pool * Connecting to and querying Redshift * Making requests to the Oauth2 authenticated APIs such as NYPL Platform API and Sierra +* Interacting with vendor APIs such as cloudLibrary ## Functions * Reading a YAML config file and putting the contents in os.environ -- see `config/sample.yaml` for an example of how the config file should be formatted @@ -37,7 +38,7 @@ kinesis_client = KinesisClient(...) # Do not use any version below 1.0.0 # All available optional dependencies can be found in pyproject.toml. # See the "Managing dependencies" section below for more details. -nypl-py-utils[kinesis-client,config-helper]==1.4.0 +nypl-py-utils[kinesis-client,config-helper]==1.5.0 ``` ## Developing locally @@ -63,7 +64,7 @@ The optional dependency sets also give the developer the option to manually list ### Using PostgreSQLClient in an AWS Lambda Because `psycopg` requires a statically linked version of the `libpq` library, the `PostgreSQLClient` cannot be installed as-is in an AWS Lambda function. 
_API_URL = "https://partner.yourcloudlibrary.com"
_VERSION = "3.0.2"


class CloudLibraryClient:
    """
    Client for interacting with CloudLibrary API v3.0.2

    Every request is signed individually: the current GMT timestamp, the
    HTTP method and the request path are joined with newlines, hashed with
    HMAC-SHA256 using the account key, and sent in the 3mcl-* headers.
    """

    # Formats accepted for caller-supplied dates, most specific first
    _DATE_FORMATS = ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d")

    def __init__(self, library_id, account_id, account_key):
        self.logger = create_log("cloudlibrary_client")
        self.library_id = library_id
        self.account_id = account_id
        self.account_key = account_key

        # Set up the HTTP session. No authentication happens here -- each
        # call is signed in _build_authorization. Only GETs are retried
        # (up to 3 times, on 5xx responses); see urllib3's Retry for the
        # exact sleep schedule produced by backoff_factor.
        retry_policy = Retry(total=3, backoff_factor=45,
                             status_forcelist=[500, 502, 503, 504],
                             allowed_methods=frozenset(["GET"]))
        self.session = requests.Session()
        self.session.mount("https://",
                           HTTPAdapter(max_retries=retry_policy))

    def get_library_events(self, start_date=None,
                           end_date=None) -> "requests.Response":
        """
        Retrieves all the events related to library-owned items within the
        optional timeframe. Pulls past 24 hours of events by default.

        start_date and end_date are optional parameters, and must be
        formatted either YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS. They are sent
        to the API exactly as given; parsing is only used to validate them
        and to check their order.

        Raises ValueError if a date matches neither format, and
        CloudLibraryClientError if start_date is later than end_date or
        if the underlying request fails.
        """
        default_format = self._DATE_FORMATS[0]
        now = datetime.now(timezone.utc)
        if start_date is None:
            start_date = (now - timedelta(1)).strftime(default_format)
        if end_date is None:
            end_date = now.strftime(default_format)

        if self._parse_date(start_date) > self._parse_date(end_date):
            error_message = (f"Start date {start_date} greater than end date "
                             f"{end_date}, cannot retrieve library events")
            self.logger.error(error_message)
            raise CloudLibraryClientError(error_message)

        self.logger.info(
            (f"Fetching all library events in "
             f"time frame {start_date} to {end_date}..."))

        path = f"data/cloudevents?startdate={start_date}&enddate={end_date}"
        response = self.request(path=path, method_type="GET")
        return response

    def create_request_body(self, request_type,
                            item_id, patron_id) -> str:
        """
        Helper function to generate request body when performing item
        and/or patron-specific functions (ex. checking out a title).
        """
        # NOTE(review): as written this emits an unclosed <request_type>
        # element followed by the two ids with nothing separating them.
        # Confirm against the cloudLibrary request schema -- child element
        # tags and the closing tag may have been lost from this template.
        request_template = "<%(request_type)s>%(item_id)s%(patron_id)s"  # noqa
        return request_template % {
            "request_type": request_type,
            "item_id": item_id,
            "patron_id": patron_id,
        }

    def request(self, path, method_type="GET",
                body=None) -> "requests.Response":
        """
        Use this method to call specific paths in the cloudLibrary API.
        This method is necessary for building headers/authorization.
        Example usage of this method is in the get_library_events function.

        path is appended to /cirrus/library/<library_id>/. method_type is
        case-insensitive; PUT and POST are sent as such and any other
        value is sent as a GET.

        Returns Response object by default -- you will need to parse this
        object to retrieve response text, status codes, etc.

        Raises CloudLibraryClientError if the request cannot be completed
        or the response has an error status.
        """
        # Normalize before signing: the method name is part of the signed
        # message and also decides between Accept and Content-Type.
        method_type = method_type.upper()
        extended_path = f"/cirrus/library/{self.library_id}/{path}"
        headers = self._build_headers(method_type, extended_path)
        url = f"{_API_URL}{extended_path}"

        try:
            senders = {"PUT": self.session.put, "POST": self.session.post}
            send = senders.get(method_type, self.session.get)
            response = send(url=url, data=body, headers=headers, timeout=60)
            response.raise_for_status()
        except Exception as e:
            error_message = (f"Failed to retrieve response from {url}: "
                             f"{repr(e)}")
            self.logger.error(error_message)
            raise CloudLibraryClientError(error_message) from e

        return response

    @classmethod
    def _parse_date(cls, date_string) -> datetime:
        """
        Parses a date in any of the accepted formats into a naive
        datetime. Raises ValueError if no format matches.
        """
        for date_format in cls._DATE_FORMATS:
            try:
                return datetime.strptime(date_string, date_format)
            except ValueError:
                continue
        raise ValueError(
            f"Date {date_string!r} must be formatted either YYYY-MM-DD or "
            f"YYYY-MM-DDTHH:MM:SS")

    def _build_headers(self, method_type, path) -> dict:
        """
        Builds the 3mcl-* headers for one request. GET requests ask for
        XML via Accept; all other methods declare an XML Content-Type.
        """
        time, authorization = self._build_authorization(
            method_type, path)
        headers = {
            "3mcl-Datetime": time,
            "3mcl-Authorization": authorization,
            "3mcl-APIVersion": _VERSION,
        }

        if method_type == "GET":
            headers["Accept"] = "application/xml"
        else:
            headers["Content-Type"] = "application/xml"

        return headers

    def _build_authorization(self, method_type,
                             path) -> tuple[str, str]:
        """
        Returns the timestamp used for signing and the matching
        "3MCLAUTH <account_id>:<signature>" authorization value. The
        timestamp must be sent unchanged in 3mcl-Datetime because it is
        part of the signed message.
        """
        now = datetime.now(timezone.utc).strftime(
            "%a, %d %b %Y %H:%M:%S GMT")
        message = "\n".join([now, method_type, path])
        digest = hmac.new(
            self.account_key.encode("utf-8"),
            msg=message.encode("utf-8"),
            digestmod=hashlib.sha256
        ).digest()
        signature = base64.standard_b64encode(digest).decode()

        return now, f"3MCLAUTH {self.account_id}:{signature}"


class CloudLibraryClientError(Exception):
    """Raised when a CloudLibraryClient call cannot be completed."""

    def __init__(self, message=None):
        # Pass the message to Exception so str(error) and tracebacks show
        # it; with no message, keep args empty as before.
        if message is None:
            super().__init__()
        else:
            super().__init__(message)
        self.message = message
_API_URL = "https://partner.yourcloudlibrary.com/cirrus/library/"

# catch-all API response since we're not testing actual data
_TEST_LIBRARY_EVENTS_RESPONSE = """
4302fcca-ef99-49bf-bd29-d673e990f765
2024-11-10T17:35:18
2012-11-11T13:58:52.055


4302fcca-ef99-49bf-bd29-d673e990f4a7
CHECKIN
2024-11-10T05:07:56
2024-11-10T07:50:59
edbz9
1234
9780307238405
TestUser1
1234
2024-11-10T17:35:18



"""

# Dotted path of the method replaced when only get_library_events is tested
_REQUEST_TARGET = "nypl_py_utils.classes.cloudlibrary_client.CloudLibraryClient.request"  # noqa

# The clock is frozen at _FROZEN_NOW, so the default window starts a day prior
_FROZEN_NOW = "2024-11-11T10:00:00"
_FROZEN_DAY_BEFORE = "2024-11-10T10:00:00"


def _events_path(start, end):
    """Relative events path the client is expected to request."""
    return f"data/cloudevents?startdate={start}&enddate={end}"


def _events_url(client, start, end):
    """Absolute events URL for the given client's library."""
    return f"{_API_URL}{client.library_id}/data/cloudevents?startdate={start}&enddate={end}"  # noqa


def _expected_headers(authorization, format_header):
    """Headers every signed request must carry at the frozen time."""
    return {
        "3mcl-Datetime": "Mon, 11 Nov 2024 10:00:00 GMT",
        "3mcl-Authorization": authorization,
        "3mcl-APIVersion": "3.0.2",
        format_header: "application/xml",
    }


def _assert_first_request(requests_mock, method, url, body, headers):
    """Check the first request recorded by requests_mock."""
    recorded = requests_mock.request_history[0]
    assert recorded.method == method
    assert recorded.url == url
    if body is None:
        assert recorded.body is None
    else:
        assert recorded.body == body
    assert headers.items() <= dict(recorded.headers).items()


@freeze_time("2024-11-11 10:00:00")
class TestCloudLibraryClient:
    """Tests for CloudLibraryClient, run with the clock frozen."""

    @pytest.fixture
    def test_instance(self):
        return CloudLibraryClient(
            "library_id", "account_id", "account_key")

    def test_get_library_events_success_no_args(
            self, test_instance, mocker):
        request_stub = mocker.patch(
            _REQUEST_TARGET, return_value=_TEST_LIBRARY_EVENTS_RESPONSE)

        result = test_instance.get_library_events()

        request_stub.assert_called_once_with(
            path=_events_path(_FROZEN_DAY_BEFORE, _FROZEN_NOW),
            method_type="GET")
        assert result == _TEST_LIBRARY_EVENTS_RESPONSE

    def test_get_library_events_success_with_start_and_end_date(
            self, test_instance, mocker):
        window = ("2024-11-01T10:00:00", "2024-11-05T10:00:00")
        request_stub = mocker.patch(
            _REQUEST_TARGET, return_value=_TEST_LIBRARY_EVENTS_RESPONSE)

        result = test_instance.get_library_events(*window)

        request_stub.assert_called_once_with(
            path=_events_path(*window), method_type="GET")
        assert result == _TEST_LIBRARY_EVENTS_RESPONSE

    def test_get_library_events_success_with_no_end_date(
            self, test_instance, mocker):
        begin = "2024-11-01T09:00:00"
        request_stub = mocker.patch(
            _REQUEST_TARGET, return_value=_TEST_LIBRARY_EVENTS_RESPONSE)

        result = test_instance.get_library_events(begin)

        # The omitted end date falls back to the frozen current time
        request_stub.assert_called_once_with(
            path=_events_path(begin, _FROZEN_NOW), method_type="GET")
        assert result == _TEST_LIBRARY_EVENTS_RESPONSE

    def test_get_library_events_exception_when_start_date_greater_than_end(
            self, test_instance, caplog):
        begin, finish = "2024-11-11T09:00:00", "2024-11-01T10:00:00"

        with pytest.raises(CloudLibraryClientError):
            test_instance.get_library_events(begin, finish)

        expected_log = (f"Start date {begin} greater than end date {finish}, "
                        f"cannot retrieve library events")
        assert expected_log in caplog.text

    def test_get_library_events_exception_when_connection_timeout(
            self, test_instance, requests_mock, caplog):
        target_url = _events_url(
            test_instance, _FROZEN_DAY_BEFORE, _FROZEN_NOW)

        # We're making sure that a separate error during a sub-method will
        # still result in CloudLibraryClientError
        requests_mock.get(target_url, exc=ConnectTimeout)

        with pytest.raises(CloudLibraryClientError):
            test_instance.get_library_events()

        assert (f"Failed to retrieve response from {target_url}") in caplog.text  # noqa

    def test_get_request_success(self, test_instance, requests_mock):
        target_url = _events_url(
            test_instance, _FROZEN_DAY_BEFORE, _FROZEN_NOW)
        requests_mock.get(url=target_url, text=_TEST_LIBRARY_EVENTS_RESPONSE)

        result = test_instance.request(
            path=_events_path(_FROZEN_DAY_BEFORE, _FROZEN_NOW),
            method_type="GET")

        assert result.text == _TEST_LIBRARY_EVENTS_RESPONSE
        _assert_first_request(
            requests_mock, "GET", target_url, None,
            _expected_headers(
                "3MCLAUTH account_id:KipNmbVsmsT2xPjP4oHAaR3n00JgcszfF6mQRffBoRk=",  # noqa
                "Accept"))

    def test_put_request_success(self, test_instance, requests_mock):
        target_url = _events_url(
            test_instance, _FROZEN_DAY_BEFORE, _FROZEN_NOW)
        requests_mock.put(url=target_url, text=_TEST_LIBRARY_EVENTS_RESPONSE)

        result = test_instance.request(
            path=_events_path(_FROZEN_DAY_BEFORE, _FROZEN_NOW),
            method_type="PUT",
            body={"test": "test"})

        assert result.text == _TEST_LIBRARY_EVENTS_RESPONSE
        _assert_first_request(
            requests_mock, "PUT", target_url, "test=test",
            _expected_headers(
                "3MCLAUTH account_id:3M773C6ZVWmB/ISoSjQy9iBp48T4tUWhoNOwXaseMtE=",  # noqa
                "Content-Type"))

    def test_post_request_success(self, test_instance, requests_mock):
        target_url = _events_url(
            test_instance, _FROZEN_DAY_BEFORE, _FROZEN_NOW)
        requests_mock.post(url=target_url, text=_TEST_LIBRARY_EVENTS_RESPONSE)

        result = test_instance.request(
            path=_events_path(_FROZEN_DAY_BEFORE, _FROZEN_NOW),
            method_type="POST",
            body={"test": "test"})

        assert result.text == _TEST_LIBRARY_EVENTS_RESPONSE
        _assert_first_request(
            requests_mock, "POST", target_url, "test=test",
            _expected_headers(
                "3MCLAUTH account_id:vF0zI6ee1w1PbTLQ9EVvtxRly2vpCRxdBdAHb8DZQ4E=",  # noqa
                "Content-Type"))

    def test_request_failure(self, test_instance,
                             requests_mock, caplog):
        target_url = _events_url(
            test_instance, _FROZEN_DAY_BEFORE, _FROZEN_NOW)
        requests_mock.get(target_url, exc=ConnectTimeout)

        with pytest.raises(CloudLibraryClientError):
            test_instance.request(
                path=_events_path(_FROZEN_DAY_BEFORE, _FROZEN_NOW),
                method_type="GET")

        assert (f"Failed to retrieve response from "
                f"{target_url}: ConnectTimeout()") in caplog.text

    def test_create_request_body_success(self, test_instance):
        request_type = "CheckoutRequest"
        item_id = "df45qw"
        patron_id = "215555602845"
        # NOTE(review): the expected body has no child element tags and no
        # closing tag -- confirm this is really the intended payload.
        EXPECTED_REQUEST_BODY = f"<{request_type}>{item_id}{patron_id}"

        request_body = test_instance.create_request_body(
            request_type, item_id, patron_id)

        assert request_body == EXPECTED_REQUEST_BODY