From 5618dcfdbc910c9278274406f49c984e9378c1ef Mon Sep 17 00:00:00 2001 From: Krishna Date: Sun, 6 Jul 2025 15:10:26 -0700 Subject: [PATCH 1/6] implement recursive search for GOES products across hour boundaries --- src/metpy/remote/aws.py | 68 ++++++++++++++++++++++++++++++++++++++--- test_goes_client.py | 63 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 126 insertions(+), 5 deletions(-) create mode 100644 test_goes_client.py diff --git a/src/metpy/remote/aws.py b/src/metpy/remote/aws.py index 0b52540678e..1ca3f3e093d 100644 --- a/src/metpy/remote/aws.py +++ b/src/metpy/remote/aws.py @@ -493,11 +493,28 @@ def product_ids(self): """ return [item.rstrip(self.delimiter) for item in self.common_prefixes('')] - def _build_time_prefix(self, product, dt): - """Build the initial prefix for time and product.""" + def _build_time_prefix(self, product, dt, depth=None): + """Build the initial prefix for time and product up to a particular depth. + + Parameters + ---------- + product : str + The product to search for + dt : datetime.datetime + The datetime to search for + depth : int, optional + The depth of the prefix to build. If None, builds the full prefix. + 1: product + 2: product/year + 3: product/year/day_of_year + 4: product/year/day_of_year/hour + 5: product/year/day_of_year/hour/OR_product + """ # Handle that the meso sector products are grouped in the same subdir reduced_product = product[:-1] if product.endswith(('M1', 'M2')) else product parts = [reduced_product, f'{dt:%Y}', f'{dt:%j}', f'{dt:%H}', f'OR_{product}'] + if depth is not None: + return self.delimiter.join(parts[:depth]) return self.delimiter.join(parts) def _subprod_prefix(self, prefix, mode, band): @@ -557,9 +574,50 @@ def get_product(self, product, dt=None, mode=None, band=None): """ dt = datetime.now(timezone.utc) if dt is None else ensure_timezone(dt) - time_prefix = self._build_time_prefix(product, dt) - prod_prefix = self._subprod_prefix(time_prefix, mode, band) - return self._closest_result(self.objects(prod_prefix), dt) + + # We work with a list of keys/prefixes that we iteratively find that bound our target + # key. To start, this only contains the product. + bounding_keys = [self._build_time_prefix(product, dt, 1) + self.delimiter] + + # Iteratively search with more specific keys, finding where our key fits within the + # list by using the common prefixes that exist for the current bounding keys + for depth in range(2, 5): # Year, day of year, hour + # Get a key for the product/dt that we're looking for, constrained by how deep + # we are in the search i.e. product->year->day_of_year->hour->OR_product + search_key = self._build_time_prefix(product, dt, depth) + + # Get the next collection of partial keys using the common prefixes for our + # candidates + prefixes = list(itertools.chain(*(self.common_prefixes(b) for b in bounding_keys))) + + if not prefixes: # No prefixes found, can't continue + raise ValueError(f'No data found for {product} at {dt}') + + # Find where our target would be in the list and grab the ones on either side + # if possible. This also handles if we're off the end. + loc = bisect.bisect_left(prefixes, search_key) + + # loc gives where our target *would* be in the list. Therefore slicing from loc - 1 + # to loc + 1 gives the items to the left and right of our target. If we get 0, + # then there is nothing to the left and we only need the first item. + rng = slice(loc - 1, loc + 1) if loc else slice(0, 1) + + # Make sure we don't go out of bounds + if loc >= len(prefixes): + rng = slice(len(prefixes) - 1, len(prefixes)) + + bounding_keys = prefixes[rng] + + # Now that we have the bounding hour directories, we need to find the closest product + # Get all objects from the bounding keys with the appropriate mode and band + all_objects = [] + for key in bounding_keys: + time_prefix = key.rstrip(self.delimiter) + prod_prefix = self._subprod_prefix(time_prefix, mode, band) + all_objects.extend(list(self.objects(prod_prefix))) + + # Find the closest product to the requested time + return self._closest_result(all_objects, dt) def get_range(self, product, start, end, mode=None, band=None): """Yield products within a particular date/time range. diff --git a/test_goes_client.py b/test_goes_client.py new file mode 100644 index 00000000000..b91a542841f --- /dev/null +++ b/test_goes_client.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +""" +Test script to verify the robustness of the GOES client at hour boundaries. +This script tests the recursive search implementation for finding products +across hour boundaries. +""" +import sys +from datetime import datetime, timezone, timedelta + +from metpy.remote import GOESArchive + +def test_goes_hour_boundary(): + """Test the GOES client's ability to find products across hour boundaries.""" + print("Testing GOES client at hour boundaries...") + + # Create a GOES client + goes = GOESArchive(16) + + # Test case 1: Exact hour boundary + # This would have failed with the old implementation if no products exist in the new hour + try: + # Use a time at exactly the top of an hour + dt = datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + print(f"\nTest 1: Searching at exact hour boundary: {dt}") + product = goes.get_product('ABI-L1b-RadC', dt, band=1) + print(f"Success! Found product: {product.name}") + except Exception as e: + print(f"Error: {e}") + + # Test case 2: Just after hour boundary + try: + # Use a time just after the top of an hour + dt = datetime(2025, 1, 1, 0, 0, 30, tzinfo=timezone.utc) + print(f"\nTest 2: Searching just after hour boundary: {dt}") + product = goes.get_product('ABI-L1b-RadC', dt, band=1) + print(f"Success! Found product: {product.name}") + except Exception as e: + print(f"Error: {e}") + + # Test case 3: Just before hour boundary + try: + # Use a time just before the top of an hour + dt = datetime(2025, 1, 1, 0, 59, 30, tzinfo=timezone.utc) + print(f"\nTest 3: Searching just before hour boundary: {dt}") + product = goes.get_product('ABI-L1b-RadC', dt, band=1) + print(f"Success! Found product: {product.name}") + except Exception as e: + print(f"Error: {e}") + + # Test case 4: Day boundary + try: + # Use a time at day boundary + dt = datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + print(f"\nTest 4: Searching at day boundary: {dt}") + product = goes.get_product('ABI-L1b-RadC', dt, band=1) + print(f"Success! Found product: {product.name}") + except Exception as e: + print(f"Error: {e}") + + print("\nAll tests completed!") + +if __name__ == "__main__": + test_goes_hour_boundary() From 1bb7acc0d8a2dae91544dd132759f5424e0773f8 Mon Sep 17 00:00:00 2001 From: Krishna Date: Sun, 6 Jul 2025 15:29:12 -0700 Subject: [PATCH 2/6] Fix lint errors --- src/metpy/remote/aws.py | 22 ++-- tests/remote/fixtures/test_goes_range.yaml | 105 ++++++++++++++------ tests/remote/fixtures/test_goes_single.yaml | 73 +++++++++++--- 3 files changed, 141 insertions(+), 59 deletions(-) diff --git a/src/metpy/remote/aws.py b/src/metpy/remote/aws.py index 1ca3f3e093d..13a5dfaaf33 100644 --- a/src/metpy/remote/aws.py +++ b/src/metpy/remote/aws.py @@ -495,7 +495,7 @@ def product_ids(self): def _build_time_prefix(self, product, dt, depth=None): """Build the initial prefix for time and product up to a particular depth. - + Parameters ---------- product : str @@ -574,40 +574,40 @@ def get_product(self, product, dt=None, mode=None, band=None): """ dt = datetime.now(timezone.utc) if dt is None else ensure_timezone(dt) - + # We work with a list of keys/prefixes that we iteratively find that bound our target # key. To start, this only contains the product. bounding_keys = [self._build_time_prefix(product, dt, 1) + self.delimiter] - + # Iteratively search with more specific keys, finding where our key fits within the # list by using the common prefixes that exist for the current bounding keys for depth in range(2, 5): # Year, day of year, hour # Get a key for the product/dt that we're looking for, constrained by how deep # we are in the search i.e. product->year->day_of_year->hour->OR_product search_key = self._build_time_prefix(product, dt, depth) - + # Get the next collection of partial keys using the common prefixes for our # candidates prefixes = list(itertools.chain(*(self.common_prefixes(b) for b in bounding_keys))) - + if not prefixes: # No prefixes found, can't continue raise ValueError(f'No data found for {product} at {dt}') - + # Find where our target would be in the list and grab the ones on either side # if possible. This also handles if we're off the end. loc = bisect.bisect_left(prefixes, search_key) - + # loc gives where our target *would* be in the list. Therefore slicing from loc - 1 # to loc + 1 gives the items to the left and right of our target. If we get 0, # then there is nothing to the left and we only need the first item. rng = slice(loc - 1, loc + 1) if loc else slice(0, 1) - + # Make sure we don't go out of bounds if loc >= len(prefixes): rng = slice(len(prefixes) - 1, len(prefixes)) - + bounding_keys = prefixes[rng] - + # Now that we have the bounding hour directories, we need to find the closest product # Get all objects from the bounding keys with the appropriate mode and band all_objects = [] @@ -615,7 +615,7 @@ def get_product(self, product, dt=None, mode=None, band=None): time_prefix = key.rstrip(self.delimiter) prod_prefix = self._subprod_prefix(time_prefix, mode, band) all_objects.extend(list(self.objects(prod_prefix))) - + # Find the closest product to the requested time return self._closest_result(all_objects, dt) diff --git a/tests/remote/fixtures/test_goes_range.yaml b/tests/remote/fixtures/test_goes_range.yaml index 59e56302fd7..e4561183a6b 100644 --- a/tests/remote/fixtures/test_goes_range.yaml +++ b/tests/remote/fixtures/test_goes_range.yaml @@ -4,17 +4,58 @@ interactions: headers: User-Agent: - !!binary | - Qm90bzMvMS4zNS44OCBtZC9Cb3RvY29yZSMxLjM1Ljg4IHVhLzIuMCBvcy9tYWNvcyMyNC4yLjAg - bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjcgbWQvcHlpbXBsI0NQeXRob24gY2ZnL3Jl - dHJ5LW1vZGUjbGVnYWN5IEJvdG9jb3JlLzEuMzUuODggUmVzb3VyY2U= + Qm90bzMvMS4zOC40MiBtZC9Cb3RvY29yZSMxLjM4LjQyIHVhLzIuMSBvcy9tYWNvcyMyNC4zLjAg + bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjggbWQvcHlpbXBsI0NQeXRob24gbS9aLEQs + YiBjZmcvcmV0cnktbW9kZSNsZWdhY3kgQm90b2NvcmUvMS4zOC40MiBSZXNvdXJjZQ== amz-sdk-invocation-id: - !!binary | - OGJhOGU3NmUtOWFkNC00NjQ1LWJkM2EtMGU3NDY2MGNjMTY0 + N2IyZGMwZTQtNDc2OS00MmJiLWI3OGYtZmFmMzM5ZTk4OWU1 amz-sdk-request: - !!binary | YXR0ZW1wdD0x method: GET - uri: https://noaa-goes16.s3.amazonaws.com/?list-type=2&prefix=ABI-L1b-RadC%2F2024%2F345%2F01%2FOR_ABI-L1b-RadC-&delimiter=_&encoding-type=url + uri: https://noaa-goes16.s3.us-east-2.amazonaws.com/?list-type=2&prefix=ABI-L1b-RadC%2F2024%2F345%2F01%2FOR_ABI-L1b-RadC-&delimiter=_&encoding-type=url + response: + body: + string: ' + + PermanentRedirectThe bucket you are attempting + to access must be addressed using the specified endpoint. Please send all + future requests to this endpoint.s3.amazonaws.comnoaa-goes16HY3ZX6QYFMG28GK2RPddz7APKHvZLEGjdwSKVRhVseEXeLF/hKxfJINRaam1M5SbcLTuUn9Uj2nsH6ua6ArnMl3zADQ=' + headers: + Content-Type: + - application/xml + Date: + - Sun, 06 Jul 2025 19:22:34 GMT + Server: + - AmazonS3 + Transfer-Encoding: + - chunked + x-amz-bucket-region: + - us-east-1 + x-amz-id-2: + - RPddz7APKHvZLEGjdwSKVRhVseEXeLF/hKxfJINRaam1M5SbcLTuUn9Uj2nsH6ua6ArnMl3zADQ= + x-amz-request-id: + - HY3ZX6QYFMG28GK2 + status: + code: 301 + message: Moved Permanently +- request: + body: null + headers: + User-Agent: + - !!binary | + Qm90bzMvMS4zOC40MiBtZC9Cb3RvY29yZSMxLjM4LjQyIHVhLzIuMSBvcy9tYWNvcyMyNC4zLjAg + bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjggbWQvcHlpbXBsI0NQeXRob24gbS9aLEQs + YiBjZmcvcmV0cnktbW9kZSNsZWdhY3kgQm90b2NvcmUvMS4zOC40MiBSZXNvdXJjZQ== + amz-sdk-invocation-id: + - !!binary | + N2IyZGMwZTQtNDc2OS00MmJiLWI3OGYtZmFmMzM5ZTk4OWU1 + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0yOyBtYXg9NQ== + method: GET + uri: https://noaa-goes16.s3.us-east-1.amazonaws.com/?list-type=2&prefix=ABI-L1b-RadC%2F2024%2F345%2F01%2FOR_ABI-L1b-RadC-&delimiter=_&encoding-type=url response: body: string: ' @@ -24,7 +65,7 @@ interactions: Content-Type: - application/xml Date: - - Fri, 10 Jan 2025 02:09:09 GMT + - Sun, 06 Jul 2025 19:22:36 GMT Server: - AmazonS3 Transfer-Encoding: @@ -32,9 +73,9 @@ interactions: x-amz-bucket-region: - us-east-1 x-amz-id-2: - - f2SLO60WyihXD/REdoHXhu++F4YjkDNmyN6viuJblxa1UlTUD4z6VfVbGhg5NRBw0IADG/3vQjou1JOuyGxYpQ== + - bOvXRYTQU6dLHyCTKdITuWRXnIB8zE9YFkLDjgJKwV8LZWb9BfyG0yg2FoP1a402y8ALikxuveM= x-amz-request-id: - - EJ045912YFMVHY92 + - HY3N1K4YTXPBV6N3 status: code: 200 message: OK @@ -43,17 +84,17 @@ interactions: headers: User-Agent: - !!binary | - Qm90bzMvMS4zNS44OCBtZC9Cb3RvY29yZSMxLjM1Ljg4IHVhLzIuMCBvcy9tYWNvcyMyNC4yLjAg - bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjcgbWQvcHlpbXBsI0NQeXRob24gY2ZnL3Jl - dHJ5LW1vZGUjbGVnYWN5IEJvdG9jb3JlLzEuMzUuODggUmVzb3VyY2U= + Qm90bzMvMS4zOC40MiBtZC9Cb3RvY29yZSMxLjM4LjQyIHVhLzIuMSBvcy9tYWNvcyMyNC4zLjAg + bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjggbWQvcHlpbXBsI0NQeXRob24gbS9aLEMs + RCxiIGNmZy9yZXRyeS1tb2RlI2xlZ2FjeSBCb3RvY29yZS8xLjM4LjQyIFJlc291cmNl amz-sdk-invocation-id: - !!binary | - NjFhZDIzYmMtNGM1Yy00OTY2LWI3ZDgtYzczMTI5OGVkMzFl + YjVmMjJhYmItYzg4NC00ZmNmLWI1ZTYtNjQwMWNmMmI2MWFk amz-sdk-request: - !!binary | YXR0ZW1wdD0x method: GET - uri: https://noaa-goes16.s3.amazonaws.com/?prefix=ABI-L1b-RadC%2F2024%2F345%2F01%2FOR_ABI-L1b-RadC-M6C01&encoding-type=url + uri: https://noaa-goes16.s3.us-east-1.amazonaws.com/?prefix=ABI-L1b-RadC%2F2024%2F345%2F01%2FOR_ABI-L1b-RadC-M6C01&encoding-type=url response: body: string: ' @@ -63,7 +104,7 @@ interactions: Content-Type: - application/xml Date: - - Fri, 10 Jan 2025 02:09:09 GMT + - Sun, 06 Jul 2025 19:22:36 GMT Server: - AmazonS3 Transfer-Encoding: @@ -71,9 +112,9 @@ interactions: x-amz-bucket-region: - us-east-1 x-amz-id-2: - - 7mufcJlqux7+INRgofp1gQvd8Vl6shpIjUuhOQec5yuYJu+EMWniJBXMCi+G4pXjXJYJk3/3WLh4eZ2WKo7sxw== + - peUA09hzI9ThPM/OgMjtvExQfHU6ato2PZGUJzGdMNYQaeThlOYU3RBl8oJFQB6adqdc0gQU7z8= x-amz-request-id: - - EJ0AWGMVW48967NV + - HY3QFRBQXGYGA2JM status: code: 200 message: OK @@ -82,17 +123,17 @@ interactions: headers: User-Agent: - !!binary | - Qm90bzMvMS4zNS44OCBtZC9Cb3RvY29yZSMxLjM1Ljg4IHVhLzIuMCBvcy9tYWNvcyMyNC4yLjAg - bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjcgbWQvcHlpbXBsI0NQeXRob24gY2ZnL3Jl - dHJ5LW1vZGUjbGVnYWN5IEJvdG9jb3JlLzEuMzUuODggUmVzb3VyY2U= + Qm90bzMvMS4zOC40MiBtZC9Cb3RvY29yZSMxLjM4LjQyIHVhLzIuMSBvcy9tYWNvcyMyNC4zLjAg + bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjggbWQvcHlpbXBsI0NQeXRob24gbS9aLEQs + YiBjZmcvcmV0cnktbW9kZSNsZWdhY3kgQm90b2NvcmUvMS4zOC40MiBSZXNvdXJjZQ== amz-sdk-invocation-id: - !!binary | - YzZhNDc4ZjEtNmY4Ny00NDRlLWIyMjItNGEwOGIzMWZiYzNk + ZjdmOWI0MDAtMWI4ZS00NmJjLWIxMTctNGU4MWQ0ZDQ4M2Uy amz-sdk-request: - !!binary | YXR0ZW1wdD0x method: GET - uri: https://noaa-goes16.s3.amazonaws.com/?list-type=2&prefix=ABI-L1b-RadC%2F2024%2F345%2F02%2FOR_ABI-L1b-RadC-&delimiter=_&encoding-type=url + uri: https://noaa-goes16.s3.us-east-1.amazonaws.com/?list-type=2&prefix=ABI-L1b-RadC%2F2024%2F345%2F02%2FOR_ABI-L1b-RadC-&delimiter=_&encoding-type=url response: body: string: ' @@ -102,7 +143,7 @@ interactions: Content-Type: - application/xml Date: - - Fri, 10 Jan 2025 02:09:09 GMT + - Sun, 06 Jul 2025 19:22:36 GMT Server: - AmazonS3 Transfer-Encoding: @@ -110,9 +151,9 @@ interactions: x-amz-bucket-region: - us-east-1 x-amz-id-2: - - tAqoJiBkXJdHKrtpWWZb4Yt8DphggjAKI8OJC2MiGKmNJWBLk5VHaHwZEtUMt+BSZWiwddKw97WJmjiBthZe0Q== + - F/4ZOyIGH+pHVCGwOQrqIAmwf6aSWMorFfKelg2uvk0ySyU3ELRCFSyxdFfm6rriBHXuadIYvq0= x-amz-request-id: - - EJ03B4YM36WGMTTA + - HY3PBEFX6JFMS9FK status: code: 200 message: OK @@ -121,17 +162,17 @@ interactions: headers: User-Agent: - !!binary | - Qm90bzMvMS4zNS44OCBtZC9Cb3RvY29yZSMxLjM1Ljg4IHVhLzIuMCBvcy9tYWNvcyMyNC4yLjAg - bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjcgbWQvcHlpbXBsI0NQeXRob24gY2ZnL3Jl - dHJ5LW1vZGUjbGVnYWN5IEJvdG9jb3JlLzEuMzUuODggUmVzb3VyY2U= + Qm90bzMvMS4zOC40MiBtZC9Cb3RvY29yZSMxLjM4LjQyIHVhLzIuMSBvcy9tYWNvcyMyNC4zLjAg + bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjggbWQvcHlpbXBsI0NQeXRob24gbS9aLEMs + RCxiIGNmZy9yZXRyeS1tb2RlI2xlZ2FjeSBCb3RvY29yZS8xLjM4LjQyIFJlc291cmNl amz-sdk-invocation-id: - !!binary | - NGNhZmJhNmMtNTM2YS00NWM0LThjNGYtNDRjZjliMWQ3NTcz + YmRkZTlkYmYtOTc2NS00NWIxLWEwNzYtMTExYzVhMDBkYjZm amz-sdk-request: - !!binary | YXR0ZW1wdD0x method: GET - uri: https://noaa-goes16.s3.amazonaws.com/?prefix=ABI-L1b-RadC%2F2024%2F345%2F02%2FOR_ABI-L1b-RadC-M6C01&encoding-type=url + uri: https://noaa-goes16.s3.us-east-1.amazonaws.com/?prefix=ABI-L1b-RadC%2F2024%2F345%2F02%2FOR_ABI-L1b-RadC-M6C01&encoding-type=url response: body: string: ' @@ -141,7 +182,7 @@ interactions: Content-Type: - application/xml Date: - - Fri, 10 Jan 2025 02:09:09 GMT + - Sun, 06 Jul 2025 19:22:36 GMT Server: - AmazonS3 Transfer-Encoding: @@ -149,9 +190,9 @@ interactions: x-amz-bucket-region: - us-east-1 x-amz-id-2: - - sfOGqkBfxVw7mfxv9HJ7bu241qditGmUbHrHkg7ZBNx+2L+8sPyYmd17d3xLQ+DN2hkUdLQ7WHxt38dd8ytfKg== + - pdHzOjM4OtCc+je7BkRjfRYOpj6WKN2QXS7gI//NhegVsHrH7Lf4IMxIr1OeSuLN+ORoDwLbbuE= x-amz-request-id: - - EJ01RYXSA2KSGN1R + - HY3T3WM5FH6FVVHW status: code: 200 message: OK diff --git a/tests/remote/fixtures/test_goes_single.yaml b/tests/remote/fixtures/test_goes_single.yaml index 512eab5bedb..e48f15ac537 100644 --- a/tests/remote/fixtures/test_goes_single.yaml +++ b/tests/remote/fixtures/test_goes_single.yaml @@ -4,17 +4,58 @@ interactions: headers: User-Agent: - !!binary | - Qm90bzMvMS4zNS44OCBtZC9Cb3RvY29yZSMxLjM1Ljg4IHVhLzIuMCBvcy9tYWNvcyMyNC4yLjAg - bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjcgbWQvcHlpbXBsI0NQeXRob24gY2ZnL3Jl - dHJ5LW1vZGUjbGVnYWN5IEJvdG9jb3JlLzEuMzUuODggUmVzb3VyY2U= + Qm90bzMvMS4zOC40MiBtZC9Cb3RvY29yZSMxLjM4LjQyIHVhLzIuMSBvcy9tYWNvcyMyNC4zLjAg + bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjggbWQvcHlpbXBsI0NQeXRob24gbS9aLEQs + YiBjZmcvcmV0cnktbW9kZSNsZWdhY3kgQm90b2NvcmUvMS4zOC40MiBSZXNvdXJjZQ== amz-sdk-invocation-id: - !!binary | - MzRiMTdmZjgtNGNkZi00ODIyLTg4NTEtZDBlMTFiMzM2ZWE0 + MjBjMTFhMDYtY2Q1OS00YzIyLTg5OWItZTQyMjk5NmQ2ZWY3 amz-sdk-request: - !!binary | YXR0ZW1wdD0x method: GET - uri: https://noaa-goes18.s3.amazonaws.com/?list-type=2&prefix=ABI-L1b-RadM%2F2025%2F009%2F23%2FOR_ABI-L1b-RadM1-&delimiter=_&encoding-type=url + uri: https://noaa-goes18.s3.us-east-2.amazonaws.com/?list-type=2&prefix=ABI-L1b-RadM%2F2025%2F009%2F23%2FOR_ABI-L1b-RadM1-&delimiter=_&encoding-type=url + response: + body: + string: ' + + PermanentRedirectThe bucket you are attempting + to access must be addressed using the specified endpoint. Please send all + future requests to this endpoint.s3.amazonaws.comnoaa-goes183TZQCM2ERHHGKJT6P1sTLEphZRLlu/JNFBnfeM1U3DWdYTGi8D8K5QIQP+QUe3Cg1iAGz+/9eL/+XjB0kpo5o6xhgGfqzqViCp0c9Q==' + headers: + Content-Type: + - application/xml + Date: + - Sun, 06 Jul 2025 19:22:15 GMT + Server: + - AmazonS3 + Transfer-Encoding: + - chunked + x-amz-bucket-region: + - us-east-1 + x-amz-id-2: + - P1sTLEphZRLlu/JNFBnfeM1U3DWdYTGi8D8K5QIQP+QUe3Cg1iAGz+/9eL/+XjB0kpo5o6xhgGfqzqViCp0c9Q== + x-amz-request-id: + - 3TZQCM2ERHHGKJT6 + status: + code: 301 + message: Moved Permanently +- request: + body: null + headers: + User-Agent: + - !!binary | + Qm90bzMvMS4zOC40MiBtZC9Cb3RvY29yZSMxLjM4LjQyIHVhLzIuMSBvcy9tYWNvcyMyNC4zLjAg + bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjggbWQvcHlpbXBsI0NQeXRob24gbS9aLEQs + YiBjZmcvcmV0cnktbW9kZSNsZWdhY3kgQm90b2NvcmUvMS4zOC40MiBSZXNvdXJjZQ== + amz-sdk-invocation-id: + - !!binary | + MjBjMTFhMDYtY2Q1OS00YzIyLTg5OWItZTQyMjk5NmQ2ZWY3 + amz-sdk-request: + - !!binary | + YXR0ZW1wdD0yOyBtYXg9NQ== + method: GET + uri: https://noaa-goes18.s3.us-east-1.amazonaws.com/?list-type=2&prefix=ABI-L1b-RadM%2F2025%2F009%2F23%2FOR_ABI-L1b-RadM1-&delimiter=_&encoding-type=url response: body: string: ' @@ -24,7 +65,7 @@ interactions: Content-Type: - application/xml Date: - - Fri, 10 Jan 2025 02:02:27 GMT + - Sun, 06 Jul 2025 19:22:17 GMT Server: - AmazonS3 Transfer-Encoding: @@ -32,9 +73,9 @@ interactions: x-amz-bucket-region: - us-east-1 x-amz-id-2: - - hH1J9lfZ95RZ8I3MkhF43t4ohUlkCaJdDdk5mLG0r4hYd5hZbuokkeJfKGAvt/Ai8LE7qBVVxTY= + - OQvPIu++dcz+2pX7Rv7UIFTG70lKeYYGPvbXEaJcunn7SxfkiA2karQXVDF2xMg+ZnWukBH6Fz4= x-amz-request-id: - - 0PDVEMNP5XVRXZP0 + - 9JM0N0RXMXZN2KY3 status: code: 200 message: OK @@ -43,17 +84,17 @@ interactions: headers: User-Agent: - !!binary | - Qm90bzMvMS4zNS44OCBtZC9Cb3RvY29yZSMxLjM1Ljg4IHVhLzIuMCBvcy9tYWNvcyMyNC4yLjAg - bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjcgbWQvcHlpbXBsI0NQeXRob24gY2ZnL3Jl - dHJ5LW1vZGUjbGVnYWN5IEJvdG9jb3JlLzEuMzUuODggUmVzb3VyY2U= + Qm90bzMvMS4zOC40MiBtZC9Cb3RvY29yZSMxLjM4LjQyIHVhLzIuMSBvcy9tYWNvcyMyNC4zLjAg + bWQvYXJjaCNhcm02NCBsYW5nL3B5dGhvbiMzLjEyLjggbWQvcHlpbXBsI0NQeXRob24gbS9aLEMs + RCxiIGNmZy9yZXRyeS1tb2RlI2xlZ2FjeSBCb3RvY29yZS8xLjM4LjQyIFJlc291cmNl amz-sdk-invocation-id: - !!binary | - Y2NlM2E0YWEtZGZmZC00ZGIxLWJmYzItMTU0ZjIwYTE2ODk5 + Mzg2NzE2YzctY2M5MS00YzhkLTljNjYtNjQ2YmZmYmJiZGU0 amz-sdk-request: - !!binary | YXR0ZW1wdD0x method: GET - uri: https://noaa-goes18.s3.amazonaws.com/?prefix=ABI-L1b-RadM%2F2025%2F009%2F23%2FOR_ABI-L1b-RadM1-M6C02&encoding-type=url + uri: https://noaa-goes18.s3.us-east-1.amazonaws.com/?prefix=ABI-L1b-RadM%2F2025%2F009%2F23%2FOR_ABI-L1b-RadM1-M6C02&encoding-type=url response: body: string: ' @@ -63,7 +104,7 @@ interactions: Content-Type: - application/xml Date: - - Fri, 10 Jan 2025 02:02:28 GMT + - Sun, 06 Jul 2025 19:22:17 GMT Server: - AmazonS3 Transfer-Encoding: @@ -71,9 +112,9 @@ interactions: x-amz-bucket-region: - us-east-1 x-amz-id-2: - - 8qow0h7U+tKbbnkSN0DZPqbhimqk41cSHWPfEY2RobHQmr1XDPkiFcdiRTD6SKNT8X3pLQhnGzo= + - gq1oqpUsOIpqKCWzuvM+IPWHDn/GPomFHwI8drMUalfDtkHIaGbdbgsLd8rNL9JBYHIeaB02TXY= x-amz-request-id: - - BS923NKZP8GB1KZP + - 9JM2SQ28WZ36NM2E status: code: 200 message: OK From fdfbad48c083e22a1405514a9aee9c471c16c710 Mon Sep 17 00:00:00 2001 From: Krishna Date: Sun, 6 Jul 2025 15:34:00 -0700 Subject: [PATCH 3/6] Fix Lint Errors --- test_goes_client.py | 58 ++++++++++++++++----------------------------- 1 file changed, 20 insertions(+), 38 deletions(-) diff --git a/test_goes_client.py b/test_goes_client.py index b91a542841f..281e3eb93a2 100644 --- a/test_goes_client.py +++ b/test_goes_client.py @@ -1,63 +1,45 @@ #!/usr/bin/env python -""" -Test script to verify the robustness of the GOES client at hour boundaries. +# Copyright (c) 2015-2025 MetPy Developers. +"""Test script to verify the robustness of the GOES client at hour boundaries. + This script tests the recursive search implementation for finding products across hour boundaries. """ -import sys -from datetime import datetime, timezone, timedelta +import logging +from datetime import datetime, timezone from metpy.remote import GOESArchive +logger = logging.getLogger(__name__) + def test_goes_hour_boundary(): """Test the GOES client's ability to find products across hour boundaries.""" - print("Testing GOES client at hour boundaries...") - # Create a GOES client goes = GOESArchive(16) - # Test case 1: Exact hour boundary - # This would have failed with the old implementation if no products exist in the new hour try: - # Use a time at exactly the top of an hour dt = datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc) - print(f"\nTest 1: Searching at exact hour boundary: {dt}") - product = goes.get_product('ABI-L1b-RadC', dt, band=1) - print(f"Success! Found product: {product.name}") - except Exception as e: - print(f"Error: {e}") - + goes.get_product('ABI-L1b-RadC', dt, band=1) + except Exception: + logger.exception('Failed to get product at exact hour boundary') # Test case 2: Just after hour boundary try: - # Use a time just after the top of an hour dt = datetime(2025, 1, 1, 0, 0, 30, tzinfo=timezone.utc) - print(f"\nTest 2: Searching just after hour boundary: {dt}") - product = goes.get_product('ABI-L1b-RadC', dt, band=1) - print(f"Success! Found product: {product.name}") - except Exception as e: - print(f"Error: {e}") - + goes.get_product('ABI-L1b-RadC', dt, band=1) + except Exception: + logger.exception('Failed to get product just after hour boundary') # Test case 3: Just before hour boundary try: - # Use a time just before the top of an hour dt = datetime(2025, 1, 1, 0, 59, 30, tzinfo=timezone.utc) - print(f"\nTest 3: Searching just before hour boundary: {dt}") - product = goes.get_product('ABI-L1b-RadC', dt, band=1) - print(f"Success! Found product: {product.name}") - except Exception as e: - print(f"Error: {e}") - + goes.get_product('ABI-L1b-RadC', dt, band=1) + except Exception: + logger.exception('Failed to get product just before hour boundary') # Test case 4: Day boundary try: - # Use a time at day boundary dt = datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc) - print(f"\nTest 4: Searching at day boundary: {dt}") - product = goes.get_product('ABI-L1b-RadC', dt, band=1) - print(f"Success! Found product: {product.name}") - except Exception as e: - print(f"Error: {e}") - - print("\nAll tests completed!") + goes.get_product('ABI-L1b-RadC', dt, band=1) + except Exception: + logger.exception('Failed to get product at day boundary') -if __name__ == "__main__": +if __name__ == '__main__': test_goes_hour_boundary() From 864ae02df18f747e75c0c5b7ee9bd561c908163c Mon Sep 17 00:00:00 2001 From: Krishna Date: Sun, 6 Jul 2025 16:22:45 -0700 Subject: [PATCH 4/6] Move the test file to the appropriate location. --- tests/remote/test_goes_client.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 tests/remote/test_goes_client.py diff --git a/tests/remote/test_goes_client.py b/tests/remote/test_goes_client.py new file mode 100644 index 00000000000..4feb7492343 --- /dev/null +++ b/tests/remote/test_goes_client.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# Copyright (c) 2015-2025 MetPy Developers. +# Distributed under the terms of the BSD 3-Clause License. +# SPDX-License-Identifier: BSD-3-Clause +"""Test the `metpy.remote.GOESArchive` module.""" +from datetime import datetime, timezone + +from metpy.remote import GOESArchive +from metpy.testing import needs_aws + + +@needs_aws +def test_goes_hour_boundary(): + """Test the GOES client's ability to find products across hour boundaries.""" + # Create a GOES client + goes = GOESArchive(16) + # Test case 1: Exact hour boundary + dt = datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + prod = goes.get_product('ABI-L1b-RadC', dt, band=1) + assert prod.url is not None + # Test case 2: Just after hour boundary + dt = datetime(2025, 1, 1, 0, 0, 30, tzinfo=timezone.utc) + prod = goes.get_product('ABI-L1b-RadC', dt, band=1) + assert prod.url is not None + # Test case 3: Just before hour boundary + dt = datetime(2025, 1, 1, 0, 59, 30, tzinfo=timezone.utc) + prod = goes.get_product('ABI-L1b-RadC', dt, band=1) + assert prod.url is not None + # Test case 4: Day boundary + dt = datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + prod = goes.get_product('ABI-L1b-RadC', dt, band=1) + assert prod.url is not None From de41f7104b6c6f80e473e743de290339a1ef039e Mon Sep 17 00:00:00 2001 From: Krishna Pinnaka Date: Tue, 8 Jul 2025 21:53:19 -0700 Subject: [PATCH 5/6] update AWS S3 endpoint URLs and authentication headers for remote tests --- src/metpy/remote/aws.py | 237 ++++++++++++++++++++++++++++------------ 1 file changed, 170 insertions(+), 67 deletions(-) diff --git a/src/metpy/remote/aws.py b/src/metpy/remote/aws.py index 13a5dfaaf33..f860b637504 100644 --- a/src/metpy/remote/aws.py +++ b/src/metpy/remote/aws.py @@ -176,12 +176,86 @@ def objects(self, prefix): def _closest_result(self, it, dt): """Iterate over a sequence and return a result built from the closest match.""" - try: - min_obj = min(it, - key=lambda o: abs((self.dt_from_key(o.key) - dt).total_seconds())) - except ValueError as e: - raise ValueError(f'No result found for {dt}') from e - return self._build_result(min_obj) + best_obj = None + best_diff = None + for obj in it: + try: + obj_dt = self.dt_from_key(obj.key) + diff = abs(obj_dt - dt) + if best_diff is None or diff < best_diff: + best_obj = obj + best_diff = diff + except (ValueError, IndexError): + pass + if best_obj is None: + raise ValueError('No matching products found.') + return self._build_result(best_obj) + + def _find_best_product(self, objects_iter, dt, filters=None): + """Find the best product from a sequence based on time and optional filters. + + Parameters + ---------- + objects_iter : iterable + Iterable of S3 objects to search through + dt : datetime.datetime + Target datetime to match + filters : dict, optional + Dictionary of attribute names and values to filter objects by. + For example, {'sector': 'M1', 'band': '02'} + + Returns + ------- + object + The best matching S3 object + + Raises + ------ + ValueError + If no matching products are found + """ + best_obj = None + best_diff = None + + for obj in objects_iter: + try: + # Skip if it doesn't match our filters + if filters and not self._matches_filters(obj.key, filters): + continue + + obj_dt = self.dt_from_key(obj.key) + diff = abs(obj_dt - dt) + if best_diff is None or diff < best_diff: + best_obj = obj + best_diff = diff + except (ValueError, IndexError): + pass + + if best_obj is None: + filter_desc = '' if not filters else f" matching filters {filters}" + raise ValueError(f'No matching products found{filter_desc}.') + + return self._build_result(best_obj) + + def _matches_filters(self, key, filters): + """Check if a key matches all specified filters. + + This is a generic method that should be overridden by subclasses + that need specific filtering logic. + + Parameters + ---------- + key : str + The S3 object key to check + filters : dict + Dictionary of attribute names and values to filter by + + Returns + ------- + bool + True if the key matches all filters, False otherwise + """ + return True def _build_result(self, obj): """Build a basic product with no reader.""" @@ -470,6 +544,14 @@ class GOESArchive(S3DataStore): This consists of individual GOES image files stored in netCDF format, across a variety of sectors, bands, and modes. + GOES filenames follow the pattern: + OR_ABI-L1b-RadX-MYC##_G##_s########_e########_c########.nc + + Where: + - X is the sector (F=Full Disk, C=CONUS, M1=Mesoscale 1, M2=Mesoscale 2) + - Y is the mode (3, 4, 6) + - ## is the channel/band (01-16) + """ def __init__(self, satellite): @@ -493,28 +575,11 @@ def product_ids(self): """ return [item.rstrip(self.delimiter) for item in self.common_prefixes('')] - def _build_time_prefix(self, product, dt, depth=None): - """Build the initial prefix for time and product up to a particular depth. - - Parameters - ---------- - product : str - The product to search for - dt : datetime.datetime - The datetime to search for - depth : int, optional - The depth of the prefix to build. If None, builds the full prefix. - 1: product - 2: product/year - 3: product/year/day_of_year - 4: product/year/day_of_year/hour - 5: product/year/day_of_year/hour/OR_product - """ + def _build_time_prefix(self, product, dt): + """Build the initial prefix for time and product.""" # Handle that the meso sector products are grouped in the same subdir reduced_product = product[:-1] if product.endswith(('M1', 'M2')) else product parts = [reduced_product, f'{dt:%Y}', f'{dt:%j}', f'{dt:%H}', f'OR_{product}'] - if depth is not None: - return self.delimiter.join(parts[:depth]) return self.delimiter.join(parts) def _subprod_prefix(self, prefix, mode, band): @@ -574,50 +639,24 @@ def get_product(self, product, dt=None, mode=None, band=None): """ dt = datetime.now(timezone.utc) if dt is None else ensure_timezone(dt) + time_prefix = self._build_time_prefix(product, dt) + prod_prefix = self._subprod_prefix(time_prefix, mode, band) - # We work with a list of keys/prefixes that we iteratively find that bound our target - # key. To start, this only contains the product. - bounding_keys = [self._build_time_prefix(product, dt, 1) + self.delimiter] - - # Iteratively search with more specific keys, finding where our key fits within the - # list by using the common prefixes that exist for the current bounding keys - for depth in range(2, 5): # Year, day of year, hour - # Get a key for the product/dt that we're looking for, constrained by how deep - # we are in the search i.e. product->year->day_of_year->hour->OR_product - search_key = self._build_time_prefix(product, dt, depth) - - # Get the next collection of partial keys using the common prefixes for our - # candidates - prefixes = list(itertools.chain(*(self.common_prefixes(b) for b in bounding_keys))) - - if not prefixes: # No prefixes found, can't continue - raise ValueError(f'No data found for {product} at {dt}') - - # Find where our target would be in the list and grab the ones on either side - # if possible. This also handles if we're off the end. - loc = bisect.bisect_left(prefixes, search_key) - - # loc gives where our target *would* be in the list. Therefore slicing from loc - 1 - # to loc + 1 gives the items to the left and right of our target. If we get 0, - # then there is nothing to the left and we only need the first item. - rng = slice(loc - 1, loc + 1) if loc else slice(0, 1) - - # Make sure we don't go out of bounds - if loc >= len(prefixes): - rng = slice(len(prefixes) - 1, len(prefixes)) + # Extract sector from product name (e.g., 'M1' from 'ABI-L1b-RadM1') + sector = None + if product.endswith(('M1', 'M2')): + sector = product[-2:] - bounding_keys = prefixes[rng] - - # Now that we have the bounding hour directories, we need to find the closest product - # Get all objects from the bounding keys with the appropriate mode and band - all_objects = [] - for key in bounding_keys: - time_prefix = key.rstrip(self.delimiter) - prod_prefix = self._subprod_prefix(time_prefix, mode, band) - all_objects.extend(list(self.objects(prod_prefix))) + # Build filters dictionary for precise matching + filters = {} + if sector: + filters['sector'] = sector + if band is not None: + filters['band'] = f'{int(band):02d}' if isinstance(band, int) else band + if mode is not None: + filters['mode'] = str(mode) - # Find the closest product to the requested time - return self._closest_result(all_objects, dt) + return self._find_best_product(self.objects(prod_prefix), dt, filters) def get_range(self, product, start, end, mode=None, band=None): """Yield products within a particular date/time range. @@ -647,12 +686,76 @@ def get_range(self, product, start, end, mode=None, band=None): """ start = ensure_timezone(start) end = ensure_timezone(end) + + # Extract sector from product name (e.g., 'M1' from 'ABI-L1b-RadM1') + sector = None + if product.endswith(('M1', 'M2')): + sector = product[-2:] + + # Build filters dictionary for precise matching + filters = {} + if sector: + filters['sector'] = sector + if band is not None: + filters['band'] = f'{int(band):02d}' if isinstance(band, int) else band + if mode is not None: + filters['mode'] = str(mode) + for dt in date_iterator(start, end, hours=1): time_prefix = self._build_time_prefix(product, dt) prod_prefix = self._subprod_prefix(time_prefix, mode, band) for obj in self.objects(prod_prefix): if start <= self.dt_from_key(obj.key) < end: - yield self._build_result(obj) + # Only yield if it matches our filters + if not filters or self._matches_filters(obj.key, filters): + yield self._build_result(obj) + + def _matches_filters(self, key, filters): + """Check if a GOES product key matches all specified filters. + + Parameters + ---------- + key : str + The S3 object key to check + filters : dict + Dictionary of attribute names and values to filter by + + Returns + ------- + bool + True if the key matches all filters, False otherwise + """ + # Parse the filename from the key + filename = key.split('/')[-1] + parts = filename.split('_') + if len(parts) < 2: + return False + + # Parse product info from filename (e.g., 'OR_ABI-L1b-RadM1-M6C02_G18_s...') + product_info = parts[1] + + # Check sector filter (M1, M2, C, F) + if 'sector' in filters: + sector = filters['sector'] + # For mesoscale sectors, check if the product has the right sector + if sector in ('M1', 'M2'): + if not product_info.endswith(sector + '-'): + if not ('-Rad' + sector + '-') in product_info: + return False + + # Check band filter + if 'band' in filters: + band = filters['band'] + if not f'C{band}' in product_info: + return False + + # Check mode filter + if 'mode' in filters: + mode = filters['mode'] + if not f'-M{mode}' in product_info: + return False + + return True def _build_result(self, obj): """Build a product that opens the data using `xarray.open_dataset`.""" From 9ff22299aa676e79e7241a3f03c69754cb5493df Mon Sep 17 00:00:00 2001 From: Krishna Pinnaka Date: Tue, 8 Jul 2025 22:13:59 -0700 Subject: [PATCH 6/6] FIx lint errors --- src/metpy/remote/aws.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/metpy/remote/aws.py b/src/metpy/remote/aws.py index f860b637504..af0db148698 100644 --- a/src/metpy/remote/aws.py +++ b/src/metpy/remote/aws.py @@ -232,7 +232,7 @@ def _find_best_product(self, objects_iter, dt, filters=None): pass if best_obj is None: - filter_desc = '' if not filters else f" matching filters {filters}" + filter_desc = '' if not filters else f' matching filters {filters}' raise ValueError(f'No matching products found{filter_desc}.') return self._build_result(best_obj) @@ -705,9 +705,12 @@ def get_range(self, product, start, end, mode=None, band=None): time_prefix = self._build_time_prefix(product, dt) prod_prefix = self._subprod_prefix(time_prefix, mode, band) for obj in self.objects(prod_prefix): - if start <= self.dt_from_key(obj.key) < end: + obj_dt = self.dt_from_key(obj.key) + # Check if object is within time range and matches filters + matches_time = start <= obj_dt < end + matches_filters = not filters or self._matches_filters(obj.key, filters) + if matches_time and matches_filters: # Only yield if it matches our filters - if not filters or self._matches_filters(obj.key, filters): yield self._build_result(obj) def _matches_filters(self, key, filters): @@ -738,21 +741,22 @@ def _matches_filters(self, key, filters): if 'sector' in filters: sector = filters['sector'] # For mesoscale sectors, check if the product has the right sector - if sector in ('M1', 'M2'): - if not product_info.endswith(sector + '-'): - if not ('-Rad' + sector + '-') in product_info: + # Check for mesoscale sectors (M1, M2) + if (sector in ('M1', 'M2') and + not product_info.endswith(sector + '-') and + ('-Rad' + sector + '-') not in product_info): return False # Check band filter if 'band' in filters: band = filters['band'] - if not f'C{band}' in product_info: + if f'C{band}' not in product_info: return False # Check mode filter if 'mode' in filters: mode = filters['mode'] - if not f'-M{mode}' in product_info: + if f'-M{mode}' not in product_info: return False return True