Skip to content

Commit 48b4930

Browse files
committed
Optimize S3 fetcher with conditional GETs and new download_if_changed hook
- add optional download_if_changed/2 to Fetcher behaviour and wire update loop to use it when available - implement conditional GETs in the S3 fetcher via If-None-Match and 304 handling - switch S3 version checks to ETag-only and document versioned/non‑versioned behavior - add S3 tests covering conditional GET and header usage
1 parent 8f8a204 commit 48b4930

5 files changed

Lines changed: 243 additions & 55 deletions

File tree

AGENTS.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Repository Guidelines
2+
3+
## Project Structure & Module Organization
4+
- `lib/remote_persistent_term.ex` defines the core behaviour and `use RemotePersistentTerm` macro.
5+
- `lib/remote_persistent_term/fetcher/` holds fetcher implementations (HTTP, S3, Static) and helpers like HTTP cache logic.
6+
- Tests live in `test/` with paths mirroring modules (e.g., `test/remote_persistent_term/fetcher/http_test.exs`).
7+
- Generated artifacts (`_build/`, `deps/`, `doc/`, `cover/`) are outputs of Mix tasks and should not be edited by hand.
8+
9+
## Build, Test, and Development Commands
10+
- `mix deps.get` installs dependencies.
11+
- `mix compile` builds the library.
12+
- `mix test` runs the full ExUnit suite.
13+
- `mix test test/remote_persistent_term/fetcher/http_test.exs:30` runs a focused test by file and line.
14+
- `mix format` applies the project formatter (see `.formatter.exs`).
15+
- `mix docs` generates ExDoc output into `doc/`.
16+
17+
## Coding Style & Naming Conventions
18+
- Follow `mix format` output (Elixir defaults to 2-space indentation).
19+
- Modules use `CamelCase`; files and functions use `snake_case` (predicates end in `?`).
20+
- Keep option keys consistent with `RemotePersistentTerm` options and fetcher configuration keys.
21+
22+
## Testing Guidelines
23+
- Tests use ExUnit; Mox mocks the ExAws client and Bypass is used for HTTP fetcher tests.
24+
- Prefer deterministic tests and keep external network access mocked or bypassed.
25+
- Name tests with clear behaviour statements; add coverage for new fetcher logic and option validation.
26+
27+
## Commit & Pull Request Guidelines
28+
- Commit messages are short, imperative, and capitalized (e.g., “Improve logs”, “Fix tests”, “Increment version”).
29+
- Keep commits scoped to one change; avoid unrelated refactors in the same commit.
30+
- PRs should include a concise summary, motivation, and the tests you ran (or note if none).
31+
- If a change affects public behaviour or configuration, update documentation and mention it in the PR.
32+
33+
## Notes for Contributors
34+
- CI runs `mix test` on recent OTP/Elixir versions; ensure your changes pass locally before pushing.
35+
- When touching S3 or HTTP fetchers, prefer tests that use Mox/Bypass rather than real services.

lib/remote_persistent_term.ex

Lines changed: 47 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -268,21 +268,43 @@ defmodule RemotePersistentTerm do
268268
start_meta,
269269
fn ->
270270
{status, version} =
271-
with {:ok, current_version} <- state.fetcher_mod.current_version(state.fetcher_state),
272-
true <- state.current_version != current_version,
273-
:ok <- download_and_store_term(state, deserialize_fun, put_fun) do
274-
{:updated, current_version}
271+
if function_exported?(state.fetcher_mod, :download_if_changed, 2) do
272+
case state.fetcher_mod.download_if_changed(
273+
state.fetcher_state,
274+
state.current_version
275+
) do
276+
{:ok, term, new_version} ->
277+
case store_term(state, deserialize_fun, put_fun, term) do
278+
:ok ->
279+
{:updated, new_version}
280+
281+
{:error, reason} ->
282+
log_update_error(state.name, reason)
283+
{:not_updated, state.current_version}
284+
end
285+
286+
{:not_modified, version} ->
287+
Logger.info("#{state.name} - up to date")
288+
{:not_updated, version || state.current_version}
289+
290+
{:error, reason} ->
291+
log_update_error(state.name, reason)
292+
{:not_updated, state.current_version}
293+
end
275294
else
276-
false ->
277-
Logger.info("#{state.name} - up to date")
278-
{:not_updated, state.current_version}
279-
280-
{:error, reason} ->
281-
Logger.error(
282-
"#{state.name} - failed to update remote term, reason: #{inspect(reason)}"
283-
)
284-
285-
{:not_updated, state.current_version}
295+
with {:ok, current_version} <- state.fetcher_mod.current_version(state.fetcher_state),
296+
true <- state.current_version != current_version,
297+
:ok <- download_and_store_term(state, deserialize_fun, put_fun) do
298+
{:updated, current_version}
299+
else
300+
false ->
301+
Logger.info("#{state.name} - up to date")
302+
{:not_updated, state.current_version}
303+
304+
{:error, reason} ->
305+
log_update_error(state.name, reason)
306+
{:not_updated, state.current_version}
307+
end
286308
end
287309

288310
{version, Map.put(start_meta, :status, status)}
@@ -305,9 +327,18 @@ defmodule RemotePersistentTerm do
305327
@doc false
306328
def validate_options(opts), do: NimbleOptions.validate(opts, @opts_schema)
307329

330+
defp log_update_error(name, reason) do
331+
Logger.error("#{name} - failed to update remote term, reason: #{inspect(reason)}")
332+
end
333+
308334
defp download_and_store_term(state, deserialize_fun, put_fun) do
309-
with {:ok, term} <- state.fetcher_mod.download(state.fetcher_state),
310-
{:ok, decompressed} <- maybe_decompress(state, term),
335+
with {:ok, term} <- state.fetcher_mod.download(state.fetcher_state) do
336+
store_term(state, deserialize_fun, put_fun, term)
337+
end
338+
end
339+
340+
defp store_term(state, deserialize_fun, put_fun, term) do
341+
with {:ok, decompressed} <- maybe_decompress(state, term),
311342
{:ok, deserialized} <- deserialize_fun.(decompressed) do
312343
put_fun.(deserialized)
313344
end

lib/remote_persistent_term/fetcher.ex

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ defmodule RemotePersistentTerm.Fetcher do
1010
@type state :: term()
1111
@type opts :: Keyword.t()
1212
@type version :: String.t()
13+
@type download_if_changed_result ::
14+
{:ok, term(), version()} | {:not_modified, version() | nil} | {:error, term()}
1315

1416
@doc """
1517
Initialize the implementation specific state of the Fetcher.
@@ -26,4 +28,12 @@ defmodule RemotePersistentTerm.Fetcher do
2628
Download the term from the remote source.
2729
"""
2830
@callback download(state()) :: {:ok, term()} | {:error, term()}
31+
32+
@doc """
33+
Optionally download the term only if it has changed. When implemented, it should
34+
return `{:not_modified, current_version}` for an unchanged term or `{:ok, term, new_version}`.
35+
"""
36+
@callback download_if_changed(state(), version() | nil) :: download_if_changed_result
37+
38+
@optional_callbacks download_if_changed: 2
2939
end

lib/remote_persistent_term/fetcher/s3.ex

Lines changed: 122 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,26 @@
11
defmodule RemotePersistentTerm.Fetcher.S3 do
22
@moduledoc """
33
A Fetcher implementation for AWS S3.
4+
5+
## Versioned vs. non-versioned buckets
6+
7+
This fetcher works with both versioned and non-versioned buckets. It uses the object's
8+
`ETag` as a change token and performs conditional GETs with `If-None-Match` to avoid
9+
re-downloading unchanged data.
10+
11+
- **Versioned buckets**: `HEAD`/`GET` responses include `ETag`; the fetcher uses it for
12+
change detection. The latest object is always whatever S3 returns for the key (no explicit
13+
version ID required).
14+
- **Non-versioned buckets**: only `ETag` is available, which is sufficient to detect
15+
content changes. Overwriting an object with identical bytes may keep the same `ETag`,
16+
which is fine because the content is unchanged.
17+
18+
## S3-compatible services
19+
20+
S3-compatible providers (e.g., DigitalOcean Spaces, Linode Object Storage) should work
21+
as long as they support standard S3 headers: `ETag`, `If-None-Match`, and `304 Not Modified`.
22+
If a provider ignores conditional requests, the fetcher will still function but will
23+
download on every refresh.
424
"""
525
require Logger
626

@@ -82,17 +102,20 @@ defmodule RemotePersistentTerm.Fetcher.S3 do
82102

83103
@impl true
84104
def current_version(state) do
85-
with {:ok, versions} <- list_object_versions(state),
86-
{:ok, %{etag: etag, version_id: version}} <- find_latest(versions) do
105+
with {:ok, %{headers: headers}} <- head_object(state),
106+
{:ok, version} <- extract_version(headers) do
87107
Logger.info(
88108
bucket: state.bucket,
89109
key: state.key,
90110
version: version,
91111
message: "Found latest version of object"
92112
)
93113

94-
{:ok, etag}
114+
{:ok, version}
95115
else
116+
{:error, {:http_error, 404, _}} ->
117+
{:error, "could not find s3://#{state.bucket}/#{state.key}"}
118+
96119
{:error, {:unexpected_response, %{body: reason}}} ->
97120
{:error, reason}
98121

@@ -133,60 +156,127 @@ defmodule RemotePersistentTerm.Fetcher.S3 do
133156
end
134157
end
135158

136-
defp list_object_versions(state) do
159+
@impl true
160+
def download_if_changed(state, current_version) do
137161
res =
138-
aws_client_request(
139-
&ExAws.S3.get_bucket_object_versions/2,
162+
get_object_request(
140163
state,
141-
prefix: state.key
164+
if_none_match_opts(current_version),
165+
&failover_on_error?/1
142166
)
143167

144-
with {:ok, %{body: %{versions: versions}}} <- res do
145-
{:ok, versions}
168+
case res do
169+
{:ok, %{status_code: 304}} ->
170+
{:not_modified, current_version}
171+
172+
{:error, {:http_error, 304, _}} ->
173+
{:not_modified, current_version}
174+
175+
{:ok, %{body: body, headers: headers}} ->
176+
with {:ok, version} <- extract_version(headers) do
177+
{:ok, body, version}
178+
end
179+
180+
{:error, reason} ->
181+
{:error, inspect(reason)}
146182
end
147183
end
148184

149185
defp get_object(state) do
150-
aws_client_request(&ExAws.S3.get_object/2, state, state.key)
186+
get_object_request(state, [])
187+
end
188+
189+
defp get_object_request(state, opts, failover_on_error? \\ fn _ -> true end) do
190+
aws_client_request(
191+
fn bucket, request_opts -> ExAws.S3.get_object(bucket, state.key, request_opts) end,
192+
state,
193+
opts,
194+
failover_on_error?
195+
)
196+
end
197+
198+
defp head_object(state) do
199+
aws_client_request(&ExAws.S3.head_object/2, state, state.key)
151200
end
152201

153-
defp find_latest([_ | _] = contents) do
154-
Enum.find(contents, fn
155-
%{is_latest: "true"} ->
156-
true
202+
defp extract_version(headers) do
203+
case header_value(headers, "etag") do
204+
nil -> {:error, :not_found}
205+
value -> {:ok, normalize_etag(value)}
206+
end
207+
end
208+
209+
defp header_value(headers, name) do
210+
downcased = String.downcase(name)
211+
212+
Enum.find_value(headers, fn
213+
{key, value} when is_binary(key) and is_binary(value) ->
214+
if String.downcase(key) == downcased, do: value, else: nil
215+
216+
{key, value} when is_atom(key) and is_binary(value) ->
217+
if String.downcase(Atom.to_string(key)) == downcased, do: value, else: nil
157218

158219
_ ->
159-
false
220+
nil
160221
end)
161-
|> case do
162-
res when is_map(res) -> {:ok, res}
163-
_ -> {:error, :not_found}
222+
end
223+
224+
defp normalize_etag(value) when is_binary(value) do
225+
value
226+
|> String.trim()
227+
|> String.trim("\"")
228+
end
229+
230+
defp if_none_match_opts(nil), do: []
231+
defp if_none_match_opts(etag), do: [if_none_match: quote_etag(etag)]
232+
233+
defp quote_etag(etag) do
234+
etag = String.trim(etag)
235+
236+
if String.starts_with?(etag, "\"") and String.ends_with?(etag, "\"") do
237+
etag
238+
else
239+
"\"#{etag}\""
164240
end
165241
end
166242

167-
defp find_latest(_), do: {:error, :not_found}
243+
defp failover_on_error?({:http_error, 304, _}), do: false
244+
defp failover_on_error?(_reason), do: true
245+
246+
defp aws_client_request(op, state, opts) do
247+
aws_client_request(op, state, opts, fn _ -> true end)
248+
end
168249

169-
defp aws_client_request(op, %{failover_buckets: nil} = state, opts) do
250+
defp aws_client_request(op, %{failover_buckets: nil} = state, opts, _failover_on_error?) do
170251
perform_request(op, state.bucket, state.region, opts)
171252
end
172253

173254
defp aws_client_request(
174255
op,
175256
%{
176-
failover_buckets: [_|_] = failover_buckets
257+
failover_buckets: [_ | _] = failover_buckets
177258
} = state,
178-
opts
259+
opts,
260+
failover_on_error?
179261
) do
180-
with {:error, reason} <- perform_request(op, state.bucket, state.region, opts) do
181-
Logger.error(%{
182-
bucket: state.bucket,
183-
key: state.key,
184-
region: state.region,
185-
reason: inspect(reason),
186-
message: "Failed to fetch from primary bucket, attempting failover buckets"
187-
})
188-
189-
try_failover_buckets(op, failover_buckets, opts, state)
262+
case perform_request(op, state.bucket, state.region, opts) do
263+
{:error, reason} = error ->
264+
if failover_on_error?.(reason) do
265+
Logger.error(%{
266+
bucket: state.bucket,
267+
key: state.key,
268+
region: state.region,
269+
reason: inspect(reason),
270+
message: "Failed to fetch from primary bucket, attempting failover buckets"
271+
})
272+
273+
try_failover_buckets(op, failover_buckets, opts, state)
274+
else
275+
error
276+
end
277+
278+
result ->
279+
result
190280
end
191281
end
192282

test/remote_persistent_term/fetcher/s3_test.exs

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@ defmodule RemotePersistentTerm.Fetcher.S3Test do
1212
[bucket: "failover-bucket-1", region: "failover-region-1"],
1313
[bucket: "failover-bucket-2", region: "failover-region-2"]
1414
]
15-
@version "F76V.weh4uOlU15f7a2OLHPgCLXkDpm4"
16-
1715
test "Unknown error returns an error for current_version/1" do
1816
expect(AwsClientMock, :request, fn _op, _opts ->
1917
{:error, :unknown_error}
@@ -84,11 +82,9 @@ defmodule RemotePersistentTerm.Fetcher.S3Test do
8482
op_bucket == "failover-bucket-1" && region == "failover-region-1" ->
8583
{:ok,
8684
%{
87-
body: %{
88-
versions: [
89-
%{version_id: @version, etag: "current-etag", is_latest: "true"}
90-
]
91-
}
85+
headers: [
86+
{"etag", "\"current-etag\""}
87+
]
9288
}}
9389

9490
true ->
@@ -253,4 +249,30 @@ defmodule RemotePersistentTerm.Fetcher.S3Test do
253249
assert log =~ "Downloaded object from S3"
254250
end
255251
end
252+
253+
describe "download_if_changed/2" do
254+
test "returns not_modified on 304 and sends if-none-match" do
255+
state = %S3{bucket: @bucket, key: @key, region: @region}
256+
257+
expect(AwsClientMock, :request, fn operation, _opts ->
258+
assert operation.headers["if-none-match"] == "\"current-etag\""
259+
{:error, {:http_error, 304, %{}}}
260+
end)
261+
262+
assert {:not_modified, "current-etag"} =
263+
S3.download_if_changed(state, "current-etag")
264+
end
265+
266+
test "returns body and version on 200" do
267+
state = %S3{bucket: @bucket, key: @key, region: @region}
268+
269+
expect(AwsClientMock, :request, fn operation, _opts ->
270+
assert operation.headers["if-none-match"] == "\"old-etag\""
271+
{:ok, %{body: "new-content", headers: [{"etag", "\"new-etag\""}]}}
272+
end)
273+
274+
assert {:ok, "new-content", "new-etag"} =
275+
S3.download_if_changed(state, "old-etag")
276+
end
277+
end
256278
end

0 commit comments

Comments
 (0)