From f5c7ab50c28877b80dfbe41b5d308f2f0a0c107f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Du=C5=A1an=20Jolovi=C4=87?= Date: Thu, 7 May 2026 06:25:01 +0200 Subject: [PATCH] refactor: derive last_accessed_at from claimed_at to unify timestamp tracking The SQLite store maintained two independent timestamps (claimed_at and last_accessed_at) that were updated through different code paths. This led to subtle inconsistencies: - find_by_fingerprint did a side-effect UPDATE on every read, adding write contention and making reads non-deterministic. - put_commit passed a separate 'now' for last_accessed_at, which could drift from claimed_at if the commit was modified elsewhere. Changes: - put_commit now uses commit.claimed_at as the source of truth for last_accessed_at. A commit is 'accessed' when it is claimed. - find_by_fingerprint is now side-effect-free; callers that need to touch a commit already call put_commit on cache hits. - Redis _encode_commit aligned to use commit.claimed_at for last_accessed_at for cross-backend consistency. Tests verify the invariant last_accessed_at == claimed_at and that repeated cache hits no longer shift the access timestamp. --- src/cashet/redis_store.py | 2 +- src/cashet/store.py | 10 ++------- tests/test_async_client.py | 43 ++++++++++++++++++++++++++++++++++++++ tests/test_store.py | 39 ++++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 9 deletions(-) diff --git a/src/cashet/redis_store.py b/src/cashet/redis_store.py index 3bb0951..d4ffb98 100644 --- a/src/cashet/redis_store.py +++ b/src/cashet/redis_store.py @@ -120,7 +120,7 @@ def _encode_commit(commit: Commit) -> bytes: "tags": commit.tags, "created_at": commit.created_at.isoformat(), "claimed_at": commit.claimed_at.isoformat(), - "last_accessed_at": datetime.now(UTC).isoformat(), + "last_accessed_at": commit.claimed_at.isoformat(), } return json.dumps(d, separators=(",", ":")).encode() diff --git a/src/cashet/store.py b/src/cashet/store.py index a80c60a..85fd71f 100644 --- a/src/cashet/store.py +++ b/src/cashet/store.py @@ -271,9 +271,8 @@ def blob_exists(self, hash: str) -> bool: def put_commit(self, commit: Commit) -> None: conn = self._connect(immediate=True) - now = datetime.now(UTC).isoformat() try: - self._put_commit_row(conn, commit, now) + self._put_commit_row(conn, commit, commit.claimed_at.isoformat()) conn.execute("COMMIT") except Exception: conn.execute("ROLLBACK") @@ -329,8 +328,7 @@ def _put_commit_row( def find_by_fingerprint(self, fingerprint: str) -> Commit | None: conn = self._connect() - now = datetime.now(UTC) - now_iso = now.isoformat() + now_iso = datetime.now(UTC).isoformat() row = conn.execute( """SELECT * FROM commits WHERE fingerprint = ? AND status IN ('completed', 'cached') @@ -341,10 +339,6 @@ def find_by_fingerprint(self, fingerprint: str) -> Commit | None: ).fetchone() if row is None: return None - conn.execute( - "UPDATE commits SET last_accessed_at = ? WHERE hash = ?", - (now_iso, row["hash"]), - ) return self._row_to_commit(row) def find_running_by_fingerprint(self, fingerprint: str) -> Commit | None: diff --git a/tests/test_async_client.py b/tests/test_async_client.py index 04da39a..5ca066e 100644 --- a/tests/test_async_client.py +++ b/tests/test_async_client.py @@ -315,6 +315,49 @@ def do_thing(x: int) -> int: assert do_thing.__doc__ == "A docstring." assert hasattr(do_thing, "_cashet_wrapped_func") + async def test_last_accessed_at_derived_from_claimed_at( + self, async_client: AsyncClient + ) -> None: + def work() -> int: + return 42 + + ref = await async_client.submit(work) + assert await ref.load() == 42 + + conn = async_client.store._core._connect() + row = conn.execute( + "SELECT claimed_at, last_accessed_at FROM commits WHERE hash = ?", + (ref.commit_hash,), + ).fetchone() + assert row["claimed_at"] == row["last_accessed_at"] + + async def test_cache_hit_does_not_shift_last_accessed_at( + self, async_client: AsyncClient + ) -> None: + def work() -> int: + return 42 + + ref1 = await async_client.submit(work) + assert await ref1.load() == 42 + + conn = async_client.store._core._connect() + row_before = conn.execute( + "SELECT last_accessed_at FROM commits WHERE hash = ?", + (ref1.commit_hash,), + ).fetchone() + la_before = row_before["last_accessed_at"] + + ref2 = await async_client.submit(work) + assert await ref2.load() == 42 + + row_after = conn.execute( + "SELECT last_accessed_at FROM commits WHERE hash = ?", + (ref1.commit_hash,), + ).fetchone() + la_after = row_after["last_accessed_at"] + + assert la_before == la_after + class TestAsyncContextManager: async def test_async_with(self) -> None: diff --git a/tests/test_store.py b/tests/test_store.py index 13cee6b..72975e6 100644 --- a/tests/test_store.py +++ b/tests/test_store.py @@ -1099,6 +1099,45 @@ def make_bytes(n: int) -> bytes: client.gc(older_than=timedelta(days=1), max_size_bytes=1) assert client.stats()["total_commits"] == 0 + def test_last_accessed_at_derived_from_claimed_at(self, client: Client) -> None: + def work() -> int: + return 42 + + ref = client.submit(work) + assert ref.load() == 42 + + conn = client.store._connect() + row = conn.execute( + "SELECT claimed_at, last_accessed_at FROM commits WHERE hash = ?", + (ref.commit_hash,), + ).fetchone() + assert row["claimed_at"] == row["last_accessed_at"] + + def test_cache_hit_does_not_shift_last_accessed_at(self, client: Client) -> None: + def work() -> int: + return 42 + + ref1 = client.submit(work) + assert ref1.load() == 42 + + conn = client.store._connect() + row_before = conn.execute( + "SELECT last_accessed_at FROM commits WHERE hash = ?", + (ref1.commit_hash,), + ).fetchone() + la_before = row_before["last_accessed_at"] + + ref2 = client.submit(work) + assert ref2.load() == 42 + + row_after = conn.execute( + "SELECT last_accessed_at FROM commits WHERE hash = ?", + (ref1.commit_hash,), + ).fetchone() + la_after = row_after["last_accessed_at"] + + assert la_before == la_after + class TestTTL: def test_ttl_roundtrip(self, client: Client) -> None: