Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/cashet/redis_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def _encode_commit(commit: Commit) -> bytes:
"tags": commit.tags,
"created_at": commit.created_at.isoformat(),
"claimed_at": commit.claimed_at.isoformat(),
"last_accessed_at": datetime.now(UTC).isoformat(),
"last_accessed_at": commit.claimed_at.isoformat(),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Medium Redis find_by_fingerprint still touches access time; backfill uses stale claimed_at

The PR removes the side-effect UPDATE from SQLite's find_by_fingerprint, but the Redis variant still calls _touch_commit, which updates the access sorted set with the current time, so the two backends no longer behave consistently. Moreover, the backfill function _backfill_access_index reads last_accessed_at from the JSON, which is now set to claimed_at and may be older than the entry's existing access score. During a backfill, it overwrites the access sorted-set entry with this older timestamp, causing LRU eviction to treat still-active entries as old.

}
return json.dumps(d, separators=(",", ":")).encode()

Expand Down
10 changes: 2 additions & 8 deletions src/cashet/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,8 @@ def blob_exists(self, hash: str) -> bool:

def put_commit(self, commit: Commit) -> None:
conn = self._connect(immediate=True)
now = datetime.now(UTC).isoformat()
try:
self._put_commit_row(conn, commit, now)
self._put_commit_row(conn, commit, commit.claimed_at.isoformat())
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 High put_commit no longer advances last_accessed_at and may fail on None claimed_at

put_commit now uses commit.claimed_at.isoformat() instead of the current time, so callers relying on put_commit for cache-hit promotion or heartbeats will no longer refresh the access timestamp, potentially causing premature eviction. Additionally, if commit.claimed_at is None (e.g., for an unclaimed commit), this raises AttributeError. Previously, the timestamp was taken from datetime.now(UTC) and did not depend on the commit.

Suggested change
self._put_commit_row(conn, commit, commit.claimed_at.isoformat())
self._put_commit_row(conn, commit, (commit.claimed_at or datetime.now(UTC)).isoformat())

conn.execute("COMMIT")
except Exception:
conn.execute("ROLLBACK")
Expand Down Expand Up @@ -329,8 +328,7 @@ def _put_commit_row(

def find_by_fingerprint(self, fingerprint: str) -> Commit | None:
conn = self._connect()
now = datetime.now(UTC)
now_iso = now.isoformat()
now_iso = datetime.now(UTC).isoformat()
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P3 Low Dead code: now_iso assignment no longer used

The assignment to now_iso is no longer used because the subsequent UPDATE statement was removed. This wastes a system call and can mislead maintainers.

row = conn.execute(
"""SELECT * FROM commits
WHERE fingerprint = ? AND status IN ('completed', 'cached')
Expand All @@ -341,10 +339,6 @@ def find_by_fingerprint(self, fingerprint: str) -> Commit | None:
).fetchone()
if row is None:
return None
conn.execute(
"UPDATE commits SET last_accessed_at = ? WHERE hash = ?",
(now_iso, row["hash"]),
)
return self._row_to_commit(row)

def find_running_by_fingerprint(self, fingerprint: str) -> Commit | None:
Expand Down
43 changes: 43 additions & 0 deletions tests/test_async_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,49 @@ def do_thing(x: int) -> int:
assert do_thing.__doc__ == "A docstring."
assert hasattr(do_thing, "_cashet_wrapped_func")

async def test_last_accessed_at_derived_from_claimed_at(
    self, async_client: AsyncClient
) -> None:
    """After a submit/load round trip, the stored row's last_accessed_at equals its claimed_at."""

    def work() -> int:
        return 42

    handle = await async_client.submit(work)
    loaded = await handle.load()
    assert loaded == 42

    # Inspect the commits table directly; the check is on the raw stored columns.
    db = async_client.store._core._connect()
    cursor = db.execute(
        "SELECT claimed_at, last_accessed_at FROM commits WHERE hash = ?",
        (handle.commit_hash,),
    )
    stored = cursor.fetchone()
    assert stored["claimed_at"] == stored["last_accessed_at"]

async def test_cache_hit_does_not_shift_last_accessed_at(
    self, async_client: AsyncClient
) -> None:
    """Submitting the same function a second time leaves the stored last_accessed_at unchanged."""

    def work() -> int:
        return 42

    first = await async_client.submit(work)
    assert await first.load() == 42

    db = async_client.store._core._connect()

    def read_last_accessed():
        # Always look up the row of the first submission's commit.
        cursor = db.execute(
            "SELECT last_accessed_at FROM commits WHERE hash = ?",
            (first.commit_hash,),
        )
        return cursor.fetchone()["last_accessed_at"]

    accessed_before = read_last_accessed()

    second = await async_client.submit(work)
    assert await second.load() == 42

    accessed_after = read_last_accessed()

    assert accessed_before == accessed_after


class TestAsyncContextManager:
async def test_async_with(self) -> None:
Expand Down
39 changes: 39 additions & 0 deletions tests/test_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -1099,6 +1099,45 @@ def make_bytes(n: int) -> bytes:
client.gc(older_than=timedelta(days=1), max_size_bytes=1)
assert client.stats()["total_commits"] == 0

def test_last_accessed_at_derived_from_claimed_at(self, client: Client) -> None:
def work() -> int:
return 42

ref = client.submit(work)
assert ref.load() == 42

conn = client.store._connect()
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P3 Low Tests use private _connect() to check timestamps

The new tests use client.store._connect() to directly query the database. This depends on internal implementation details (private method _connect) and may break if the store connection mechanism changes. Using a public API to retrieve the commit and check timestamps would be more robust.

row = conn.execute(
"SELECT claimed_at, last_accessed_at FROM commits WHERE hash = ?",
(ref.commit_hash,),
).fetchone()
assert row["claimed_at"] == row["last_accessed_at"]

def test_cache_hit_does_not_shift_last_accessed_at(self, client: Client) -> None:
    """Submitting the same function a second time leaves the stored last_accessed_at unchanged."""

    def work() -> int:
        return 42

    first = client.submit(work)
    assert first.load() == 42

    db = client.store._connect()

    def read_last_accessed():
        # Always look up the row of the first submission's commit.
        cursor = db.execute(
            "SELECT last_accessed_at FROM commits WHERE hash = ?",
            (first.commit_hash,),
        )
        return cursor.fetchone()["last_accessed_at"]

    accessed_before = read_last_accessed()

    second = client.submit(work)
    assert second.load() == 42

    accessed_after = read_last_accessed()

    assert accessed_before == accessed_after


class TestTTL:
def test_ttl_roundtrip(self, client: Client) -> None:
Expand Down
Loading