Skip to content

Commit 0eef4b9

Browse files
feat: recency ranking, noise filter, subdomain resolution
Query results are now weighted by recency (30-day half-life, floored at 30% of the BM25 score) so recent memories rank higher. A content noise filter rejects known filler phrases (ok, thanks, got it), strings shorter than 3 characters, and valid JSON blobs. Vendor normalization now strips www/api/app/gateway/gw subdomains and URL paths — api.openai.com/v1 resolves to openai.com. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 031adf1 commit 0eef4b9

4 files changed

Lines changed: 132 additions & 8 deletions

File tree

lightning_memory/db.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,16 +229,25 @@ def query_memories(
229229
hit_ids = []
230230
for row in rows:
231231
hit_ids.append(row["id"])
232+
bm25_score = -row["rank"] # BM25 returns negative; negate for positive
233+
# Recency decay: memories lose relevance over time
234+
# Half-life of 30 days — a 60-day-old memory has a recency weight of 0.25 (0.475x its BM25 score once the 30% floor is applied)
235+
age_days = (now - row["created_at"]) / 86400
236+
recency_weight = 0.5 ** (age_days / 30.0)
237+
relevance = bm25_score * (0.3 + 0.7 * recency_weight) # floor at 30% of original score
232238
results.append({
233239
"id": row["id"],
234240
"content": row["content"],
235241
"type": row["type"],
236242
"metadata": json.loads(row["metadata"]),
237243
"nostr_event_id": row["nostr_event_id"],
238244
"created_at": format_utc(row["created_at"]),
239-
"relevance": -row["rank"], # BM25 returns negative scores; negate for intuitive ordering
245+
"relevance": round(relevance, 6),
240246
})
241247

248+
# Re-sort by recency-weighted relevance
249+
results.sort(key=lambda r: r["relevance"], reverse=True)
250+
242251
# Update access tracking for returned memories
243252
if hit_ids:
244253
_bump_access(conn, hit_ids, now)

lightning_memory/memory.py

Lines changed: 48 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,22 +16,31 @@
1616
def normalize_vendor(name: str) -> str:
1717
"""Normalize a vendor name for consistent matching.
1818
19-
Strips protocol, www prefix, trailing slashes, and lowercases.
19+
Strips protocol, www/api/app/gateway/gw subdomains, trailing slashes, paths,
20+
and lowercases. Reduces to the base domain so that api.openai.com,
21+
www.openai.com, and openai.com all resolve to "openai.com".
22+
2023
Examples:
2124
"https://www.Bitrefill.com/" -> "bitrefill.com"
22-
"WWW.BITREFILL.COM" -> "bitrefill.com"
23-
"bitrefill.com" -> "bitrefill.com"
25+
"api.openai.com/v1" -> "openai.com"
26+
"app.example.io" -> "example.io"
27+
"bitrefill" -> "bitrefill"
2428
"""
2529
v = name.strip().lower()
2630
# Strip protocol
2731
for prefix in ("https://", "http://"):
2832
if v.startswith(prefix):
2933
v = v[len(prefix):]
3034
break
31-
# Strip www. prefix
32-
if v.startswith("www."):
33-
v = v[4:]
34-
# Strip trailing slash
35+
# Strip path (everything after first /)
36+
if "/" in v:
37+
v = v.split("/")[0]
38+
# Strip common subdomains (www, api, app, gateway, gw)
39+
for sub in ("www.", "api.", "app.", "gateway.", "gw."):
40+
if v.startswith(sub):
41+
v = v[len(sub):]
42+
break
43+
# Strip trailing slash (safety)
3544
v = v.rstrip("/")
3645
return v
3746

@@ -69,6 +78,34 @@ def parse_since(since: str) -> float:
6978
_DEDUP_DEFAULT_THRESHOLD = 0.80
7079

7180

81+
# Noise patterns that should not be stored as memories
82+
_NOISE_PATTERNS = {
83+
"ok", "okay", "got it", "thanks", "thank you", "sure", "yes", "no",
84+
"understood", "ack", "acknowledged", "noted", "done", "fine", "right",
85+
}
86+
_MIN_CONTENT_LENGTH = 3 # memories shorter than this are noise
87+
88+
89+
def _is_noise(content: str) -> bool:
90+
"""Check if content is too short, a known noise phrase, or a JSON blob.
91+
92+
Returns True if the content should be rejected.
93+
"""
94+
stripped = content.strip()
95+
if len(stripped) < _MIN_CONTENT_LENGTH:
96+
return True
97+
if stripped.lower().rstrip(".!") in _NOISE_PATTERNS:
98+
return True
99+
# Reject JSON blobs (agents sometimes dump raw API responses)
100+
if stripped.startswith("{") or stripped.startswith("["):
101+
try:
102+
json.loads(stripped)
103+
return True # Valid JSON blob — not a natural language memory
104+
except (json.JSONDecodeError, ValueError):
105+
pass
106+
return False
107+
108+
72109
def _jaccard(text_a: str, text_b: str, min_word_len: int = 3) -> float:
73110
"""Compute Jaccard similarity on word sets (words >= min_word_len chars)."""
74111
words_a = {re.sub(r"[^\w]", "", w) for w in text_a.lower().split() if len(w) >= min_word_len}
@@ -103,6 +140,10 @@ def store(
103140
104141
Types: general, transaction, vendor, preference, error, decision
105142
"""
143+
# Reject noise content
144+
if _is_noise(content):
145+
return {"status": "rejected", "reason": "noise", "content_preview": content[:50]}
146+
106147
# Check for near-duplicate before storing
107148
existing = self._find_duplicate(content, memory_type, metadata)
108149
if existing is not None:

tests/test_memory_quality.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,61 @@ def test_no_price_contradiction_for_small_changes(engine):
167167
assert "contradictions" not in r2 or len(r2.get("contradictions", [])) == 0
168168

169169

170+
# --- Noise filtering tests ---
171+
172+
173+
def test_noise_short_content_rejected(engine):
174+
"""Very short content should be rejected."""
175+
result = engine.store("ok")
176+
assert result.get("status") == "rejected"
177+
assert result.get("reason") == "noise"
178+
179+
180+
def test_noise_known_phrase_rejected(engine):
181+
"""Known noise phrases should be rejected."""
182+
for phrase in ["got it", "thanks", "sure", "acknowledged"]:
183+
result = engine.store(phrase)
184+
assert result.get("status") == "rejected", f"'{phrase}' should be rejected"
185+
186+
187+
def test_noise_json_blob_rejected(engine):
188+
"""JSON blobs should be rejected."""
189+
result = engine.store('{"key": "value", "nested": {"a": [1,2,3]}}')
190+
assert result.get("status") == "rejected"
191+
192+
193+
def test_noise_valid_content_accepted(engine):
194+
"""Normal memory content should pass the noise filter."""
195+
result = engine.store("Paid 500 sats to bitrefill for a gift card")
196+
assert "status" not in result or result.get("status") != "rejected"
197+
198+
199+
# --- Recency-weighted ranking tests ---
200+
201+
202+
def test_recent_memories_rank_higher(engine):
203+
"""Recent memories should rank higher than old ones for same query."""
204+
import time as _time
205+
206+
# Store two memories with same keywords but different ages
207+
engine.store("bitrefill vendor is reliable for gift cards", memory_type="vendor")
208+
209+
# Artificially age the first memory by updating created_at
210+
engine.conn.execute(
211+
"UPDATE memories SET created_at = created_at - 5184000" # 60 days ago
212+
)
213+
engine.conn.commit()
214+
215+
# Store a newer memory
216+
engine.store("bitrefill vendor has new pricing for gift cards", memory_type="vendor")
217+
218+
results = engine.query("bitrefill vendor gift cards")
219+
assert len(results) >= 2
220+
# The newer memory should have higher relevance
221+
relevances = [r["relevance"] for r in results]
222+
assert relevances[0] >= relevances[1], "Newer memory should rank first"
223+
224+
170225
def test_contradiction_includes_existing_preview(engine):
171226
"""Contradiction should include a preview of the conflicting memory."""
172227
engine.store(

tests/test_vendor_normalization.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,25 @@ def test_normalize_already_clean():
4545
assert normalize_vendor("openai") == "openai"
4646

4747

48+
def test_normalize_subdomain_api():
49+
assert normalize_vendor("api.openai.com") == "openai.com"
50+
assert normalize_vendor("https://api.openai.com/v1") == "openai.com"
51+
52+
53+
def test_normalize_subdomain_app():
54+
assert normalize_vendor("app.example.io") == "example.io"
55+
56+
57+
def test_normalize_subdomain_gateway():
58+
assert normalize_vendor("gateway.bitrefill.com") == "bitrefill.com"
59+
assert normalize_vendor("gw.bitrefill.com") == "bitrefill.com"
60+
61+
62+
def test_normalize_strips_path():
63+
assert normalize_vendor("bitrefill.com/api/v1") == "bitrefill.com"
64+
assert normalize_vendor("https://api.openai.com/v1/chat") == "openai.com"
65+
66+
4867
def test_normalize_empty():
4968
assert normalize_vendor("") == ""
5069

0 commit comments

Comments
 (0)