-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathfeedfilter.py
More file actions
56 lines (46 loc) · 1.87 KB
/
feedfilter.py
File metadata and controls
56 lines (46 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import difflib
from shared import g_c
def merge_entries(new_entries, old_entries, title_threshold=0.85):
    """
    Merge two lists of feed entries, preserving order and avoiding duplicates.

    Two entries are considered the same if they share the same (truthy)
    'link' value. Entries without a link are never treated as duplicates and
    are always kept.

    New entries take precedence over cached ones, but the 'published' and
    'published_parsed' values of a cached entry with the same link are copied
    onto the new entry so the original timestamp is preserved.

    Args:
        new_entries: Iterable of dict-like entries from the fresh fetch.
            Entries that match a cached link are updated IN PLACE with the
            cached timestamp fields. None is treated as empty.
        old_entries: Iterable of dict-like cached entries. None is treated
            as empty.
        title_threshold: Accepted for backward compatibility; this function
            does not currently use it (matching is by link only).

    Returns:
        A new list: the new entries in their original order (first
        occurrence of each link only), followed by the cached entries whose
        link has not already been emitted.
    """
    # Materialise once: old_entries is walked twice below, so a one-shot
    # iterator would otherwise be exhausted before the second pass.
    new_entries = list(new_entries or ())
    old_entries = list(old_entries or ())

    # Map link -> cached entry for timestamp preservation. If the cache holds
    # several entries with the same link, the last one wins.
    cached_by_link = {}
    for entry in old_entries:
        link = entry.get('link')
        if link:
            cached_by_link[link] = entry

    merged = []
    seen_links = set()

    # New entries come first and win over cached ones.
    for entry in new_entries:
        link = entry.get('link')
        if link:
            if link in seen_links:
                # Same link already emitted from this fetch: duplicate.
                continue
            seen_links.add(link)
            cached = cached_by_link.get(link)
            if cached is not None:
                # Keep the timestamp recorded when the entry was first cached.
                for field in ('published', 'published_parsed'):
                    if field in cached:
                        entry[field] = cached[field]
        merged.append(entry)

    # Append cached entries that are not already represented.
    for entry in old_entries:
        link = entry.get('link')
        if link:
            if link in seen_links:
                continue
            # Record the link so a repeated cached entry is not emitted twice.
            seen_links.add(link)
        merged.append(entry)

    return merged