forked from dreadnode/paperstack
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcsv_utils.py
More file actions
70 lines (59 loc) · 2.6 KB
/
csv_utils.py
File metadata and controls
70 lines (59 loc) · 2.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import ast
import csv
import os
from datetime import datetime
from typing import List

from _types import Paper
def _parse_authors(raw: str) -> List[str]:
    """Decode an ``authors`` CSV cell back into a list of author names.

    The cell is first parsed as a Python literal, which matches a list that
    was serialized with ``str(list)`` (e.g. ``"['A', 'B']"``). Parsing the
    literal keeps names that contain commas or apostrophes intact, which
    plain comma-splitting cannot do. Cells that are not a list/tuple literal
    fall back to stripping brackets and splitting on commas.
    """
    if not raw:
        return []
    try:
        parsed = ast.literal_eval(raw)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Not a Python literal (e.g. a hand-edited cell such as "A, B").
        parsed = None
    if isinstance(parsed, (list, tuple)):
        return [str(name) for name in parsed]
    # Fallback for non-literal cells: bracket-strip and comma-split.
    pieces = raw.strip('[]').split(',')
    return [piece.strip().strip("'") for piece in pieces if piece.strip()]


def get_papers_from_csv(csv_path: str) -> List[Paper]:
    """Read papers from CSV if it exists.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        One ``Paper`` per data row, in file order. An empty list is returned
        when ``csv_path`` does not exist.

    Raises:
        KeyError: If an expected column is missing from the CSV header.
        ValueError: If a non-empty ``published`` cell is not ``YYYY-MM-DD``.
    """
    papers: List[Paper] = []
    if not os.path.exists(csv_path):
        return papers
    # newline='' leaves line-ending handling to the csv module, so quoted
    # fields containing embedded newlines are read back unchanged.
    with open(csv_path, 'r', encoding='utf-8', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            published_raw = row['published']
            paper = Paper(
                title=row['title'],
                url=row['url'],
                authors=_parse_authors(row['authors']),
                abstract=row['abstract'],
                published=(
                    datetime.strptime(published_raw, '%Y-%m-%d').date()
                    if published_raw
                    else None
                ),
                # A short row yields None for missing cells; treat as False.
                explored=(row['explored'] or '').lower() == 'true',
                summary=row['summary'],
                focus=row['focus'],
            )
            papers.append(paper)
    return papers
def write_papers_to_csv(csv_path: str, papers: List[Paper]) -> None:
    """Merge ``papers`` into the CSV at ``csv_path`` and rewrite the file.

    Papers already stored in the file are loaded first and keyed by title.
    Each incoming paper then replaces the stored entry with the same title,
    or is appended as a new entry. The parent directory of ``csv_path`` is
    created when the path has one and it does not exist yet.
    """
    parent_dir = os.path.dirname(csv_path)
    if parent_dir:
        # A bare filename has no directory component to create.
        os.makedirs(parent_dir, exist_ok=True)

    # Start from what is already on disk so earlier rows are kept.
    if os.path.exists(csv_path):
        stored = get_papers_from_csv(csv_path)
    else:
        stored = []

    by_title = {}
    for old in stored:
        by_title[old.title] = old
    # Incoming papers win over stored ones that share a title.
    by_title.update((new.title, new) for new in papers)

    columns = ['title', 'url', 'authors', 'abstract', 'published', 'explored', 'summary', 'focus']
    with open(csv_path, 'w', encoding='utf-8', newline='') as out:
        csv_writer = csv.DictWriter(out, fieldnames=columns)
        csv_writer.writeheader()
        for entry in by_title.values():
            row = {
                'title': entry.title,
                'url': entry.url,
                'authors': str(entry.authors),
                'abstract': entry.abstract,
            }
            # A missing publication date is stored as an empty cell.
            if entry.published:
                row['published'] = entry.published.strftime('%Y-%m-%d')
            else:
                row['published'] = ''
            row['explored'] = str(entry.explored)
            row['summary'] = entry.summary
            row['focus'] = entry.focus
            csv_writer.writerow(row)