Spookalicious · ozipi · Feb 26, 2025 · Feb 26, 2025 · Feb 26, 2025
diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml
@@ -1,4 +1,4 @@
-name: Paperstack Core
+name: Arxivist Core
 
 on:
   workflow_call:
@@ -29,11 +29,11 @@ jobs:
         python -m pip install --upgrade pip
         pip install -r requirements.txt
 
-    - name: Run paperstack
+    - name: Run arxivist
       env:
         OPENAI_API_TOKEN: ${{ secrets.OPENAI_API_TOKEN }}
       run: |
-        python paperstack.py \
+        python arxivist.py \
           --output-csv "papers.csv" \
           ${{ inputs.search-arxiv == true && '--search-arxiv' || '' }} \
           ${{ inputs.search-scholar == true && '--search-semantic-scholar' || '' }}

diff --git a/.github/workflows/csv.yml b/.github/workflows/csv.yml
@@ -1,4 +1,4 @@
-name: Paperstack (CSV)
+name: Arxivist (CSV)
 
 on:
   workflow_dispatch:
@@ -36,6 +36,6 @@ jobs:
           # Create new release
           gh release create latest-papers \
             --title "Latest Research Papers" \
-            --notes "Latest research papers from PaperStack" \
+            --notes "Latest research papers from Arxivist" \
             research-papers/papers.csv \
             --latest
diff --git a/.github/workflows/long.yml b/.github/workflows/long.yml
@@ -1,4 +1,4 @@
-name: Paperstack (Long)
+name: Arxivist (Long)
 
 on:
   schedule:

diff --git a/.github/workflows/manual.yml b/.github/workflows/manual.yml
@@ -1,4 +1,4 @@
-name: Paperstack (Manual)
+name: Arxivist (Manual)
 
 on:
   workflow_dispatch:

diff --git a/.github/workflows/short.yml b/.github/workflows/short.yml
@@ -1,4 +1,4 @@
-name: Paperstack (Short)
+name: Arxivist (Short)
 
 on:
   schedule:

diff --git a/.gitignore b/.gitignore
@@ -158,3 +158,11 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
+
+
+# Obsidian
+/obsidian/
+
+# Papers
+papers.csv
+papers.md
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
-# paperstack
+# Arxivist
 
-Paperstack uses ArXiv and Semantic Scholar (relational) to sync academic paper information into a Notion DB. It also has some lightweight uses of OpenAI models for summarization and categorization. It was built for gathering machine learning and security related papers, but could be adapted easily to any other subject (`ARXIV_SEARCH`/`--arxiv-search-query`). It's deplyoment is focused on Github actions, but can be executed on the command line directly. It can also detect partial entries (ArXiv link or title) in the Notion DB and fill in the remaining information.
+Arxivist uses ArXiv and Semantic Scholar (relational) to sync academic paper information into a Notion DB. It also has some lightweight uses of OpenAI models for summarization and categorization. It was built for gathering machine learning and security related papers, but could be adapted easily to any other subject (`ARXIV_SEARCH`/`--arxiv-search-query`). Its deployment is focused on GitHub Actions, but can be executed on the command line directly. It can also detect partial entries (ArXiv link or title) in the Notion DB and fill in the remaining information.
 
 The Notion DB requires a semi-fixed structure as a function of the syncing logic (`notion_utils.py`), and you're free to add columns and custom syncing behavior as needed. Here is the mininmum database layout the tool currently expects:
 

diff --git a/obsidian_utils.py b/obsidian_utils.py
@@ -0,0 +1,86 @@
+import json
+import os
+from datetime import datetime
+from typing import List
+from _types import Paper
+
+
+def _safe_note_name(title: str) -> str:
+    """Return a filesystem-safe note name derived from a paper title.
+
+    Every character that is not alphanumeric, a space or a hyphen is replaced
+    with an underscore.
+    """
+    return "".join(c if c.isalnum() or c in (' ', '-') else '_' for c in title)
+
+
+def _table_cell(text: str) -> str:
+    """Collapse whitespace runs and escape pipes so ``text`` stays in one table cell."""
+    return " ".join(text.split()).replace("|", "\\|")
+
+
+def write_papers_to_obsidian(output_dir: str, papers: List[Paper]) -> None:
+    """Write one Obsidian Markdown note per paper into ``output_dir``.
+
+    The directory is created if needed. Papers without a title are skipped.
+    Each note is named after the sanitized title and holds YAML frontmatter
+    (title, url, date, authors, focus) followed by optional "Summary" and
+    "Abstract" sections. An existing note with the same name is overwritten.
+    """
+    os.makedirs(output_dir, exist_ok=True)
+
+    for paper in papers:
+        if not paper.title:
+            continue
+
+        filename = os.path.join(output_dir, f"{_safe_note_name(paper.title)}.md")
+
+        with open(filename, 'w', encoding='utf-8') as f:
+            # Write frontmatter. Free-text values are emitted as JSON strings,
+            # which are valid YAML double-quoted scalars, so titles containing
+            # ": ", "#" or line breaks cannot corrupt the frontmatter.
+            f.write("---\n")
+            f.write(f"title: {json.dumps(paper.title, ensure_ascii=False)}\n")
+            if paper.url:
+                f.write(f"url: {paper.url}\n")
+            if paper.published:
+                f.write(f"date: {paper.published.strftime('%Y-%m-%d')}\n")
+            if paper.authors:
+                f.write(f"authors: {json.dumps(', '.join(paper.authors), ensure_ascii=False)}\n")
+            if paper.focus:
+                f.write(f"focus: {paper.focus.value}\n")
+            f.write("---\n\n")
+
+            # Write content
+            if paper.summary:
+                f.write("## Summary\n")
+                f.write(f"{paper.summary}\n\n")
+
+            if paper.abstract:
+                f.write("## Abstract\n")
+                f.write(f"{paper.abstract}\n")
+
+
+def write_papers_table_to_markdown(output_file: str, papers: List[Paper]) -> None:
+    """Write a Markdown table of papers to ``output_file``.
+
+    One row is written per paper; missing fields are rendered as "N/A". The
+    title cell is an Obsidian wikilink that targets the note name produced by
+    ``write_papers_to_obsidian`` so the link resolves even when the title
+    contains characters that were replaced in the filename.
+    """
+    with open(output_file, 'w', encoding='utf-8') as f:
+        f.write("# Arxivist Papers\n\n")
+        f.write("| Title | Authors | Published | URL | Summary | Focus |\n")
+        f.write("|-------|---------|-----------|-----|---------|-------|\n")
+
+        for paper in papers:
+            # Link to the sanitized note name, not the raw title
+            title_link = f"[[{_safe_note_name(paper.title)}]]" if paper.title else "N/A"
+            authors = _table_cell(", ".join(paper.authors)) if paper.authors else "N/A"
+            published_date = paper.published.strftime('%Y-%m-%d') if paper.published else "N/A"
+            url = paper.url if paper.url else "N/A"
+            summary = _table_cell(paper.summary) if paper.summary else "N/A"
+            focus = paper.focus.value if paper.focus else "N/A"
+
+            f.write(f"| {title_link} | {authors} | {published_date} | {url} | {summary} | {focus} |\n")
diff --git a/paperstack.py b/paperstack.py
@@ -9,6 +9,7 @@
     summarize_abstract_with_openai,
 )
 from scholar_utils import get_recommended_arxiv_ids_from_semantic_scholar
+from obsidian_utils import write_papers_table_to_markdown
 
 ARXIV_SEARCH = """\
 "adversarial attacks" OR "language model attacks" OR "LLM vulnerabilities" OR \
@@ -29,18 +30,29 @@ def main():
     parser.add_argument(
         "--openai-token",
         type=str,
         default=os.environ.get("OPENAI_API_TOKEN"),
-        help="OpenAI token",
+        help="OpenAI token (optional; defaults to $OPENAI_API_TOKEN)",
     )
     parser.add_argument("--arxiv-search-query", type=str, default=ARXIV_SEARCH)
     parser.add_argument("--search-arxiv", action="store_true", default=False)
     parser.add_argument("--search-semantic-scholar", action="store_true", default=False)
+    parser.add_argument(
+        "--output-obsidian",
+        type=str,
+        default="obsidian",
+        help="Path to output Obsidian folder (created if missing)",
+    )
 
     args = parser.parse_args()
 
-    print("[+] Paperstack")
+    print("[+] Arxivist")
 
-    openai_client = get_openai_client(args.openai_token)
+    openai_client = None
+    if args.openai_token:
+        openai_client = get_openai_client(args.openai_token)
+        print(" |- OpenAI client initialized")
+    else:
+        print(" |- No OpenAI token provided; skipping OpenAI operations")
 
     print(f" |- Reading existing papers from CSV [{args.output_csv}]")
     papers = get_papers_from_csv(args.output_csv)
@@ -69,24 +81,29 @@ def main():
         else:
             print(" |- All papers have been explored")
 
-    if not all([paper.summary for paper in papers]):
-        print(" |- Building summaries with OpenAI")
-        for paper in [p for p in papers if not p.summary and p.abstract]:
-            print(f"    |- {paper.title[:50]}...")
-            paper.summary = summarize_abstract_with_openai(
-                openai_client, paper.abstract
-            )
-
-    if not all([paper.focus for paper in papers]):
-        print(" |- Assigning focus labels with OpenAI")
-        for paper in [p for p in papers if not p.focus and p.abstract]:
-            paper.focus = get_focus_label_from_abstract(openai_client, paper.abstract)
-            print(f"    |- {paper.focus}")
+    if openai_client:
+        print(" |- Generating summaries and focus labels using OpenAI")
+        # Only fill in missing fields so existing values are not overwritten or re-requested
+        for paper in (p for p in papers if p.abstract):
+            paper.summary = paper.summary or summarize_abstract_with_openai(openai_client, paper.abstract)
+            paper.focus = paper.focus or get_focus_label_from_abstract(openai_client, paper.abstract)
+    else:
+        print(" |- Skipping summary generation as no OpenAI token was provided")
 
     print(f" |- Writing papers to CSV [{args.output_csv}]")
     write_papers_to_csv(args.output_csv, papers)
     print(f" |- Done! Saved {len(papers)} papers to {args.output_csv}")
 
+    if args.output_obsidian:
+        print(f" |- Writing papers to Obsidian format [{args.output_obsidian}]")
+        from obsidian_utils import write_papers_to_obsidian
+        write_papers_to_obsidian(args.output_obsidian, papers)
+
+        # Create the arxivist-papers.md index in the same folder; skipped when Obsidian output is disabled
+        arxivist_file_path = os.path.join(args.output_obsidian, "arxivist-papers.md")
+        print(f" |- Writing papers table to Markdown file [{arxivist_file_path}]")
+        write_papers_table_to_markdown(arxivist_file_path, papers)
+
 
 if __name__ == "__main__":
     main()