-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathduplicate_note_finder.py
More file actions
101 lines (80 loc) · 2.98 KB
/
duplicate_note_finder.py
File metadata and controls
101 lines (80 loc) · 2.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/env python3
"""
Duplicate Note Finder for Obsidian
This script identifies markdown notes with identical filenames across different
folders in an Obsidian vault. It traverses the entire vault directory structure,
collects all markdown files (*.md), and reports any duplicate filenames along with
their full paths.
Technical Implementation:
- Uses os.walk() to recursively traverse all directories in the vault
- Maintains a dictionary to track filenames and their paths
- Ignores specified directories (like .git, .trash, etc.)
- Outputs results to console, showing duplicate filenames and their locations
- Can be customized to ignore specific directories via the ignored_dirs set in main()
"""
import os
import sys
from typing import Dict, List, Set
from pathlib import Path
def find_duplicate_notes(
    vault_path: str, ignored_dirs: Set[str]
) -> Dict[str, List[str]]:
    """
    Find markdown notes with identical filenames across the vault.

    The vault is walked top-down with os.walk(). Any directory whose name is
    in ``ignored_dirs`` is pruned, so nothing beneath it is visited. Only files
    whose name ends in ".md" are considered, and names are compared exactly
    (case-sensitive). Directory and file names are visited in sorted order so
    the result is reproducible regardless of how the file system lists entries.

    Args:
        vault_path (str): Path to the Obsidian vault root
        ignored_dirs (Set[str]): Set of directory names to ignore

    Returns:
        Dict[str, List[str]]: Dictionary mapping duplicate filenames to lists of
        their paths (each path is ``vault_path`` joined with the location of the
        file). Filenames that occur only once are omitted. An empty dictionary
        is returned when there are no duplicates, including when ``vault_path``
        does not exist (os.walk() ignores listing errors by default).
    """
    # Every markdown filename seen so far -> all paths where it was found
    file_paths: Dict[str, List[str]] = {}

    for root, dirs, files in os.walk(vault_path):
        # Assigning through dirs[:] mutates the list os.walk() recurses into,
        # which is what actually prevents descent into ignored directories.
        dirs[:] = sorted(d for d in dirs if d not in ignored_dirs)

        for filename in sorted(files):
            if filename.endswith(".md"):
                file_paths.setdefault(filename, []).append(
                    os.path.join(root, filename)
                )

    # Keep only the names that were found in more than one place
    return {
        filename: paths for filename, paths in file_paths.items() if len(paths) > 1
    }
def main() -> None:
    """
    Main function to run the duplicate note finder.

    Scans the vault located at ~/Obsidian and prints every markdown filename
    that occurs more than once, followed by the vault-relative path of each
    occurrence. If the vault directory does not exist, an error message is
    written to stderr and the process exits with status 1.
    """
    # The vault is expected in the "Obsidian" folder of the user's home directory
    vault_path = os.path.expanduser("~/Obsidian")

    # Searching a missing directory finds nothing, which would be reported as
    # "No duplicate notes found." — stop with a clear error instead.
    if not os.path.isdir(vault_path):
        sys.exit(f"Error: vault directory not found: {vault_path}")

    # Directory names to skip anywhere in the vault
    ignored_dirs = {
        ".git",
        ".obsidian",
        ".trash",
        "node_modules",
        ".github",
        "__pycache__",
        ".DS_Store",
    }

    print(f"Searching for duplicate notes in: {vault_path}")

    duplicates = find_duplicate_notes(vault_path, ignored_dirs)

    if not duplicates:
        print("No duplicate notes found.")
        return

    print(f"\nFound {len(duplicates)} duplicate note names:")
    for filename, paths in duplicates.items():
        print(f"\n• {filename}")
        for path in paths:
            # Show paths relative to the vault root to keep the output short
            rel_path = os.path.relpath(path, vault_path)
            print(f" - {rel_path}")
# Run the finder only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()