Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,11 +80,12 @@ currency_format: "€{amount}" # Optional: €1,234 or "{amount} zł" for 1,234

data_sources:
- name: AMEX
file: data/amex.csv
file: data/amex.csv # Single file
account_type: credit_card
format: "{date:%m/%d/%Y},{description},{amount}"
# Use glob patterns to match multiple files:
- name: Chase
file: data/chase.csv
file: data/chase*.csv # Matches chase-2024.csv, chase-2025.csv, etc.
account_type: credit_card
format: "{date:%m/%d/%Y},{description},{amount}"
- name: BofA Checking
Expand All @@ -98,6 +99,8 @@ data_sources:
decimal_separator: "," # European format (1.234,56)
```

**File Patterns**: The `file` parameter supports glob patterns (`*`, `?`, `[]`) to match multiple files. Files are processed in sorted order.

### Account Types

Use `account_type` to automatically handle sign conventions:
Expand Down
7 changes: 6 additions & 1 deletion config/settings.yaml.example
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,15 @@ title: "Spending Analysis"

# Data sources - list of transaction files to process
# Paths are relative to the parent of the config directory
# Supports glob patterns (*, ?, []) to match multiple files
data_sources:
- name: AMEX
file: data/amex-2025.csv
file: data/amex-2025.csv # Single file
type: amex
# Example with glob pattern to match multiple files:
# - name: AMEX
# file: data/amex*.csv # Matches amex-2024.csv, amex-2025.csv, etc.
# type: amex
- name: BOA
file: data/boa-checking.txt
type: boa
Expand Down
207 changes: 122 additions & 85 deletions src/tally/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
"""

import argparse
import glob
import os
import shutil
import sys
Expand Down Expand Up @@ -98,6 +99,37 @@ def __init__(self):
)


def _expand_file_pattern(config_dir, file_pattern):
"""
Expand a file pattern (which may contain wildcards) to a list of actual file paths.

Args:
config_dir: Path to the config directory
file_pattern: File pattern (e.g., 'data/amex*.csv' or 'data/amex-2025.csv')

Returns:
List of absolute file paths that match the pattern, or empty list if no matches
"""
# Try relative to config_dir parent first
base_path = os.path.join(config_dir, '..')
base_path = os.path.normpath(base_path)
pattern_path = os.path.join(base_path, file_pattern)
pattern_path = os.path.normpath(pattern_path)

# Expand glob pattern
matched_files = glob.glob(pattern_path)

# If no matches with config_dir parent, try relative to config_dir's parent directory
if not matched_files:
base_path = os.path.dirname(config_dir)
pattern_path = os.path.join(base_path, file_pattern)
pattern_path = os.path.normpath(pattern_path)
matched_files = glob.glob(pattern_path)

# Sort files to ensure consistent processing order
return sorted(matched_files)


def _check_merchant_migration(config: dict, config_dir: str, quiet: bool = False, migrate: bool = False) -> list:
"""
Check if merchant rules should be migrated from CSV to .rules format.
Expand Down Expand Up @@ -866,48 +898,50 @@ def cmd_run(args):
all_txns = []

for source in data_sources:
filepath = os.path.join(config_dir, '..', source['file'])
filepath = os.path.normpath(filepath)

if not os.path.exists(filepath):
# Try relative to config_dir parent
filepath = os.path.join(os.path.dirname(config_dir), source['file'])

if not os.path.exists(filepath):
# Expand glob pattern to get all matching files
filepaths = _expand_file_pattern(config_dir, source['file'])

if not filepaths:
if not args.quiet:
print(f" {source['name']}: File not found - {source['file']}")
print(f" {source['name']}: No files found matching pattern - {source['file']}")
continue

# Get parser type and format spec (set by config_loader.resolve_source_format)
parser_type = source.get('_parser_type', source.get('type', '')).lower()
format_spec = source.get('_format_spec')

try:
if parser_type == 'amex':
_warn_deprecated_parser(source.get('name', 'AMEX'), 'amex', source['file'])
txns = parse_amex(filepath, rules, home_locations, cleaning_patterns)
elif parser_type == 'boa':
_warn_deprecated_parser(source.get('name', 'BOA'), 'boa', source['file'])
txns = parse_boa(filepath, rules, home_locations, cleaning_patterns)
elif parser_type == 'generic' and format_spec:
txns = parse_generic_csv(filepath, format_spec, rules,
home_locations,
source_name=source.get('name', 'CSV'),
decimal_separator=source.get('decimal_separator', '.'),
cleaning_patterns=cleaning_patterns)
else:
source_txns = []
for filepath in filepaths:
try:
if parser_type == 'amex':
_warn_deprecated_parser(source.get('name', 'AMEX'), 'amex', source['file'])
txns = parse_amex(filepath, rules, home_locations, cleaning_patterns)
elif parser_type == 'boa':
_warn_deprecated_parser(source.get('name', 'BOA'), 'boa', source['file'])
txns = parse_boa(filepath, rules, home_locations, cleaning_patterns)
elif parser_type == 'generic' and format_spec:
txns = parse_generic_csv(filepath, format_spec, rules,
home_locations,
source_name=source.get('name', 'CSV'),
decimal_separator=source.get('decimal_separator', '.'),
cleaning_patterns=cleaning_patterns)
else:
if not args.quiet:
print(f" {source['name']}: Unknown parser type '{parser_type}'")
print(f" Use 'tally inspect {source['file']}' to determine format")
continue

source_txns.extend(txns)
except Exception as e:
if not args.quiet:
print(f" {source['name']}: Unknown parser type '{parser_type}'")
print(f" Use 'tally inspect {source['file']}' to determine format")
print(f" {source['name']} ({os.path.basename(filepath)}): Error parsing - {e}")
continue
except Exception as e:
if not args.quiet:
print(f" {source['name']}: Error parsing - {e}")
continue

all_txns.extend(txns)
all_txns.extend(source_txns)
if not args.quiet:
print(f" {source['name']}: {len(txns)} transactions")
file_count = len(filepaths)
files_str = f"{file_count} file{'s' if file_count > 1 else ''}"
print(f" {source['name']}: {len(source_txns)} transactions from {files_str}")

if not all_txns:
print("Error: No transactions found", file=sys.stderr)
Expand Down Expand Up @@ -1085,37 +1119,35 @@ def cmd_discover(args):
all_txns = []

for source in data_sources:
filepath = os.path.join(config_dir, '..', source['file'])
filepath = os.path.normpath(filepath)

if not os.path.exists(filepath):
filepath = os.path.join(os.path.dirname(config_dir), source['file'])

if not os.path.exists(filepath):
# Expand glob pattern to get all matching files
filepaths = _expand_file_pattern(config_dir, source['file'])

if not filepaths:
continue

parser_type = source.get('_parser_type', source.get('type', '')).lower()
format_spec = source.get('_format_spec')

try:
if parser_type == 'amex':
_warn_deprecated_parser(source.get('name', 'AMEX'), 'amex', source['file'])
txns = parse_amex(filepath, rules, home_locations, cleaning_patterns)
elif parser_type == 'boa':
_warn_deprecated_parser(source.get('name', 'BOA'), 'boa', source['file'])
txns = parse_boa(filepath, rules, home_locations, cleaning_patterns)
elif parser_type == 'generic' and format_spec:
txns = parse_generic_csv(filepath, format_spec, rules,
home_locations,
source_name=source.get('name', 'CSV'),
decimal_separator=source.get('decimal_separator', '.'),
cleaning_patterns=cleaning_patterns)
else:
for filepath in filepaths:
try:
if parser_type == 'amex':
_warn_deprecated_parser(source.get('name', 'AMEX'), 'amex', source['file'])
txns = parse_amex(filepath, rules, home_locations, cleaning_patterns)
elif parser_type == 'boa':
_warn_deprecated_parser(source.get('name', 'BOA'), 'boa', source['file'])
txns = parse_boa(filepath, rules, home_locations, cleaning_patterns)
elif parser_type == 'generic' and format_spec:
txns = parse_generic_csv(filepath, format_spec, rules,
home_locations,
source_name=source.get('name', 'CSV'),
decimal_separator=source.get('decimal_separator', '.'),
cleaning_patterns=cleaning_patterns)
else:
continue
except Exception:
continue
except Exception:
continue

all_txns.extend(txns)
all_txns.extend(txns)

if not all_txns:
print("Error: No transactions found", file=sys.stderr)
Expand Down Expand Up @@ -1741,14 +1773,18 @@ def cmd_diag(args):
print("DATA SOURCES")
print("-" * 70)
for i, source in enumerate(config['data_sources'], 1):
filepath = os.path.join(config_dir, '..', source['file'])
filepath = os.path.normpath(filepath)
if not os.path.exists(filepath):
filepath = os.path.join(os.path.dirname(config_dir), source['file'])
# Expand glob pattern to get all matching files
filepaths = _expand_file_pattern(config_dir, source['file'])

print(f" {i}. {source.get('name', 'unnamed')}")
print(f" File: {source['file']}")
print(f" Exists: {os.path.exists(filepath)}")
print(f" Pattern: {source['file']}")
if filepaths:
print(f" Matched files: {len(filepaths)}")
for fp in filepaths:
print(f" - {os.path.basename(fp)}")
else:
print(f" Matched files: 0 (no files found)")

if source.get('type'):
print(f" Type: {source['type']}")
if source.get('format'):
Expand Down Expand Up @@ -1952,11 +1988,12 @@ def cmd_diag(args):
}
if config and config.get('data_sources'):
for source in config['data_sources']:
filepath = os.path.join(os.path.dirname(config_dir), source['file'])
filepaths = _expand_file_pattern(config_dir, source['file'])
output['data_sources'].append({
'name': source.get('name'),
'file': source['file'],
'exists': os.path.exists(filepath),
'matched_files': len(filepaths),
'files': [os.path.basename(f) for f in filepaths],
'type': source.get('type'),
'format': source.get('format'),
})
Expand Down Expand Up @@ -2267,35 +2304,35 @@ def cmd_explain(args):
# Parse transactions (quietly)
all_txns = []
for source in data_sources:
filepath = os.path.join(config_dir, '..', source['file'])
filepath = os.path.normpath(filepath)
if not os.path.exists(filepath):
filepath = os.path.join(os.path.dirname(config_dir), source['file'])
if not os.path.exists(filepath):
# Expand glob pattern to get all matching files
filepaths = _expand_file_pattern(config_dir, source['file'])

if not filepaths:
continue

parser_type = source.get('_parser_type', source.get('type', '')).lower()
format_spec = source.get('_format_spec')

try:
if parser_type == 'amex':
_warn_deprecated_parser(source.get('name', 'AMEX'), 'amex', source['file'])
txns = parse_amex(filepath, rules, home_locations, cleaning_patterns)
elif parser_type == 'boa':
_warn_deprecated_parser(source.get('name', 'BOA'), 'boa', source['file'])
txns = parse_boa(filepath, rules, home_locations, cleaning_patterns)
elif parser_type == 'generic' and format_spec:
txns = parse_generic_csv(filepath, format_spec, rules,
home_locations,
source_name=source.get('name', 'CSV'),
decimal_separator=source.get('decimal_separator', '.'),
cleaning_patterns=cleaning_patterns)
else:
for filepath in filepaths:
try:
if parser_type == 'amex':
_warn_deprecated_parser(source.get('name', 'AMEX'), 'amex', source['file'])
txns = parse_amex(filepath, rules, home_locations, cleaning_patterns)
elif parser_type == 'boa':
_warn_deprecated_parser(source.get('name', 'BOA'), 'boa', source['file'])
txns = parse_boa(filepath, rules, home_locations, cleaning_patterns)
elif parser_type == 'generic' and format_spec:
txns = parse_generic_csv(filepath, format_spec, rules,
home_locations,
source_name=source.get('name', 'CSV'),
decimal_separator=source.get('decimal_separator', '.'),
cleaning_patterns=cleaning_patterns)
else:
continue
except Exception:
continue
except Exception:
continue

all_txns.extend(txns)
all_txns.extend(txns)

if not all_txns:
print("Error: No transactions found", file=sys.stderr)
Expand Down
Loading