Add scripts to export all CSV files and aggregate by month
This commit is contained in:
258
scripts/aggregate_by_month.py
Executable file
258
scripts/aggregate_by_month.py
Executable file
@@ -0,0 +1,258 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to aggregate all account statements by month
|
||||
"""
|
||||
|
||||
import os
|
||||
import csv
|
||||
import sys
|
||||
import argparse
|
||||
import re
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
import calendar
|
||||
|
||||
def parse_date(date_str, source_file):
    """
    Parse a date string and return a normalized (year, month, day) tuple.

    The string is tried against DD/MM/YYYY, MM/DD/YYYY and YYYY-MM-DD, in that
    order. If none match and ``source_file`` contains "salaire", the month is
    taken from a French month name in ``source_file`` and the year from the
    first "20xx" number in it (current year if absent), with day 1. As a last
    resort the first day of the current month is returned.

    Args:
        date_str: Raw date text from a statement row.
        source_file: Name of the file the row came from; may be empty or None.

    Returns:
        A ``(year, month, day)`` tuple of ints.
    """
    # Formats are tried in order; the first one that parses wins.
    # NOTE(review): a value such as "03/04/2024" is valid for both slash
    # formats and is always read as DD/MM/YYYY; the MM/DD/YYYY entry only
    # takes effect when the second field is greater than 12.
    formats = (
        '%d/%m/%Y',  # DD/MM/YYYY
        '%m/%d/%Y',  # MM/DD/YYYY (Amex format)
        '%Y-%m-%d',  # YYYY-MM-DD (Revolut format)
    )

    for fmt in formats:
        try:
            dt = datetime.strptime(date_str, fmt)
        except ValueError:
            continue
        return (dt.year, dt.month, dt.day)

    # Take a single timestamp so year and month cannot straddle a boundary.
    now = datetime.now()

    # Try to extract from filename (for SNCF)
    source_name = source_file or ''
    source_lower = source_name.lower()
    if 'salaire' in source_lower:
        months = ['janvier', 'fevrier', 'mars', 'avril', 'mai', 'juin',
                  'juillet', 'aout', 'septembre', 'octobre', 'novembre', 'decembre']
        for i, month in enumerate(months, 1):
            if month in source_lower:
                # Keep the full four-digit match; capturing only the last two
                # digits would turn 2023 into the year 23.
                year_match = re.search(r'20\d{2}', source_name)
                year = int(year_match.group(0)) if year_match else now.year
                return (year, i, 1)

    # Default: first day of the current month
    return (now.year, now.month, 1)
|
||||
|
||||
def categorize_institution(source_file):
    """
    Map a source filename to the name of the institution it belongs to.

    The filename is lower-cased and checked against known substrings; the
    first institution in the table with a matching substring is returned,
    or 'Other' when nothing matches.
    """
    name = source_file.lower()

    # Order matters: earlier entries take precedence when several match.
    rules = (
        ('Boursobank', ('boursobank', 'releve-compte')),
        ('American Express', ('american_express', 'amex')),
        ('Monabanq', ('monabanq', 'extrait de comptes')),
        ('Revolut', ('revolut',)),
        ('SNCF', ('sncf', 'salaire')),
        ('La Poste', ('la_poste', '2-la.poste', 'releve_ccp')),
    )

    for institution, markers in rules:
        if any(marker in name for marker in markers):
            return institution

    return 'Other'
|
||||
|
||||
def process_csv_file(file_path):
    """
    Read one statement CSV and return its rows as transaction dicts.

    Rows without a 'Date' value are skipped. Each returned dict has the keys
    'year', 'month', 'day', 'date_str', 'description', 'category', 'amount',
    'institution' and 'source'. The institution is derived from the file's
    base name; the amount is read from the first non-empty of the 'Amount',
    'Debit' and 'Credit' columns and falls back to 0 when it cannot be parsed.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list of transaction dicts, in file order.
    """
    file_name = os.path.basename(file_path)
    institution = categorize_institution(file_name)
    transactions = []

    # 'utf-8-sig' drops a leading BOM if present (otherwise the first header
    # would read '\ufeffDate' and every row would be skipped); newline='' is
    # what the csv module expects so quoted newlines are handled correctly.
    with open(file_path, 'r', newline='', encoding='utf-8-sig') as f:
        for row in csv.DictReader(f):
            # Get the date
            date_str = row.get('Date', '')
            if not date_str:
                continue

            # Fall back to the file name when the row carries no usable
            # 'Source' value, so filename-based date recovery still works.
            source = row.get('Source') or file_name

            # Parse and normalize the date
            year, month, day = parse_date(date_str, source)

            # Get amount (handle different column names)
            # NOTE(review): 'Debit' and 'Credit' values are read with the same
            # sign — confirm this matches how the totals are interpreted.
            amount_str = row.get('Amount', '') or row.get('Debit', '') or row.get('Credit', '0')
            try:
                if amount_str:
                    # Accept decimal commas and space-grouped thousands
                    # (regular or non-breaking), e.g. "1 234,56".
                    cleaned = amount_str.replace('\u00a0', '').replace(' ', '').replace(',', '.')
                    amount = float(cleaned)
                else:
                    amount = 0
            except ValueError:
                amount = 0

            # Create transaction record
            transactions.append({
                'year': year,
                'month': month,
                'day': day,
                'date_str': date_str,
                # Short rows yield None here; normalize so sorting stays safe.
                'description': row.get('Description') or '',
                'category': row.get('Category', 'Other'),
                'amount': amount,
                'institution': institution,
                'source': source,
            })

    return transactions
|
||||
|
||||
def _write_monthly_summary(summary_file, monthly_transactions):
    """Write one totals row per (year, month) key to ``summary_file``."""
    with open(summary_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow([
            'Year', 'Month', 'Total Income', 'Total Expenses', 'Net Balance',
            'Transaction Count', 'Institutions'
        ])

        for (year, month) in sorted(monthly_transactions):
            transactions = monthly_transactions[(year, month)]

            # Negative amounts are counted as income (the original author
            # notes this is the Revolut convention); positive as expenses.
            total_income = sum(t['amount'] for t in transactions if t['amount'] < 0)
            total_expenses = sum(t['amount'] for t in transactions if t['amount'] > 0)
            institutions = sorted({t['institution'] for t in transactions})

            writer.writerow([
                year, calendar.month_name[month], total_income, total_expenses,
                total_income + total_expenses, len(transactions),
                ', '.join(institutions)
            ])


def _write_monthly_details(output_dir, monthly_transactions):
    """Write one transactions_<year>_<month>.csv per month; return the file names."""
    file_names = []
    for (year, month) in sorted(monthly_transactions):
        month_name = calendar.month_name[month].lower()
        file_name = f'transactions_{year}_{month_name}.csv'
        file_names.append(file_name)

        with open(os.path.join(output_dir, file_name), 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=[
                'Date', 'Description', 'Category', 'Amount',
                'Institution', 'Source'
            ])
            writer.writeheader()

            # Sort by day, then description; guard against a None description
            # so mixed None/str values cannot break the comparison.
            ordered = sorted(
                monthly_transactions[(year, month)],
                key=lambda x: (x['day'], x['description'] or ''),
            )
            for transaction in ordered:
                writer.writerow({
                    'Date': transaction['date_str'],
                    'Description': transaction['description'],
                    'Category': transaction['category'],
                    'Amount': transaction['amount'],
                    'Institution': transaction['institution'],
                    'Source': transaction['source']
                })
    return file_names


def _write_yearly_summary(yearly_file, all_transactions):
    """Write per-year income, expense, net and count totals to ``yearly_file``."""
    yearly_summary = defaultdict(lambda: {'income': 0, 'expenses': 0, 'count': 0})
    for transaction in all_transactions:
        totals = yearly_summary[transaction['year']]
        totals['count'] += 1
        if transaction['amount'] < 0:
            totals['income'] += transaction['amount']
        else:
            totals['expenses'] += transaction['amount']

    with open(yearly_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['Year', 'Total Income', 'Total Expenses', 'Net Balance', 'Transaction Count'])
        for year in sorted(yearly_summary):
            data = yearly_summary[year]
            writer.writerow([
                year, data['income'], data['expenses'],
                data['income'] + data['expenses'], data['count']
            ])


def main():
    """
    Aggregate every CSV in --input-dir into monthly and yearly reports.

    Writes monthly_summary.csv, yearly_summary.csv and one
    transactions_<year>_<month>.csv per month into --output-dir, printing
    progress and a list of the generated files. Prints an error and returns
    without writing reports when the input directory is missing or holds no
    CSV files.
    """
    parser = argparse.ArgumentParser(description='Aggregate all account statements by month')
    parser.add_argument('--input-dir', default='output/csv',
                        help='Directory containing CSV files to aggregate (default: output/csv)')
    parser.add_argument('--output-dir', default='output/reports',
                        help='Directory to save aggregated reports (default: output/reports)')

    args = parser.parse_args()

    # Create output directory
    os.makedirs(args.output_dir, exist_ok=True)

    print(f"\n{'='*60}")
    print("Monthly Aggregation of All Account Statements")
    print(f"Input Directory: {os.path.abspath(args.input_dir)}")
    print(f"Output Directory: {os.path.abspath(args.output_dir)}")
    print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*60}")

    # A missing input directory is reported instead of raising from listdir.
    if not os.path.isdir(args.input_dir):
        print(f"\nError: Input directory not found: {args.input_dir}")
        return

    # Sorted so files are processed in the same order on every run.
    csv_files = sorted(f for f in os.listdir(args.input_dir) if f.endswith('.csv'))

    if not csv_files:
        print(f"\nError: No CSV files found in {args.input_dir}")
        return

    # Collect all transactions
    all_transactions = []
    for csv_file in csv_files:
        file_path = os.path.join(args.input_dir, csv_file)
        print(f"\nProcessing: {csv_file}")
        transactions = process_csv_file(file_path)
        all_transactions.extend(transactions)
        print(f"  Found {len(transactions)} transactions")

    # Group transactions by month
    monthly_transactions = defaultdict(list)
    for transaction in all_transactions:
        key = (transaction['year'], transaction['month'])
        monthly_transactions[key].append(transaction)

    # Write the three kinds of report
    summary_file = os.path.join(args.output_dir, 'monthly_summary.csv')
    _write_monthly_summary(summary_file, monthly_transactions)

    detail_files = _write_monthly_details(args.output_dir, monthly_transactions)

    yearly_file = os.path.join(args.output_dir, 'yearly_summary.csv')
    _write_yearly_summary(yearly_file, all_transactions)

    # Print summary statistics
    print(f"\n{'='*60}")
    print("Aggregation Complete")
    print(f"Total Transactions: {len(all_transactions)}")
    print(f"Months with Data: {len(monthly_transactions)}")
    print(f"{'='*60}")

    # List generated files
    generated_files = [
        os.path.basename(summary_file),
        os.path.basename(yearly_file)
    ]
    generated_files.extend(detail_files)

    print("\nGenerated Files:")
    for file in generated_files:
        file_path = os.path.join(args.output_dir, file)
        if os.path.exists(file_path):
            file_size = os.path.getsize(file_path)
            print(f"  - {file} ({file_size:,} bytes)")


if __name__ == "__main__":
    main()
|
||||
132
scripts/export_all_csv.py
Executable file
132
scripts/export_all_csv.py
Executable file
@@ -0,0 +1,132 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Script to output CSV files for all account statements
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
|
||||
def run_script(script_path, pdf_dir, output_dir, use_csv_dir=False):
    """
    Run a processing script as a subprocess with the current interpreter.

    Args:
        script_path: Path of the Python script to run.
        pdf_dir: Input directory handed to the script.
        output_dir: Directory passed to the script as --output-dir.
        use_csv_dir: When True the input directory is passed as --csv-dir
            (for Revolut, which uses CSV input) instead of --pdf-dir.

    Returns:
        True if the script exited with status 0, False if it failed or could
        not be started.
    """
    input_flag = '--csv-dir' if use_csv_dir else '--pdf-dir'
    cmd = [sys.executable, script_path, input_flag, pdf_dir, '--output-dir', output_dir, '--csv']

    # Build the banner label from the script's base name only, so a script
    # passed by absolute path does not leak its directory into the title.
    label = os.path.splitext(os.path.basename(script_path))[0].replace('_', ' ').title()

    print(f"\n{'='*60}")
    print(f"Processing {label} statements...")
    print('='*60)

    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # Non-zero exit, or the interpreter could not be launched at all.
        print(f"Error running {script_path}: {e}")
        return False
    return True
|
||||
|
||||
def main():
    """
    Run every per-institution processing script and export CSV files.

    For each configured account the matching script is run against that
    account's data directory, with output written to --output-dir. Accounts
    whose data directory is missing or empty are skipped with a message. A
    summary and the list of CSV files in the output directory are printed at
    the end.
    """
    # Get absolute paths (computed once; both the argument default and the
    # account table below depend on them)
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    data_dir = os.path.join(project_root, 'data/pdf')
    raw_csv_dir = os.path.join(project_root, 'data/raw_csv')

    parser = argparse.ArgumentParser(description='Process all account statements and output CSV files')
    parser.add_argument('--output-dir', default=os.path.join(project_root, 'output/csv'),
                        help='Directory to save CSV output files')

    args = parser.parse_args()

    # Create output directory if it doesn't exist
    os.makedirs(args.output_dir, exist_ok=True)

    print(f"\n{'='*60}")
    print("All Account Statements CSV Export")
    print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Output Directory: {os.path.abspath(args.output_dir)}")
    print(f"{'='*60}")

    # Define account types and their corresponding directories and scripts
    accounts = [
        {
            'name': 'Boursobank',
            'script': os.path.join(script_dir, 'process_bourso.py'),
            'data_dir': os.path.join(data_dir, 'boursobank'),
            'use_csv_dir': False
        },
        {
            'name': 'American Express',
            'script': os.path.join(script_dir, 'process_amex.py'),
            'data_dir': os.path.join(data_dir, 'american_express'),
            'use_csv_dir': False
        },
        {
            'name': 'Monabanq',
            'script': os.path.join(script_dir, 'process_monabanq.py'),
            'data_dir': os.path.join(data_dir, 'monabanq'),
            'use_csv_dir': False
        },
        {
            'name': 'Revolut',
            'script': os.path.join(script_dir, 'process_expenses.py'),
            'data_dir': raw_csv_dir,  # Revolut uses CSV input
            'use_csv_dir': True
        },
        {
            'name': 'SNCF',
            'script': os.path.join(script_dir, 'process_sncf.py'),
            'data_dir': os.path.join(data_dir, '1-sncf'),
            'use_csv_dir': False
        },
        {
            'name': 'La Poste',
            'script': os.path.join(script_dir, 'process_laposte.py'),
            'data_dir': os.path.join(data_dir, '2-la.poste'),
            'use_csv_dir': False
        }
    ]

    # Process each account
    success_count = 0
    total_accounts = len(accounts)

    for account in accounts:
        # isdir rather than exists: a plain file at this path would otherwise
        # get through and make os.listdir raise.
        if not os.path.isdir(account['data_dir']):
            print(f"\nWarning: Directory not found for {account['name']}: {account['data_dir']}")
            continue

        # Skip if directory is empty
        if not os.listdir(account['data_dir']):
            print(f"\nSkipping {account['name']}: No files found in {account['data_dir']}")
            continue

        # Run the processing script with appropriate parameter name
        if run_script(account['script'], account['data_dir'], args.output_dir, account['use_csv_dir']):
            success_count += 1

    # Print summary
    print(f"\n{'='*60}")
    print(f"Processing Complete: {success_count}/{total_accounts} accounts processed successfully")
    print(f"CSV files have been saved to: {os.path.abspath(args.output_dir)}")
    print(f"{'='*60}")

    # List generated CSV files
    if os.path.exists(args.output_dir):
        csv_files = [f for f in os.listdir(args.output_dir) if f.endswith('.csv')]
        if csv_files:
            print("\nGenerated CSV Files:")
            for file in sorted(csv_files):
                file_path = os.path.join(args.output_dir, file)
                file_size = os.path.getsize(file_path)
                print(f"  - {file} ({file_size:,} bytes)")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user