Refactor SNCF processor and add Revolut aggregator

- Fix SNCF NET PAYÉ EN EUROS extraction to correctly parse the MENSUEL line
- Extract month/year from PDF content instead of the filename
- Add new Revolut CSV processor to aggregate account statements
- Organize Revolut data files into data/csv/revolut/
- Clean up redundant scripts and reports
2026-02-09 16:17:48 +01:00
parent ef23d066e0
commit eb66c7a43e
85 changed files with 3270 additions and 2106 deletions
--- a/scripts/process_revolut.py
+++ b/scripts/process_revolut.py
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+"""
+Revolut CSV aggregator to process and consolidate account statements
+"""
+
+import csv
+import os
+import glob
+import argparse
+from datetime import datetime
+from collections import defaultdict
+
+
+def _parse_float(value):
+    """Convert a CSV cell to ``float``; blank, missing or non-numeric cells give 0.0."""
+    try:
+        return float(value)
+    except (TypeError, ValueError):
+        # TypeError covers None, which csv.DictReader stores for cells that
+        # are missing from a row shorter than the header.
+        return 0.0
+
+
+def _cell(row, column, default=''):
+    """Return ``row[column]``, or *default* when the column is absent or the cell is None."""
+    value = row.get(column)
+    return default if value is None else value
+
+
+def parse_revolut_csv(csv_file):
+    """Parse a single Revolut CSV file and return a list of transactions.
+
+    Only rows whose ``State`` column equals ``COMPLETED`` (case-insensitive)
+    are kept; every other row, including truncated rows that have no
+    ``State`` cell at all, is skipped.
+
+    Args:
+        csv_file: Path of the CSV file to read.
+
+    Returns:
+        A list of dicts with the keys ``Date``, ``Description``, ``Type``,
+        ``Product``, ``Amount``, ``Fee``, ``Currency``, ``State``,
+        ``Balance`` and ``Source``. ``Date`` is reformatted to ``DD/MM/YYYY``
+        when ``Started Date`` matches ``YYYY-MM-DD HH:MM:SS`` and is otherwise
+        passed through unchanged. ``Amount`` and ``Fee`` are floats (0.0 when
+        the cell is blank or not numeric). ``Source`` is the file's base name.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        UnicodeDecodeError: If the file is not valid UTF-8.
+    """
+    transactions = []
+    source_name = os.path.basename(csv_file)
+
+    # 'utf-8-sig' drops a leading byte-order mark if one is present (otherwise
+    # it behaves like 'utf-8'); without it the first header name would start
+    # with '\ufeff' and that column would look missing. newline='' lets the
+    # csv module handle line endings inside quoted fields itself.
+    with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
+        reader = csv.DictReader(f)
+        for row in reader:
+            # Skip if not completed
+            state = _cell(row, 'State')
+            if state.upper() != 'COMPLETED':
+                continue
+
+            # Parse date (expected format: 2026-01-03 04:39:38)
+            started_date = _cell(row, 'Started Date')
+            try:
+                date_obj = datetime.strptime(started_date, '%Y-%m-%d %H:%M:%S')
+                formatted_date = date_obj.strftime('%d/%m/%Y')
+            except (ValueError, TypeError):
+                # Keep the raw text so the value is not lost.
+                formatted_date = started_date
+
+            transactions.append({
+                'Date': formatted_date,
+                'Description': _cell(row, 'Description'),
+                'Type': _cell(row, 'Type'),
+                'Product': _cell(row, 'Product'),
+                # Negative = expense, positive = income. The amount is stored
+                # as given; the fee is reported separately and not subtracted.
+                'Amount': _parse_float(row.get('Amount')),
+                'Fee': _parse_float(row.get('Fee')),
+                'Currency': _cell(row, 'Currency', 'EUR'),
+                'State': state,
+                'Balance': _cell(row, 'Balance'),
+                'Source': source_name,
+            })
+
+    return transactions
+
+
+def categorize_transaction(description, trans_type):
+    """Return a human-readable category for a transaction.
+
+    A description containing ``POCKET`` or ``ÉPARGNE`` (compared in upper
+    case) always yields ``'Savings Transfer'``. Otherwise the category is
+    looked up from the upper-cased transaction type, with ``'Other'`` for any
+    type that is not recognised.
+    """
+    haystack = description.upper()
+    kind = trans_type.upper()
+
+    # Savings keywords in the description take precedence over the type.
+    if any(marker in haystack for marker in ('POCKET', 'ÉPARGNE')):
+        return 'Savings Transfer'
+
+    labels_by_type = {
+        'TRANSFER': 'Transfer',
+        'CARD_PAYMENT': 'Card Payment',
+        'CARD_REFUND': 'Card Refund',
+        'EXCHANGE': 'Currency Exchange',
+        'TOPUP': 'Top Up',
+        'REWARD': 'Reward',
+    }
+    return labels_by_type.get(kind, 'Other')
+
+
+def _date_sort_key(transaction):
+    """Return a sortable datetime for a transaction; empty or unparseable dates sort first."""
+    try:
+        return datetime.strptime(transaction['Date'], '%d/%m/%Y')
+    except (TypeError, ValueError):
+        # parse_revolut_csv leaves dates it cannot parse as raw text, so a
+        # single odd row must not abort the whole aggregation.
+        return datetime.min
+
+
+def process_revolut_csv_files(directory, output_csv=False, output_dir='output/csv'):
+    """Process all Revolut CSV files in a directory and aggregate the transactions.
+
+    Each ``*.csv`` file directly inside *directory* is parsed with
+    ``parse_revolut_csv``. A file that fails to parse is reported on stdout
+    and skipped. The combined transactions are sorted by date, tagged with a
+    ``Category`` key, optionally written to
+    ``<output_dir>/revolut_all_transactions.csv``, and summarised on stdout.
+
+    Args:
+        directory: Directory that contains the Revolut CSV statements.
+        output_csv: When true (and at least one transaction was found), write
+            the aggregated transactions to a CSV file.
+        output_dir: Directory for the aggregated CSV; created if missing.
+
+    Returns:
+        The list of transaction dicts, sorted by date, each with a
+        ``Category`` key added.
+    """
+    # glob returns files in arbitrary order; sorting keeps the result stable
+    # between runs, since same-day transactions keep their input order.
+    csv_files = sorted(glob.glob(os.path.join(directory, "*.csv")))
+    all_transactions = []
+
+    for csv_file in csv_files:
+        try:
+            transactions = parse_revolut_csv(csv_file)
+        except Exception as e:
+            # Best effort: report the broken file and carry on with the rest.
+            print(f"Error processing {csv_file}: {e}")
+        else:
+            all_transactions.extend(transactions)
+            print(f"Processed {os.path.basename(csv_file)}: {len(transactions)} transactions")
+
+    # Sort transactions by date (stable, so ties keep file/row order)
+    all_transactions.sort(key=_date_sort_key)
+
+    # Add categories
+    for trans in all_transactions:
+        trans['Category'] = categorize_transaction(trans['Description'], trans['Type'])
+
+    # Output CSV
+    if output_csv and all_transactions:
+        csv_file = os.path.join(output_dir, 'revolut_all_transactions.csv')
+        os.makedirs(output_dir, exist_ok=True)
+
+        with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
+            fieldnames = ['Date', 'Description', 'Category', 'Type', 'Product', 'Amount', 'Fee',
+                          'Currency', 'State', 'Balance', 'Source']
+            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+            writer.writeheader()
+            writer.writerows(all_transactions)
+
+        print(f"\nTransaction data saved to {csv_file}")
+
+    print("\n--- Revolut Account Statements ---")
+    print(f"Found {len(csv_files)} statement files")
+    print(f"Total transactions: {len(all_transactions)}")
+
+    # Calculate totals.
+    # NOTE(review): amounts are summed across every row regardless of its
+    # 'Currency' value and printed with a euro sign — confirm all statements
+    # are EUR, or group by currency before relying on these figures.
+    total_income = sum(t['Amount'] for t in all_transactions if t['Amount'] > 0)
+    total_expenses = sum(t['Amount'] for t in all_transactions if t['Amount'] < 0)
+    total_fees = sum(t['Fee'] for t in all_transactions)
+
+    print(f"Total Income: €{total_income:,.2f}")
+    print(f"Total Expenses: €{total_expenses:,.2f}")
+    print(f"Total Fees: €{total_fees:,.2f}")
+    print(f"Net Flow: €{(total_income + total_expenses):,.2f}")
+
+    return all_transactions
+
+
+if __name__ == "__main__":
+    # Command-line entry point: read the options, then aggregate every
+    # statement found in the chosen directory.
+    cli = argparse.ArgumentParser(
+        description='Process and aggregate Revolut CSV account statements',
+    )
+    cli.add_argument(
+        '--csv-dir',
+        default='data/csv/revolut',
+        help='Directory containing Revolut CSV files',
+    )
+    cli.add_argument(
+        '--output-dir',
+        default='output/csv',
+        help='Directory to save CSV output files',
+    )
+    cli.add_argument(
+        '--csv',
+        action='store_true',
+        help='Output aggregated data to CSV file',
+    )
+
+    options = cli.parse_args()
+
+    # Arguments are passed as (directory, output_csv, output_dir).
+    process_revolut_csv_files(options.csv_dir, options.csv, options.output_dir)