Refactor SNCF processor and add Revolut aggregator
- Fix SNCF "NET PAYÉ EN EUROS" extraction to correctly parse the MENSUEL line
- Extract month/year from PDF content instead of the filename
- Add new Revolut CSV processor to aggregate account statements
- Organize Revolut data files into data/csv/revolut/
- Clean up redundant scripts and reports
This commit is contained in:
154
scripts/process_revolut.py
Normal file
154
scripts/process_revolut.py
Normal file
@@ -0,0 +1,154 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Revolut CSV aggregator to process and consolidate account statements
|
||||
"""
|
||||
|
||||
import csv
|
||||
import os
|
||||
import glob
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
def _cell_text(row, column, default=''):
    """Return the text of ``row[column]``, or ``default`` when absent/None."""
    value = row.get(column)
    return default if value is None else value


def _cell_float(row, column):
    """Return ``row[column]`` as a float, or 0.0 when missing or not numeric."""
    try:
        return float(_cell_text(row, column, '0'))
    except (TypeError, ValueError):
        return 0.0


def parse_revolut_csv(csv_file):
    """Parse a single Revolut CSV file and return its completed transactions.

    Rows whose ``State`` column is not ``COMPLETED`` (case-insensitive) are
    skipped. Cells that are missing because a row is shorter than the header
    are treated as empty instead of raising.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A list of dicts with the keys ``Date``, ``Description``, ``Type``,
        ``Product``, ``Amount``, ``Fee``, ``Currency``, ``State``,
        ``Balance`` and ``Source``. ``Date`` is ``DD/MM/YYYY`` when
        ``Started Date`` matches ``YYYY-MM-DD HH:MM:SS``; otherwise the raw
        text is kept. ``Amount`` and ``Fee`` are floats (0.0 when the cell is
        empty or not numeric). ``Source`` is the file's base name.

    Raises:
        OSError: If the file cannot be opened.
    """
    transactions = []
    source_name = os.path.basename(csv_file)

    # 'utf-8-sig' drops a leading byte-order mark so the first header name is
    # recognised; newline='' lets the csv module handle embedded newlines.
    with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
        for row in csv.DictReader(f):
            state = _cell_text(row, 'State')
            # Skip if not completed
            if state.upper() != 'COMPLETED':
                continue

            started_date = _cell_text(row, 'Started Date')
            try:
                # Format: 2026-01-03 04:39:38
                date_obj = datetime.strptime(started_date, '%Y-%m-%d %H:%M:%S')
                formatted_date = date_obj.strftime('%d/%m/%Y')
            except ValueError:
                # Keep the raw text so the information is not lost.
                formatted_date = started_date

            transactions.append({
                'Date': formatted_date,
                'Description': _cell_text(row, 'Description'),
                'Type': _cell_text(row, 'Type'),
                'Product': _cell_text(row, 'Product'),
                # Negative = expense, positive = income. The fee is reported
                # separately and is not subtracted from the amount here.
                'Amount': _cell_float(row, 'Amount'),
                'Fee': _cell_float(row, 'Fee'),
                'Currency': _cell_text(row, 'Currency', 'EUR'),
                'State': state,
                'Balance': _cell_text(row, 'Balance'),
                'Source': source_name,
            })

    return transactions
|
||||
|
||||
|
||||
# Category label for each Revolut transaction type (compared upper-cased).
_CATEGORY_BY_TYPE = {
    'TRANSFER': 'Transfer',
    'CARD_PAYMENT': 'Card Payment',
    'CARD_REFUND': 'Card Refund',
    'EXCHANGE': 'Currency Exchange',
    'TOPUP': 'Top Up',
    'REWARD': 'Reward',
}


def categorize_transaction(description, trans_type):
    """Categorize a transaction based on its description and type.

    A description containing ``POCKET`` or ``ÉPARGNE`` (case-insensitive)
    takes precedence over the type and yields ``'Savings Transfer'``.
    Otherwise the type is looked up case-insensitively, ignoring surrounding
    whitespace.

    Args:
        description: Transaction description text; ``None`` is treated as
            empty.
        trans_type: Transaction type such as ``'CARD_PAYMENT'``; ``None`` is
            treated as empty.

    Returns:
        The category label, or ``'Other'`` when nothing matches.
    """
    # `or ''` keeps None (e.g. a missing CSV cell) from crashing on .upper().
    description_upper = (description or '').upper()
    trans_type_upper = (trans_type or '').strip().upper()

    if 'POCKET' in description_upper or 'ÉPARGNE' in description_upper:
        return 'Savings Transfer'
    return _CATEGORY_BY_TYPE.get(trans_type_upper, 'Other')
|
||||
|
||||
|
||||
def _transaction_sort_key(transaction):
    """Return the transaction's date for sorting; unparseable dates sort first."""
    try:
        return datetime.strptime(transaction['Date'], '%d/%m/%Y')
    except (KeyError, TypeError, ValueError):
        # parse_revolut_csv keeps the raw 'Started Date' text when it cannot
        # parse it, so a non-DD/MM/YYYY value must not abort the whole sort.
        return datetime.min


def process_revolut_csv_files(directory, output_csv=False, output_dir='output/csv'):
    """Process all Revolut CSV files in a directory and aggregate transactions.

    Every ``*.csv`` file directly inside ``directory`` is parsed with
    ``parse_revolut_csv``. A file that fails to parse is reported on stdout
    and skipped. The combined transactions are sorted by date, given a
    ``Category`` via ``categorize_transaction``, optionally written to
    ``<output_dir>/revolut_all_transactions.csv``, and summarised on stdout.

    Args:
        directory: Directory containing the Revolut CSV files.
        output_csv: When true and at least one transaction was found, write
            the aggregated rows to a CSV file.
        output_dir: Directory for the aggregated CSV; created if needed.

    Returns:
        The list of transaction dicts, sorted by date. Transactions whose
        date is empty or not ``DD/MM/YYYY`` come first.
    """
    # glob returns files in arbitrary order; sort them so that the output is
    # reproducible (the date sort below is stable, so same-day transactions
    # keep the order in which their files were read).
    csv_files = sorted(glob.glob(os.path.join(directory, "*.csv")))
    all_transactions = []

    for csv_file in csv_files:
        try:
            transactions = parse_revolut_csv(csv_file)
        except Exception as e:
            # Best effort: one unreadable statement must not stop the others.
            print(f"Error processing {csv_file}: {e}")
        else:
            all_transactions.extend(transactions)
            print(f"Processed {os.path.basename(csv_file)}: {len(transactions)} transactions")

    # Sort transactions by date
    all_transactions.sort(key=_transaction_sort_key)

    # Add categories
    for trans in all_transactions:
        trans['Category'] = categorize_transaction(trans['Description'], trans['Type'])

    # Output CSV
    if output_csv and all_transactions:
        csv_file = os.path.join(output_dir, 'revolut_all_transactions.csv')
        os.makedirs(output_dir, exist_ok=True)

        with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['Date', 'Description', 'Category', 'Type', 'Product', 'Amount', 'Fee',
                          'Currency', 'State', 'Balance', 'Source']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_transactions)

        print(f"\nTransaction data saved to {csv_file}")

    print("\n--- Revolut Account Statements ---")
    print(f"Found {len(csv_files)} statement files")
    print(f"Total transactions: {len(all_transactions)}")

    # Calculate totals. Amounts are summed as-is, whatever their 'Currency'.
    total_income = sum(t['Amount'] for t in all_transactions if t['Amount'] > 0)
    total_expenses = sum(t['Amount'] for t in all_transactions if t['Amount'] < 0)
    total_fees = sum(t['Fee'] for t in all_transactions)

    print(f"Total Income: €{total_income:,.2f}")
    print(f"Total Expenses: €{total_expenses:,.2f}")
    print(f"Total Fees: €{total_fees:,.2f}")
    print(f"Net Flow: €{(total_income + total_expenses):,.2f}")

    return all_transactions
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: read the options, then aggregate every
    # statement found in the chosen directory.
    cli = argparse.ArgumentParser(
        description='Process and aggregate Revolut CSV account statements',
    )

    # (flag, add_argument keyword settings), registered in this order.
    option_specs = (
        ('--csv-dir', {
            'default': 'data/csv/revolut',
            'help': 'Directory containing Revolut CSV files',
        }),
        ('--output-dir', {
            'default': 'output/csv',
            'help': 'Directory to save CSV output files',
        }),
        ('--csv', {
            'action': 'store_true',
            'help': 'Output aggregated data to CSV file',
        }),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)

    options = cli.parse_args()

    process_revolut_csv_files(
        options.csv_dir,
        output_csv=options.csv,
        output_dir=options.output_dir,
    )
|
||||
Reference in New Issue
Block a user