#!/usr/bin/env python3
"""
Script to output CSV files for all account statements
"""

import os
import subprocess
import sys
import argparse
from datetime import datetime

# Horizontal rule that frames every section of the console output.
_RULE = '=' * 60


def run_script(script_path, pdf_dir, output_dir, use_csv_dir=False):
    """Run one statement-processing script as a child Python process.

    Args:
        script_path: Path of the processing script to execute.
        pdf_dir: Input directory handed to the script. Despite its name it is
            passed as ``--csv-dir`` when ``use_csv_dir`` is true.
        output_dir: Directory passed to the script as ``--output-dir``.
        use_csv_dir: When true, pass the input directory as ``--csv-dir``
            instead of ``--pdf-dir`` (used for Revolut, which takes CSV input).

    Returns:
        True if the script exited with status 0, False if it exited with a
        non-zero status or could not be started.
    """
    input_flag = '--csv-dir' if use_csv_dir else '--pdf-dir'
    cmd = [sys.executable, script_path, input_flag, pdf_dir,
           '--output-dir', output_dir, '--csv']

    # Build the banner title from the file name alone. The previous code only
    # stripped a literal '../scripts/' prefix, so the absolute paths that
    # main() passes in were printed in full (and title-cased).
    stem = os.path.splitext(os.path.basename(script_path))[0]
    title = stem.replace('_', ' ').title()

    print(f"\n{_RULE}")
    print(f"Processing {title} statements...")
    print(_RULE)

    try:
        subprocess.run(cmd, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # CalledProcessError: the script ran and returned a non-zero status.
        # OSError: the interpreter could not be launched at all. Either way
        # report it and let the caller move on to the next account.
        print(f"Error running {script_path}: {e}")
        return False
    return True


def _build_accounts(script_dir, project_root):
    """Return the account table: display name, script, input dir, input flag."""
    data_dir = os.path.join(project_root, 'data/pdf')
    raw_csv_dir = os.path.join(project_root, 'data/raw_csv')

    return [
        {
            'name': 'Boursobank',
            'script': os.path.join(script_dir, 'process_bourso.py'),
            'data_dir': os.path.join(data_dir, 'boursobank'),
            'use_csv_dir': False,
        },
        {
            'name': 'American Express',
            'script': os.path.join(script_dir, 'process_amex.py'),
            'data_dir': os.path.join(data_dir, 'american_express'),
            'use_csv_dir': False,
        },
        {
            'name': 'Monabanq',
            'script': os.path.join(script_dir, 'process_monabanq.py'),
            'data_dir': os.path.join(data_dir, 'monabanq'),
            'use_csv_dir': False,
        },
        {
            'name': 'Revolut',
            'script': os.path.join(script_dir, 'process_expenses.py'),
            'data_dir': raw_csv_dir,  # Revolut uses CSV input
            'use_csv_dir': True,
        },
        {
            'name': 'SNCF',
            'script': os.path.join(script_dir, 'process_sncf.py'),
            'data_dir': os.path.join(data_dir, '1-sncf'),
            'use_csv_dir': False,
        },
        {
            'name': 'La Poste',
            'script': os.path.join(script_dir, 'process_laposte.py'),
            'data_dir': os.path.join(data_dir, '2-la.poste'),
            'use_csv_dir': False,
        },
    ]


def main():
    """Run every account's processing script and summarise the CSV output.

    Accepts one command-line option, ``--output-dir`` (default:
    ``<project_root>/output/csv``), creates that directory if needed, runs the
    processing script of each account whose input directory exists and is not
    empty, then prints a success count and the ``.csv`` files found in the
    output directory.
    """
    # Paths are resolved relative to this file: the sibling scripts live next
    # to it and the project root is its parent directory.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)

    parser = argparse.ArgumentParser(description='Process all account statements and output CSV files')
    parser.add_argument('--output-dir', default=os.path.join(project_root, 'output/csv'),
                        help='Directory to save CSV output files')
    args = parser.parse_args()

    # Create output directory if it doesn't exist
    os.makedirs(args.output_dir, exist_ok=True)

    print(f"\n{_RULE}")
    print("All Account Statements CSV Export")
    print(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Output Directory: {os.path.abspath(args.output_dir)}")
    print(_RULE)

    accounts = _build_accounts(script_dir, project_root)

    # Process each account. Skipped accounts are not counted as successes but
    # still count towards the total shown in the summary.
    success_count = 0
    total_accounts = len(accounts)

    for account in accounts:
        source_dir = account['data_dir']

        # isdir (rather than exists) so that a stray regular file at this path
        # is reported as missing instead of crashing os.listdir below.
        if not os.path.isdir(source_dir):
            print(f"\nWarning: Directory not found for {account['name']}: {source_dir}")
            continue

        # Skip if directory is empty
        if not os.listdir(source_dir):
            print(f"\nSkipping {account['name']}: No files found in {source_dir}")
            continue

        # Run the processing script with appropriate parameter name
        if run_script(account['script'], source_dir, args.output_dir, account['use_csv_dir']):
            success_count += 1

    # Print summary
    print(f"\n{_RULE}")
    print(f"Processing Complete: {success_count}/{total_accounts} accounts processed successfully")
    print(f"CSV files have been saved to: {os.path.abspath(args.output_dir)}")
    print(_RULE)

    # List generated CSV files
    if os.path.isdir(args.output_dir):
        csv_files = sorted(f for f in os.listdir(args.output_dir) if f.endswith('.csv'))
        if csv_files:
            print("\nGenerated CSV Files:")
            for csv_name in csv_files:
                csv_size = os.path.getsize(os.path.join(args.output_dir, csv_name))
                print(f"  - {csv_name} ({csv_size:,} bytes)")


if __name__ == "__main__":
    main()