Refactor SNCF processor and add Revolut aggregator
- Fix SNCF NET PAYÉ EN EUROS extraction to correctly parse MENSUEL line
- Extract month/year from PDF content instead of filename
- Add new Revolut CSV processor to aggregate account statements
- Organize Revolut data files into data/csv/revolut/
- Clean up redundant scripts and reports
This commit is contained in:
12
scripts/process_laposte_improved.py
Normal file → Executable file
12
scripts/process_laposte_improved.py
Normal file → Executable file
@@ -1,3 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
|
||||
|
||||
import subprocess
|
||||
import re
|
||||
import csv
|
||||
@@ -54,7 +58,7 @@ def process_laposte_pdf_files(directory, output_csv=False, output_dir='../../out
|
||||
continue
|
||||
|
||||
# Match transaction lines - they have date and amount
|
||||
if re.match(r'\s*\d{2}/\d{2}/\d{4}', line):
|
||||
if re.match(r'\s*\d{2}/\d{2}', line):
|
||||
parts = re.split(r'\s{2,}', line)
|
||||
if len(parts) >= 3:
|
||||
try:
|
||||
@@ -64,9 +68,9 @@ def process_laposte_pdf_files(directory, output_csv=False, output_dir='../../out
|
||||
# Extract amount (look for numeric values with ¤ or €)
|
||||
amount = 0
|
||||
for part in parts[2:]:
|
||||
part = part.strip().replace('¤', '').replace('€', '')
|
||||
part = part.strip().replace('¤', '').replace('€', '').replace(' ', '')
|
||||
if re.match(r'[\d.,]+', part):
|
||||
amount_str = part.replace(' ', '').replace(',', '.')
|
||||
amount_str = part.replace(',', '.')
|
||||
try:
|
||||
amount = float(amount_str)
|
||||
break
|
||||
@@ -121,4 +125,4 @@ if __name__ == "__main__":
|
||||
args = parser.parse_args()
|
||||
|
||||
# Process all PDF files in the directory
|
||||
process_laposte_pdf_files(args.pdf_dir, args.csv, args.output_dir)
|
||||
process_laposte_pdf_files(args.pdf_dir, args.csv, args.output_dir)
|
||||
|
||||
Reference in New Issue
Block a user