Initial commit with CSV export functionality for all financial statement processing scripts
This commit is contained in:
104
process_laposte.py
Normal file
104
process_laposte.py
Normal file
@@ -0,0 +1,104 @@
|
||||
import subprocess
|
||||
import re
|
||||
import csv
|
||||
import os
|
||||
import glob
|
||||
from collections import defaultdict
|
||||
|
||||
# Ordered (keywords, category) rules: the first rule with a matching keyword
# wins, so a description mentioning both a transfer and a fee is a 'Transfer'.
# Matching is by substring, so the abbreviation 'vir' also covers 'virement';
# both are listed to keep the intent readable.
_LAPOSTE_CATEGORY_RULES = (
    (('virement', 'vir'), 'Transfer'),
    (('retrait',), 'Cash Withdrawal'),
    (('carte', 'paiement'), 'Card Payment'),
    (('frais',), 'Bank Fees'),
    (('cotisation',), 'Deductions'),
    (('impot',), 'Tax'),
)


def categorize_laposte_transaction(description):
    """Return the category label for a La Poste transaction description.

    The description is lower-cased and stripped of accents, then checked
    against the keyword rules in priority order using substring matching.

    Args:
        description: Free-text transaction label (a ``str``).

    Returns:
        One of 'Transfer', 'Cash Withdrawal', 'Card Payment', 'Bank Fees',
        'Deductions' or 'Tax'; 'Other' when no keyword matches.
    """
    import unicodedata

    # Fold accents so that e.g. "impôt" matches the unaccented keyword
    # "impot"; the keywords themselves contain no accented characters.
    decomposed = unicodedata.normalize('NFD', description.lower())
    folded = ''.join(ch for ch in decomposed if not unicodedata.combining(ch))

    for keywords, category in _LAPOSTE_CATEGORY_RULES:
        if any(keyword in folded for keyword in keywords):
            return category
    return 'Other'
|
||||
|
||||
# A transaction line starts (after optional whitespace) with a dd/mm/yyyy date.
_LAPOSTE_DATE_LINE_RE = re.compile(r'\s*\d{2}/\d{2}/\d{4}')
# A candidate amount token consists solely of digits, commas and dots.
_LAPOSTE_AMOUNT_RE = re.compile(r'[\d,.]+')


def _parse_laposte_amount(token):
    """Return *token* as a float if it is an unsigned amount, else ``None``."""
    if not _LAPOSTE_AMOUNT_RE.fullmatch(token):
        return None
    if ',' in token:
        # Comma is the decimal mark here, so any dots are thousands
        # separators ("1.234,56" -> "1234.56").
        token = token.replace('.', '').replace(',', '.')
    try:
        return float(token)
    except ValueError:
        # e.g. "..." or "1,2,3": shaped like a number but not parseable.
        return None


def _parse_laposte_line(line):
    """Return ``(date, description, amount)`` for a transaction line, else ``None``."""
    if not _LAPOSTE_DATE_LINE_RE.match(line):
        return None
    parts = line.split()
    if len(parts) <= 2:
        return None
    # Scan right-to-left for the last numeric token, never considering the
    # date token at index 0.
    for index in range(len(parts) - 1, 0, -1):
        amount = _parse_laposte_amount(parts[index])
        if amount is not None:
            # The description is everything between the date and the amount.
            return parts[0], ' '.join(parts[1:index]), amount
    return None


def process_laposte_pdf_files(directory, output_csv=False):
    """Extract transactions from every ``*.pdf`` file in *directory*.

    Each PDF is converted to text with the external ``pdftotext -layout``
    command. Every output line that starts with a ``dd/mm/yyyy`` date and
    contains a numeric token is recorded as one transaction: the first token
    is the date, the last numeric token is the amount, and the tokens between
    them form the description. Lines without a usable amount are skipped.

    A summary is printed to stdout. Files for which ``pdftotext`` fails (or
    is not installed) are reported on stdout and skipped.

    Args:
        directory: Directory to search (non-recursively) for PDF files.
        output_csv: When true and at least one transaction was found, write
            all transactions to ``laposte_all_transactions.csv`` inside
            *directory*.

    Returns:
        A list of dicts with the keys 'Date', 'Description', 'Category',
        'Amount' and 'Source' (the PDF's base name), in sorted file order.
    """
    # Sorted so that the returned list and the CSV are deterministic.
    pdf_files = sorted(glob.glob(os.path.join(directory, "*.pdf")))
    all_transactions = []

    for pdf_file in pdf_files:
        try:
            # Convert PDF to text; '-' sends the text to stdout.
            result = subprocess.run(
                ['pdftotext', '-layout', pdf_file, '-'],
                capture_output=True, text=True, check=True,
            )
        except (subprocess.CalledProcessError, FileNotFoundError) as e:
            print(f"Error processing {pdf_file}: {e}")
            continue

        source = os.path.basename(pdf_file)
        for line in result.stdout.split('\n'):
            parsed = _parse_laposte_line(line)
            if parsed is None:
                continue
            date, description, amount = parsed
            all_transactions.append({
                'Date': date,
                'Description': description,
                'Category': categorize_laposte_transaction(description),
                'Amount': amount,
                'Source': source,
            })

    # Output CSV if requested
    if output_csv and all_transactions:
        csv_file = os.path.join(directory, 'laposte_all_transactions.csv')
        with open(csv_file, 'w', newline='', encoding='utf-8') as csvfile:
            fieldnames = ['Date', 'Description', 'Category', 'Amount', 'Source']
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(all_transactions)
        print(f"\nTransaction data saved to {csv_file}")

    print("--- La Poste Account Statements ---")
    print(f"Found {len(pdf_files)} account statement files")
    print(f"Processed {len(all_transactions)} transactions")

    return all_transactions
|
||||
|
||||
if __name__ == "__main__":
    import argparse

    # Command-line entry point: pick the statement directory and decide
    # whether the parsed transactions are also exported to CSV.
    cli = argparse.ArgumentParser(
        description='Process La Poste account statements',
    )
    cli.add_argument(
        '--pdf-dir',
        default='2-la.poste',
        help='Directory containing La Poste PDF files',
    )
    cli.add_argument(
        '--csv',
        action='store_true',
        help='Output transaction data to CSV files',
    )
    options = cli.parse_args()

    # Process all PDF files in the chosen directory.
    process_laposte_pdf_files(options.pdf_dir, output_csv=options.csv)
|
||||
Reference in New Issue
Block a user