#!/usr/bin/env python3
"""
Category Proposer - AI-powered category suggestions

Analyzes posts and proposes optimal categories based on content.
"""
import csv
import json
import logging
import sys
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

import requests

from config import Config

logger = logging.getLogger(__name__)

# Pricing used for the running cost estimate (USD per 1M tokens).
# NOTE(review): these rates are hard-coded for one model tier; if
# Config.AI_MODEL points at a differently-priced model the estimate
# will be wrong — confirm against the OpenRouter pricing page.
_INPUT_COST_PER_MTOK = 3
_OUTPUT_COST_PER_MTOK = 15


class CategoryProposer:
    """Propose categories for posts using AI."""

    def __init__(self, csv_file: str):
        """Initialize proposer with CSV file.

        Args:
            csv_file: Path to the input CSV of posts.
        """
        self.csv_file = Path(csv_file)
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.ai_model = Config.AI_MODEL
        self.posts: List[Dict] = []                 # rows loaded from the CSV
        self.proposed_categories: List[Dict] = []   # rows enriched with proposals
        self.api_calls = 0
        self.ai_cost = 0.0                          # running USD estimate

    def load_csv(self) -> bool:
        """Load posts from the CSV file into self.posts.

        Returns:
            True on success, False if the file is missing or unreadable.
        """
        logger.info("Loading CSV: %s", self.csv_file)

        if not self.csv_file.exists():
            logger.error("CSV file not found: %s", self.csv_file)
            return False

        try:
            with open(self.csv_file, 'r', encoding='utf-8') as f:
                self.posts = list(csv.DictReader(f))
            logger.info("✓ Loaded %d posts", len(self.posts))
            return True
        # Narrowed from bare Exception: I/O, CSV structure, or encoding errors.
        except (OSError, csv.Error, UnicodeDecodeError) as e:
            logger.error("Error loading CSV: %s", e)
            return False

    def get_category_proposals(self, batch: List[Dict]) -> Optional[str]:
        """Get AI category proposals for a batch of posts.

        Args:
            batch: Posts (CSV rows) to analyze in a single API call.

        Returns:
            The raw model response text, or None on any failure.
        """
        if not self.openrouter_api_key:
            logger.error("OPENROUTER_API_KEY not set")
            return None

        # Format posts for the prompt; truncate content to keep token use low.
        formatted = []
        for i, post in enumerate(batch, 1):
            # .get() everywhere: a CSV missing optional columns must not crash.
            text = f"{i}. ID: {post.get('post_id', '')}\n"
            text += f"   Title: {post.get('title', '')}\n"
            text += f"   Current Categories: {post.get('categories', '')}\n"
            if 'content_preview' in post:
                text += f"   Content: {post['content_preview'][:300]}...\n"
            formatted.append(text)

        posts_text = "\n".join(formatted)

        prompt = f"""Analyze these blog posts and propose optimal categories.
{posts_text}
For EACH post, provide:
{{
  "post_id": ,
  "current_categories": "",
  "proposed_category": "",
  "alternative_categories": ["", ""],
  "reason": "",
  "confidence": ""
}}
Return ONLY a JSON array with one object per post."""

        try:
            logger.info("  Getting category proposals...")
            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": self.ai_model,
                    "messages": [{"role": "user", "content": prompt}],
                    "temperature": 0.3,  # low temperature for consistent JSON output
                },
                timeout=60,
            )
            response.raise_for_status()
            result = response.json()
            self.api_calls += 1

            # Track an approximate spend from the reported token usage.
            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            self.ai_cost += (
                input_tokens * _INPUT_COST_PER_MTOK
                + output_tokens * _OUTPUT_COST_PER_MTOK
            ) / 1_000_000

            logger.info("  ✓ Got proposals (tokens: %d+%d)",
                        input_tokens, output_tokens)
            return result['choices'][0]['message']['content'].strip()
        # Narrowed from bare Exception: network/HTTP errors, malformed JSON
        # bodies (ValueError), or an unexpected response shape (Key/IndexError).
        except (requests.RequestException, KeyError, IndexError, ValueError) as e:
            logger.error("Error getting proposals: %s", e)
            return None

    def parse_proposals(self, proposals_json: str) -> List[Dict]:
        """Parse the model's response text into a list of proposal dicts.

        Extracts the first '['..']' span so surrounding prose or code fences
        from the model do not break parsing.

        Returns:
            The parsed list, or [] when nothing parseable is found.
        """
        start_idx = proposals_json.find('[')
        end_idx = proposals_json.rfind(']') + 1  # rfind miss -> -1 + 1 == 0
        if start_idx == -1 or end_idx == 0:
            return []
        try:
            parsed = json.loads(proposals_json[start_idx:end_idx])
        except json.JSONDecodeError:
            return []
        # Guard: callers iterate the result, so reject non-list payloads.
        return parsed if isinstance(parsed, list) else []

    def propose_categories(self, batch_size: int = 10) -> bool:
        """Propose categories for all loaded posts.

        Processing is best-effort: a failed batch is skipped, and posts
        without a proposal fall back to their current categories.

        Args:
            batch_size: Number of posts per API call.

        Returns:
            True (failed batches are logged, not fatal).
        """
        logger.info("\n" + "=" * 70)
        logger.info("PROPOSING CATEGORIES WITH AI")
        logger.info("=" * 70 + "\n")

        batches = [self.posts[i:i + batch_size]
                   for i in range(0, len(self.posts), batch_size)]
        logger.info("Processing %d posts in %d batches...\n",
                    len(self.posts), len(batches))

        all_proposals: Dict[str, Dict] = {}
        for batch_num, batch in enumerate(batches, 1):
            logger.info("Batch %d/%d...", batch_num, len(batches))
            proposals_json = self.get_category_proposals(batch)
            if not proposals_json:
                continue  # best-effort: skip the failed batch, keep going
            proposals = self.parse_proposals(proposals_json)
            for prop in proposals:
                all_proposals[str(prop.get('post_id', ''))] = prop
            logger.info("  ✓ Got %d proposals", len(proposals))

        logger.info("\n✓ Proposals complete!")
        logger.info("  Total: %d", len(all_proposals))
        logger.info("  API calls: %d", self.api_calls)
        logger.info("  Cost: $%.4f", self.ai_cost)

        # Map proposals back onto posts; posts the model did not cover keep
        # their current categories as the proposed value.
        for post in self.posts:
            post_id = str(post.get('post_id', ''))
            proposal = all_proposals.get(post_id, {})
            self.proposed_categories.append({
                **post,
                'proposed_category': proposal.get(
                    'proposed_category', post.get('categories', '')),
                'alternative_categories': ', '.join(
                    proposal.get('alternative_categories', [])),
                'category_reason': proposal.get('reason', ''),
                'category_confidence': proposal.get('confidence', 'Medium'),
                'current_categories': post.get('categories', ''),
            })

        return True

    def export_proposals(self, output_file: Optional[str] = None) -> str:
        """Export category proposals to CSV.

        Args:
            output_file: Destination path; when None, a timestamped file is
                created under the sibling 'output' directory.

        Returns:
            The path of the written CSV as a string.
        """
        if not output_file:
            output_dir = Path(__file__).parent.parent / 'output'
            output_dir.mkdir(parents=True, exist_ok=True)
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            output_file = output_dir / f'category_proposals_{timestamp}.csv'

        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        fieldnames = [
            'post_id', 'title', 'site', 'current_categories',
            'proposed_category', 'alternative_categories',
            'category_reason', 'category_confidence'
        ]

        logger.info("\nExporting to: %s", output_file)
        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            # extrasaction='ignore': rows carry all original CSV columns,
            # but only the fields above are written.
            writer = csv.DictWriter(f, fieldnames=fieldnames,
                                    extrasaction='ignore')
            writer.writeheader()
            writer.writerows(self.proposed_categories)

        logger.info("✓ Exported %d proposals", len(self.proposed_categories))
        return str(output_file)

    def run(self, output_file: Optional[str] = None,
            batch_size: int = 10) -> str:
        """Run the complete category proposal process.

        Exits the process (status 1) on load or proposal failure, matching
        the original CLI-oriented behavior.
        """
        if not self.load_csv():
            sys.exit(1)
        if not self.propose_categories(batch_size=batch_size):
            logger.error("Failed to propose categories")
            sys.exit(1)
        return self.export_proposals(output_file)


def main():
    """Main entry point."""
    import argparse

    parser = argparse.ArgumentParser(
        description='AI-powered category proposer for blog posts'
    )
    parser.add_argument('csv_file', help='Input CSV file with posts')
    parser.add_argument('--output', '-o', help='Output CSV file')
    parser.add_argument('--batch-size', type=int, default=10,
                        help='Batch size')
    args = parser.parse_args()

    # Bug fix: without a configured handler the last-resort handler only
    # emits WARNING+, so all the INFO progress output was invisible.
    logging.basicConfig(level=logging.INFO, format='%(message)s')

    proposer = CategoryProposer(args.csv_file)
    # Bug fix: --output was parsed but never forwarded to run().
    output_file = proposer.run(args.output, batch_size=args.batch_size)
    logger.info("\n✓ Category proposals saved to: %s", output_file)


if __name__ == '__main__':
    main()