#!/usr/bin/env python3
"""
WordPress Category Management Script

Fetches all categories from WordPress sites, proposes new categories,
and allows assigning posts to categories or websites using AI recommendations.
"""

import csv
import json
import logging
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, Iterable, List, Optional

import requests
from requests.auth import HTTPBasicAuth

from config import Config

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

# Keyword table used by the non-AI fallback, keyed by site name and then by
# category. Insertion order matters: the first category with a matching
# keyword wins. Matching is a plain substring test, so short keywords such as
# 'tor' or 'app' also match inside longer words.
_CATEGORY_KEYWORDS = {
    'mistergeek.net': {
        'VPN': ['vpn', 'proxy', 'privacy', 'secure', 'encryption'],
        'Software': ['software', 'app', 'tool', 'download', 'install'],
        'Gaming': ['game', 'gaming', 'console', 'steam', 'playstation'],
        'Tech News': ['news', 'update', 'release', 'announced'],
        'Cybersecurity': ['security', 'malware', 'antivirus', 'hacking', 'breach'],
    },
    'webscroll.fr': {
        'Torrent': ['torrent', 'download', 'upload', 'client', 'tracker'],
        'Privacy': ['privacy', 'anonymous', 'tor', 'vpn'],
        'File Sharing': ['share', 'sharing', 'ddl', 'upload'],
    },
    'hellogeek.net': {
        'Opinion': ['think', 'believe', 'opinion', 'view', 'perspective'],
        'Tutorial': ['how to', 'guide', 'tutorial', 'steps', 'instructions'],
        'Review': ['review', 'rating', 'comparison', 'test'],
    },
}


def _truncate(text: str, limit: int) -> str:
    """Return *text* cut to *limit* characters, with "..." appended if it was cut."""
    return text[:limit] + "..." if len(text) > limit else text


class AICategoryAdvisor:
    """AI-powered advisor for category and site recommendations."""

    API_URL = "https://openrouter.ai/api/v1/chat/completions"

    # Cost estimate uses Claude 3.5 Sonnet pricing ($3 / $15 per 1M tokens)
    # regardless of which model Config.AI_MODEL selects, so the reported cost
    # is only accurate for that model.
    INPUT_COST_PER_MILLION = 3
    OUTPUT_COST_PER_MILLION = 15

    def __init__(self):
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.ai_model = Config.AI_MODEL
        self.api_calls = 0  # number of successful API round trips
        self.ai_cost = 0.0  # running cost estimate in USD

    def get_ai_category_recommendations(self, posts_batch: List[Dict]) -> Optional[List[Dict]]:
        """
        Get AI recommendations for category assignments.

        Args:
            posts_batch: List of posts to analyze. Each post must have an
                'id' key; 'title', 'content' and 'categories' are optional.

        Returns:
            List of recommendation dicts (possibly empty if the response
            could not be parsed), or None if the API key is missing or the
            request failed.
        """
        if not self.openrouter_api_key:
            logger.error("OPENROUTER_API_KEY not set")
            return None

        if not posts_batch:
            # Nothing to analyze: avoid a billable request for an empty prompt.
            return []

        # Format posts for AI analysis
        formatted_posts = []
        for i, post in enumerate(posts_batch, 1):
            title = post.get('title', {}).get('rendered', 'Untitled')
            content = post.get('content', {}).get('rendered', '')[:500]  # First 500 chars
            current_categories = post.get('categories', [])
            formatted_posts.append(
                f"{i}. POST ID: {post['id']}\n"
                f" Title: {title}\n"
                f" Content Preview: {content}...\n"
                f" Current Categories: {current_categories}\n"
            )

        posts_text = "\n".join(formatted_posts)

        # NOTE(review): the field placeholders in the JSON schema below are
        # blank in the source as received - confirm the intended hints.
        prompt = f"""Analyze these blog posts and provide category recommendations.

Website Strategy:
- mistergeek.net: High-value topics (VPN, Software, Gaming, General Tech, SEO, Content Marketing)
- webscroll.fr: Torrenting, File-Sharing, Tracker guides (niche audience)
- hellogeek.net: Low-traffic, experimental, off-brand, or niche content

{posts_text}

For EACH post, provide a JSON object with:
{{
  "post_id": ,
  "recommended_category": "",
  "recommended_site": "",
  "reason": "",
  "confidence": ""
}}

Return ONLY a JSON array. Example:
[
  {{"post_id": 2845, "recommended_category": "VPN", "recommended_site": "mistergeek.net", "reason": "Core VPN topic", "confidence": "High"}},
  {{"post_id": 1234, "recommended_category": "Torrenting", "recommended_site": "webscroll.fr", "reason": "Torrent tracker content", "confidence": "High"}}
]

Analyze all posts and provide recommendations for EVERY post in the batch."""

        try:
            logger.info(" Sending batch to AI for category recommendations...")

            response = requests.post(
                self.API_URL,
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": self.ai_model,
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.3,  # Lower temp for more consistent recommendations
                },
                timeout=60
            )
            response.raise_for_status()

            result = response.json()
            self.api_calls += 1

            # Track cost. `or {}` also covers an explicit "usage": null, which
            # would otherwise discard a valid response.
            usage = result.get('usage') or {}
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            self.ai_cost += (
                input_tokens * self.INPUT_COST_PER_MILLION
                + output_tokens * self.OUTPUT_COST_PER_MILLION
            ) / 1_000_000

            recommendations_text = result['choices'][0]['message']['content'].strip()
            logger.info(f" ✓ Got recommendations (tokens: {input_tokens}+{output_tokens})")

            # Parse the recommendations
            return self._parse_recommendations(recommendations_text)

        except Exception as e:
            # Deliberately broad: any failure here (network, HTTP status,
            # unexpected response shape) must not abort the run, because the
            # caller falls back to keyword-based suggestions on None.
            logger.error(f"Error getting AI recommendations: {e}")
            return None

    def _parse_recommendations(self, recommendations_json: str) -> List[Dict]:
        """
        Parse JSON recommendations from AI.

        The outermost '[' ... ']' span is extracted so that surrounding prose
        or code fences in the reply are ignored.

        Args:
            recommendations_json: Raw text of the AI reply.

        Returns:
            List of recommendation dicts. Entries that are not JSON objects
            are dropped, and a 'post_id' given as a digit string is converted
            to int so it can be matched against WordPress post ids. Returns
            an empty list if no valid JSON array is found.
        """
        try:
            # Try to extract JSON from response
            start_idx = recommendations_json.find('[')
            end_idx = recommendations_json.rfind(']') + 1

            if start_idx == -1 or end_idx == 0:
                logger.error("Could not find JSON array in response")
                return []

            parsed = json.loads(recommendations_json[start_idx:end_idx])
        except json.JSONDecodeError as e:
            logger.error(f"Error parsing JSON recommendations: {e}")
            logger.debug(f"Response was: {recommendations_json[:500]}")
            return []

        if not isinstance(parsed, list):
            logger.error("Could not find JSON array in response")
            return []

        recommendations = []
        for entry in parsed:
            if not isinstance(entry, dict):
                # Callers use entry.get(...); anything else would crash them.
                logger.debug(f"Skipping non-object recommendation: {entry!r}")
                continue
            post_id = entry.get('post_id')
            if isinstance(post_id, str) and post_id.strip().isdigit():
                entry['post_id'] = int(post_id.strip())
            recommendations.append(entry)
        return recommendations


class CategoryManager:
    """Manage WordPress categories across multiple sites."""

    # Number of posts sent to the AI advisor per request.
    AI_BATCH_SIZE = 10

    def __init__(self):
        """Initialize the category manager with sites from Config."""
        self.sites = Config.WORDPRESS_SITES
        self.categories_by_site = {}
        self.posts_by_site = {}
        self.proposed_categories = {}
        self.category_assignments = []
        self.ai_advisor = AICategoryAdvisor()

    def _fetch_paginated(self, site_name: str, site_config: Dict, resource: str,
                         extra_params: Optional[Dict] = None) -> List[Dict]:
        """
        Fetch every page of a WordPress REST collection.

        Args:
            site_name: Website name (used in log messages only)
            site_config: Site configuration dict with 'url', 'username'
                and 'password' keys
            resource: Collection name under /wp-json/wp/v2/ (e.g. 'posts')
            extra_params: Additional query parameters, if any

        Returns:
            All items collected. On a 401/403 the items gathered so far are
            returned; on any other request or decoding error an empty list
            is returned.
        """
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/{resource}"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])

        items = []
        try:
            page = 1
            while True:
                params = {
                    'page': page,
                    'per_page': 100,
                }
                if extra_params:
                    params.update(extra_params)

                response = requests.get(api_url, params=params, auth=auth, timeout=10)

                if response.status_code == 401:
                    logger.error(f"Unauthorized access to {site_name}. Check credentials.")
                    break
                if response.status_code == 403:
                    logger.error(f"Forbidden access to {site_name}. Check permissions.")
                    break

                response.raise_for_status()

                page_items = response.json()
                if not page_items:
                    break
                if not isinstance(page_items, list):
                    # A collection endpoint should return a JSON array;
                    # extending with a dict would silently add its keys.
                    logger.error(f"Error fetching {resource} from {site_name}: unexpected response format")
                    break

                items.extend(page_items)
                logger.info(f" Page {page}: Got {len(page_items)} {resource}")

                # Check if there are more pages
                link_header = response.headers.get('Link', '')
                if 'rel="next"' not in link_header:
                    break

                page += 1
                time.sleep(0.5)  # brief pause between page requests

            logger.info(f"✓ Total {resource} from {site_name}: {len(items)}")

        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers invalid JSON bodies on requests versions where
            # the JSON decode error is not a RequestException subclass.
            logger.error(f"Error fetching {resource} from {site_name}: {e}")
            return []

        return items

    def fetch_categories_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
        """
        Fetch all categories from a WordPress site.

        Args:
            site_name: Website name
            site_config: Site configuration dict

        Returns:
            List of categories with metadata
        """
        logger.info(f"Fetching categories from {site_name}...")
        return self._fetch_paginated(site_name, site_config, 'categories')

    def fetch_posts_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
        """
        Fetch posts from a WordPress site to see current category assignments.

        Only posts with status 'publish' are requested.

        Args:
            site_name: Website name
            site_config: Site configuration dict

        Returns:
            List of posts with category information
        """
        logger.info(f"Fetching posts from {site_name} to analyze category assignments...")
        return self._fetch_paginated(
            site_name, site_config, 'posts', extra_params={'status': 'publish'}
        )

    def analyze_categories(self):
        """Fetch categories and posts for every site and log a summary."""
        logger.info("\n" + "="*70)
        logger.info("ANALYZING CURRENT CATEGORIES")
        logger.info("="*70)

        for site_name, config in self.sites.items():
            categories = self.fetch_categories_from_site(site_name, config)
            posts = self.fetch_posts_from_site(site_name, config)

            self.categories_by_site[site_name] = categories
            self.posts_by_site[site_name] = posts

            logger.info(f"\n{site_name}:")
            logger.info(f" Categories: {len(categories)}")
            logger.info(f" Posts: {len(posts)}")

            # Show top categories by post count
            if categories:
                logger.info(" Top 10 categories by post count:")
                # Sort categories by count (most posts first)
                sorted_cats = sorted(categories, key=lambda x: x.get('count', 0), reverse=True)
                for rank, cat in enumerate(sorted_cats[:10], 1):
                    # .get() keeps this consistent with the sort key above,
                    # which already tolerates a missing 'count'.
                    logger.info(f" {rank}. {cat.get('name', '')} ({cat.get('count', 0)} posts)")

    def propose_new_categories(self):
        """Record and log the predefined category proposals for each configured site."""
        logger.info("\n" + "="*70)
        logger.info("PROPOSING NEW CATEGORIES")
        logger.info("="*70)

        # Define category proposals based on content analysis
        category_proposals = {
            'mistergeek.net': [
                {'name': 'VPN Reviews', 'description': 'Reviews of VPN services', 'parent': 0},
                {'name': 'Software Tutorials', 'description': 'Step-by-step software guides', 'parent': 0},
                {'name': 'Tech News', 'description': 'Latest technology news', 'parent': 0},
                {'name': 'Cybersecurity', 'description': 'Security tips and tools', 'parent': 0},
            ],
            'webscroll.fr': [
                {'name': 'Torrent Clients', 'description': 'Reviews of torrent clients', 'parent': 0},
                {'name': 'Privacy Tools', 'description': 'Privacy-focused tools and services', 'parent': 0},
                {'name': 'File Sharing Guide', 'description': 'Guides on file sharing methods', 'parent': 0},
            ],
            'hellogeek.net': [
                {'name': 'Experimental Tech', 'description': 'New and experimental tech', 'parent': 0},
                {'name': 'Random Thoughts', 'description': 'Opinion and commentary posts', 'parent': 0},
                {'name': 'Testing Zone', 'description': 'Posts for testing purposes', 'parent': 0},
            ]
        }

        for site_name in self.sites:
            proposals = category_proposals.get(site_name)
            if proposals is None:
                continue
            self.proposed_categories[site_name] = proposals
            logger.info(f"\n{site_name} - Proposed categories:")
            for cat in proposals:
                logger.info(f" - {cat['name']}: {cat['description']}")

    def _build_assignment(self, site_name: str, post: Dict,
                          recommendation: Optional[Dict] = None) -> Dict:
        """
        Build one assignment proposal row for *post*.

        Uses the AI *recommendation* when given; otherwise falls back to the
        keyword-based suggestion for the post's current site.
        """
        title = post.get('title', {}).get('rendered', 'Untitled')
        content = post.get('content', {}).get('rendered', '')[:200]  # First 200 chars

        if recommendation is not None:
            proposed_category = recommendation.get('recommended_category', 'Uncategorized')
            proposed_site = recommendation.get('recommended_site', site_name)
            reason = recommendation.get('reason', '')
            confidence = recommendation.get('confidence', 'Low')
        else:
            proposed_category = self._suggest_category_by_content(title + " " + content, site_name)
            proposed_site = site_name
            reason = 'Keyword-based suggestion'
            confidence = 'Low'

        return {
            'site': site_name,
            'post_id': post['id'],
            'post_title': _truncate(title, 50),
            'current_categories': post.get('categories', []),
            'proposed_category': proposed_category,
            'proposed_site': proposed_site,
            'reason': reason,
            'confidence': confidence,
            'content_preview': _truncate(content, 100),
            'status': 'pending_approval'
        }

    def create_category_assignment_proposals(self):
        """
        Create proposals for assigning posts to categories or websites.

        Posts are sent to the AI advisor in batches. A post without a
        matching AI recommendation (or a whole batch, when the advisor
        returns nothing) gets a keyword-based suggestion instead. Results
        are appended to self.category_assignments.
        """
        logger.info("\n" + "="*70)
        logger.info("CREATING CATEGORY ASSIGNMENT PROPOSALS")
        logger.info("="*70)

        # Analyze posts and propose category assignments
        for site_name, posts in self.posts_by_site.items():
            logger.info(f"\nAnalyzing posts from {site_name} for category assignments...")

            # Process posts in batches for AI analysis
            for start in range(0, len(posts), self.AI_BATCH_SIZE):
                batch = posts[start:start + self.AI_BATCH_SIZE]

                # Get AI recommendations for this batch
                ai_recommendations = self.ai_advisor.get_ai_category_recommendations(batch)

                # Index by post id once per batch. setdefault keeps the first
                # recommendation when the AI repeats a post id.
                recs_by_post_id = {}
                for rec in ai_recommendations or []:
                    if not isinstance(rec, dict):
                        continue
                    post_id = rec.get('post_id')
                    if isinstance(post_id, (int, str)):
                        recs_by_post_id.setdefault(post_id, rec)

                for post in batch:
                    self.category_assignments.append(
                        self._build_assignment(site_name, post, recs_by_post_id.get(post['id']))
                    )

        logger.info(f"Created {len(self.category_assignments)} category assignment proposals")

    def _suggest_category_by_content(self, content: str, site_name: str) -> str:
        """
        Suggest a category based on content keywords.

        Args:
            content: Text to scan (matched case-insensitively)
            site_name: Site whose keyword table is used

        Returns:
            The first category for the site with a keyword occurring as a
            substring of the content, or 'Uncategorized' if none matches or
            the site has no keyword table.
        """
        content_lower = content.lower()

        for category, keywords in _CATEGORY_KEYWORDS.get(site_name, {}).items():
            if any(keyword in content_lower for keyword in keywords):
                return category

        return 'Uncategorized'

    def _write_csv(self, file_prefix: str, fieldnames: List[str], rows: Iterable[Dict]) -> Path:
        """
        Write *rows* to a timestamped CSV file in the output directory.

        The directory is 'output' next to this file's parent directory and is
        created if missing. Returns the path of the file written.
        """
        output_dir = Path(__file__).parent.parent / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        csv_file = output_dir / f'{file_prefix}_{timestamp}.csv'

        with open(csv_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)

        return csv_file

    def export_categories_csv(self) -> str:
        """Export current categories to CSV and return the file path."""
        fieldnames = ['site', 'category_id', 'name', 'slug', 'description', 'post_count', 'parent_id']

        rows = (
            {
                'site': site_name,
                'category_id': cat.get('id', ''),
                'name': cat.get('name', ''),
                'slug': cat.get('slug', ''),
                'description': cat.get('description', ''),
                'post_count': cat.get('count', 0),
                'parent_id': cat.get('parent', 0)
            }
            for site_name, categories in self.categories_by_site.items()
            for cat in categories
        )
        csv_file = self._write_csv('current_categories', fieldnames, rows)

        logger.info(f"✓ Current categories exported to: {csv_file}")
        return str(csv_file)

    def export_proposed_categories_csv(self) -> str:
        """Export proposed new categories to CSV and return the file path."""
        fieldnames = ['site', 'proposed_category', 'description', 'parent_category', 'reason']

        rows = (
            {
                'site': site_name,
                'proposed_category': cat.get('name', ''),
                'description': cat.get('description', ''),
                'parent_category': cat.get('parent', 0),
                'reason': 'Content analysis and organization improvement'
            }
            for site_name, categories in self.proposed_categories.items()
            for cat in categories
        )
        csv_file = self._write_csv('proposed_categories', fieldnames, rows)

        logger.info(f"✓ Proposed categories exported to: {csv_file}")
        return str(csv_file)

    def export_category_assignments_csv(self) -> str:
        """Export category assignment proposals to CSV and return the file path."""
        fieldnames = ['site', 'post_id', 'post_title', 'current_categories', 'proposed_category',
                      'proposed_site', 'reason', 'confidence', 'content_preview', 'status']

        csv_file = self._write_csv('category_assignments', fieldnames, self.category_assignments)

        logger.info(f"✓ Category assignments exported to: {csv_file}")
        return str(csv_file)

    def run(self):
        """Run complete category management process."""
        logger.info("="*70)
        logger.info("WORDPRESS CATEGORY MANAGEMENT")
        logger.info("="*70)
        logger.info("Sites configured: " + ", ".join(self.sites.keys()))
        logger.info("")

        # Analyze current categories
        self.analyze_categories()

        # Propose new categories
        self.propose_new_categories()

        # Create category assignment proposals
        self.create_category_assignment_proposals()

        # Export all data
        logger.info("\n" + "="*70)
        logger.info("EXPORTING RESULTS")
        logger.info("="*70)

        categories_csv = self.export_categories_csv()
        proposed_csv = self.export_proposed_categories_csv()
        assignments_csv = self.export_category_assignments_csv()

        # Print summary
        logger.info("\n" + "="*70)
        logger.info("CATEGORY MANAGEMENT SUMMARY")
        logger.info("="*70)

        total_categories = sum(len(cats) for cats in self.categories_by_site.values())
        logger.info(f"Total current categories: {total_categories}")

        total_proposed = sum(len(props) for props in self.proposed_categories.values())
        logger.info(f"Total proposed categories: {total_proposed}")

        logger.info(f"Category assignment proposals: {len(self.category_assignments)}")

        # AI Advisor stats
        logger.info(f"AI API calls made: {self.ai_advisor.api_calls}")
        logger.info(f"AI cost: ${self.ai_advisor.ai_cost:.4f}")

        logger.info(f"\n{'─'*70}")
        logger.info("Exported files:")
        logger.info(f" • Current categories: {categories_csv}")
        logger.info(f" • Proposed categories: {proposed_csv}")
        logger.info(f" • Category assignments: {assignments_csv}")
        logger.info(f"{'─'*70}")

        logger.info("\n✓ Category management complete!")
        logger.info("\nNext steps:")
        logger.info(" 1. Review proposed_categories.csv for new categories to add")
        logger.info(" 2. Review category_assignments.csv for posts that need re-categorization")
        logger.info(" 3. Manually approve or modify proposals before applying changes")


def main():
    """Main entry point."""
    import argparse

    parser = argparse.ArgumentParser(
        description='Manage WordPress categories across multiple sites'
    )
    # No options are defined yet; parsing still provides --help and rejects
    # unexpected arguments.
    parser.parse_args()

    manager = CategoryManager()
    manager.run()


if __name__ == '__main__':
    main()