""" Multi-Site Content Strategy Analyzer Analyzes all content (published + drafts) across 3 websites. Recommends optimal distribution and consolidation strategy. """ import csv import json import argparse from pathlib import Path from collections import defaultdict from datetime import datetime class ContentStrategyAnalyzer: """Analyze and optimize content distribution across multiple sites.""" def __init__(self): """Initialize analyzer.""" self.output_dir = Path('output') self.output_dir.mkdir(exist_ok=True) (self.output_dir / 'analysis').mkdir(exist_ok=True) (self.output_dir / 'reports').mkdir(exist_ok=True) (self.output_dir / 'logs').mkdir(exist_ok=True) self.logs = [] def log(self, message): """Log message.""" self.logs.append(message) print(message) def load_wordpress_posts(self, csv_path): """Load published WordPress posts.""" posts = {} if not csv_path.exists(): self.log(f"⚠️ WordPress posts file not found: {csv_path}") return posts try: with open(csv_path, 'r', encoding='utf-8') as f: reader = csv.DictReader(f) for row in reader: post_id = row.get('ID') or row.get('post_id') if not post_id: continue posts[post_id] = { 'source': 'wordpress', 'status': 'published', 'title': row.get('Title') or row.get('title') or row.get('post_title') or '', 'url': row.get('URL') or row.get('url') or row.get('post_url') or '', 'author': row.get('Author') or row.get('author') or 'Unknown', 'traffic': int(row.get('traffic', 0) or 0), 'impressions': int(row.get('impressions', 0) or 0), 'position': float(row.get('avg_position', 0) or 0), 'category': row.get('Category') or row.get('category') or '', } self.log(f"✓ Loaded {len(posts)} published WordPress posts") except Exception as e: self.log(f"❌ Error reading WordPress posts: {e}") return posts def load_draft_posts(self, csv_path): """Load draft/unpublished posts.""" posts = {} if not csv_path.exists(): self.log(f"⚠️ Draft posts file not found: {csv_path}") return posts try: with open(csv_path, 'r', encoding='utf-8') as f: reader = csv.DictReader(f) for row in reader: post_id = row.get('ID') or row.get('post_id') if not post_id: continue posts[post_id] = { 'source': 'draft', 'status': 'draft', 'title': row.get('Title') or row.get('title') or row.get('post_title') or '', 'url': row.get('URL') or row.get('url') or row.get('post_url') or '', 'author': row.get('Author') or row.get('author') or 'Unknown', 'traffic': 0, # Drafts have no traffic 'impressions': 0, 'position': 0, 'category': row.get('Category') or row.get('category') or '', } self.log(f"✓ Loaded {len(posts)} draft posts") except Exception as e: self.log(f"❌ Error reading draft posts: {e}") return posts def classify_post_topic(self, post): """Classify post into topic area.""" title = post['title'].lower() category = post['category'].lower() content = f"{title} {category}" # Topic classification based on keywords topic_keywords = { 'torrent': ['torrent', 'ygg', 'ratio', 'tracker', 'magnet', 'seedbox', 'upload'], 'streaming': ['stream', 'film', 'série', 'netflix', 'disney', 'platforma'], 'vpn': ['vpn', 'proxy', 'anonyme', 'privacy', 'chiffr'], 'software': ['software', 'tool', 'app', 'logiciel', 'outil', 'program'], 'gaming': ['game', 'jeu', 'gaming', 'emula', 'console', 'retro'], 'download': ['download', 'télécharge', 'ddl', 'upload'], 'tech': ['tech', 'informatique', 'code', 'programming', 'developer'], 'other': [], } for topic, keywords in topic_keywords.items(): if topic == 'other': continue for keyword in keywords: if keyword in content: return topic return 'other' def classify_website(self, post): """Determine which website this post should be on.""" topic = self.classify_post_topic(post) author = post.get('author', '').strip() is_sponsored = author == 'Expert' # Website assignment rules if topic == 'torrent' or topic == 'download': return { 'site': 'webscroll.fr', 'reason': f'Torrent/file-sharing content', 'priority': 'HIGH' if post['traffic'] > 100 else 'MEDIUM' } if topic in ['vpn', 'software', 'gaming', 'tech']: return { 'site': 'mistergeek.net', 'reason': f'{topic.capitalize()} - core content', 'priority': 'HIGH' if post['traffic'] > 50 else 'MEDIUM' } if topic == 'streaming' and post['traffic'] < 100: return { 'site': 'hellogeek.net', 'reason': 'Low-traffic streaming content', 'priority': 'LOW' } if topic == 'other' or post['traffic'] < 10: return { 'site': 'hellogeek.net', 'reason': 'Off-brand or low-traffic content', 'priority': 'LOW' } # Default to main site return { 'site': 'mistergeek.net', 'reason': 'Core content', 'priority': 'MEDIUM' } def classify_content_action(self, post): """Determine what action to take with this post.""" topic = self.classify_post_topic(post) traffic = post.get('traffic', 0) impressions = post.get('impressions', 0) position = post.get('position', 0) status = post.get('status', 'published') # Determine action if status == 'draft': if traffic == 0: return 'REVIEW_PUBLISH_OR_DELETE' # Unpublished draft else: return 'REPUBLISH' # Was published, now draft if traffic < 5 and impressions < 20: return 'DELETE_OR_CONSOLIDATE' if traffic > 0 and position > 0 and position < 11: return 'KEEP_OPTIMIZE' if position > 11 and position < 30: return 'KEEP_OPTIMIZE' if position > 30 or traffic < 10: return 'MOVE_TO_OTHER_SITE' return 'KEEP_MONITOR' def analyze_all_content(self, posts): """Analyze and classify all posts.""" analysis = { 'total_posts': len(posts), 'by_site': defaultdict(lambda: {'count': 0, 'traffic': 0, 'posts': []}), 'by_topic': defaultdict(lambda: {'count': 0, 'traffic': 0, 'posts': []}), 'by_action': defaultdict(lambda: {'count': 0, 'traffic': 0, 'posts': []}), 'sponsored_posts': {'count': 0, 'traffic': 0, 'posts': []}, 'draft_posts': {'count': 0, 'posts': []}, } for post_id, post in posts.items(): topic = self.classify_post_topic(post) site_assignment = self.classify_website(post) action = self.classify_content_action(post) is_sponsored = post.get('author', '').strip() == 'Expert' is_draft = post.get('status') == 'draft' # Record in analysis analysis['by_site'][site_assignment['site']]['count'] += 1 analysis['by_site'][site_assignment['site']]['traffic'] += post['traffic'] analysis['by_site'][site_assignment['site']]['posts'].append({ 'id': post_id, 'title': post['title'], 'traffic': post['traffic'], 'reason': site_assignment['reason'] }) analysis['by_topic'][topic]['count'] += 1 analysis['by_topic'][topic]['traffic'] += post['traffic'] analysis['by_action'][action]['count'] += 1 analysis['by_action'][action]['traffic'] += post['traffic'] if is_sponsored: analysis['sponsored_posts']['count'] += 1 analysis['sponsored_posts']['traffic'] += post['traffic'] analysis['sponsored_posts']['posts'].append({ 'id': post_id, 'title': post['title'], 'traffic': post['traffic'] }) if is_draft: analysis['draft_posts']['count'] += 1 analysis['draft_posts']['posts'].append({ 'id': post_id, 'title': post['title'], 'status': 'draft' }) return analysis def generate_content_distribution_csv(self, posts, output_path): """Export detailed content distribution plan.""" try: fieldnames = [ 'post_id', 'title', 'topic', 'status', 'author', 'traffic', 'impressions', 'position', 'recommended_site', 'reason', 'action', 'priority', 'notes' ] rows = [] for post_id, post in posts.items(): topic = self.classify_post_topic(post) site_assignment = self.classify_website(post) action = self.classify_content_action(post) author = post.get('author', '').strip() is_sponsored = author == 'Expert' rows.append({ 'post_id': post_id, 'title': post['title'][:80], 'topic': topic, 'status': post.get('status', 'published'), 'author': author, 'traffic': post.get('traffic', 0), 'impressions': post.get('impressions', 0), 'position': post.get('position', 0), 'recommended_site': site_assignment['site'], 'reason': site_assignment['reason'], 'action': action, 'priority': site_assignment['priority'], 'notes': 'SPONSORED' if is_sponsored else '' }) rows.sort(key=lambda x: x['traffic'], reverse=True) with open(output_path, 'w', newline='', encoding='utf-8') as f: writer = csv.DictWriter(f, fieldnames=fieldnames) writer.writeheader() writer.writerows(rows) self.log(f"✓ Exported {len(rows)} posts to {output_path}") except Exception as e: self.log(f"❌ Error exporting CSV: {e}") def generate_strategy_report(self, analysis, output_path): """Generate comprehensive strategy report.""" try: report = [] report.append("# Multi-Site Content Strategy Report\n") report.append(f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')}*\n\n") # Executive Summary report.append("## Executive Summary\n\n") report.append(f"**Total Content Analyzed:** {analysis['total_posts']} posts\n") report.append(f"- Published: {analysis['total_posts'] - analysis['draft_posts']['count']}\n") report.append(f"- Drafts: {analysis['draft_posts']['count']}\n") report.append(f"- Sponsored: {analysis['sponsored_posts']['count']}\n\n") # Distribution Strategy report.append("## Recommended Site Distribution\n\n") for site, data in sorted(analysis['by_site'].items(), key=lambda x: x[1]['traffic'], reverse=True): report.append(f"### {site}\n") report.append(f"- Posts: {data['count']}\n") report.append(f"- Total Traffic: {data['traffic']:,} visits/month\n") report.append(f"- Top Posts:\n") for post in sorted(data['posts'], key=lambda x: x['traffic'], reverse=True)[:5]: report.append(f" - {post['title'][:60]} ({post['traffic']} visits)\n") report.append(f"\n") # Topic Distribution report.append("## Content by Topic\n\n") for topic, data in sorted(analysis['by_topic'].items(), key=lambda x: x[1]['traffic'], reverse=True): report.append(f"- **{topic.title()}:** {data['count']} posts ({data['traffic']:,} visits)\n") report.append("\n") # Actions Required report.append("## Required Actions\n\n") for action, data in sorted(analysis['by_action'].items(), key=lambda x: x[1]['count'], reverse=True): report.append(f"- **{action}:** {data['count']} posts ({data['traffic']:,} visits)\n") report.append("\n") # Sponsored Content if analysis['sponsored_posts']['count'] > 0: report.append("## Sponsored Content (by 'Expert')\n\n") report.append(f"Total: {analysis['sponsored_posts']['count']} posts\n") report.append(f"Traffic: {analysis['sponsored_posts']['traffic']:,} visits/month\n\n") for post in sorted(analysis['sponsored_posts']['posts'], key=lambda x: x['traffic'], reverse=True)[:10]: report.append(f"- {post['title'][:70]} ({post['traffic']} visits)\n") report.append("\n") # Draft Posts if analysis['draft_posts']['count'] > 0: report.append("## Draft Posts (Unpublished)\n\n") report.append(f"Total: {analysis['draft_posts']['count']} posts\n") report.append("*Decision needed: Publish, delete, or move to other site?*\n\n") for post in analysis['draft_posts']['posts'][:15]: report.append(f"- {post['title'][:70]}\n") report.append("\n") # Recommendations report.append("## Strategic Recommendations\n\n") report.append("1. **Consolidate on mistergeek.net:**\n") report.append(" - Keep only VPN, software, gaming, tech content\n") report.append(" - Focus on high-traffic posts (>50 visits/month)\n\n") report.append("2. **Move to webscroll.fr:**\n") report.append(" - All torrent/file-sharing content\n") report.append(" - File-specific guides\n\n") report.append("3. **Move to hellogeek.net:**\n") report.append(" - Low-traffic content (<50 visits)\n") report.append(" - Off-brand content\n") report.append(" - Experimental/niche posts\n\n") report.append("4. **Delete:**\n") report.append(f" - Posts with <5 visits and <20 impressions\n") report.append(" - Duplicates/thin content\n\n") with open(output_path, 'w', encoding='utf-8') as f: f.write(''.join(report)) self.log(f"✓ Generated strategy report: {output_path}") except Exception as e: self.log(f"❌ Error generating report: {e}") def run(self, wordpress_csv, drafts_csv): """Run complete content strategy analysis.""" self.log("\n" + "="*70) self.log("Multi-Site Content Strategy Analyzer") self.log("="*70 + "\n") # Load posts self.log("📚 Loading content...\n") wordpress_posts = self.load_wordpress_posts(wordpress_csv) draft_posts = self.load_draft_posts(drafts_csv) # Combine all posts all_posts = {**wordpress_posts, **draft_posts} self.log(f"Total posts: {len(all_posts)}\n") # Analyze self.log("🔍 Analyzing content distribution...\n") analysis = self.analyze_all_content(all_posts) # Generate outputs self.log("📊 Generating outputs...\n") output_csv = self.output_dir / 'analysis' / 'content_distribution.csv' self.generate_content_distribution_csv(all_posts, output_csv) output_md = self.output_dir / 'reports' / 'content_strategy_report.md' self.generate_strategy_report(analysis, output_md) # Export analysis JSON analysis_json = self.output_dir / 'analysis' / 'analysis_summary.json' try: with open(analysis_json, 'w', encoding='utf-8') as f: # Convert defaultdict to regular dict for JSON serialization analysis_clean = { 'total_posts': analysis['total_posts'], 'by_site': dict(analysis['by_site']), 'by_topic': {k: {'count': v['count'], 'traffic': v['traffic']} for k, v in analysis['by_topic'].items()}, 'by_action': {k: {'count': v['count'], 'traffic': v['traffic']} for k, v in analysis['by_action'].items()}, 'sponsored_posts': { 'count': analysis['sponsored_posts']['count'], 'traffic': analysis['sponsored_posts']['traffic'] }, 'draft_posts': { 'count': analysis['draft_posts']['count'] } } json.dump(analysis_clean, f, indent=2, ensure_ascii=False) self.log(f"✓ Exported analysis JSON: {analysis_json}\n") except Exception as e: self.log(f"❌ Error exporting JSON: {e}\n") # Summary self.log("\n" + "="*70) self.log("ANALYSIS COMPLETE") self.log("="*70) self.log(f"\nOutputs:") self.log(f" Distribution: {output_csv}") self.log(f" Strategy: {output_md}") self.log(f" Summary: {analysis_json}\n") self.log("Next steps:") self.log(" 1. Review content_strategy_report.md") self.log(" 2. Review content_distribution.csv") self.log(" 3. Decide: which posts go to which site?") self.log(" 4. Plan content consolidation") def main(): """CLI entry point.""" parser = argparse.ArgumentParser(description='Analyze content across multiple sites') parser.add_argument('--wordpress-csv', type=Path, default=Path('input/wordpress/new-propositions.csv'), help='WordPress posts CSV') parser.add_argument('--drafts-csv', type=Path, default=Path('input/drafts/drafts.csv'), help='Draft posts CSV') args = parser.parse_args() analyzer = ContentStrategyAnalyzer() analyzer.run(args.wordpress_csv, args.drafts_csv) if __name__ == '__main__': main()