#!/usr/bin/env python3
"""
Multi-Site WordPress SEO Analyzer

Fetches posts from 3 WordPress sites, analyzes titles and meta descriptions,
and provides AI-powered optimization recommendations.
"""
import csv
import json
import logging
import os
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import requests
from requests.auth import HTTPBasicAuth

from config import Config

# Setup logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class MultiSiteSEOAnalyzer:
    """Analyzes titles and meta descriptions across multiple WordPress sites."""

    def __init__(self, progressive_csv: bool = True):
        """
        Initialize the analyzer.

        Args:
            progressive_csv: If True, write CSV progressively as posts are analyzed
        """
        self.sites_config = Config.WORDPRESS_SITES
        self.posts_data = {}          # site_name -> list of raw WP post dicts
        self.analysis_results = []    # flat list of per-post analysis rows
        self.api_calls = 0            # number of AI API calls made
        self.ai_cost = 0.0            # running USD cost estimate for AI calls
        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.progressive_csv = progressive_csv
        self.csv_file = None          # open file handle while progressive CSV is active
        self.csv_writer = None        # csv.DictWriter bound to csv_file

    def fetch_posts_from_site(self, site_name: str, site_config: Dict,
                              include_drafts: bool = False) -> List[Dict]:
        """
        Fetch posts from a WordPress site using REST API.

        Args:
            site_name: Name of the site (domain)
            site_config: Configuration dict with url, username, password
            include_drafts: If True, fetch both published and draft posts

        Returns:
            List of posts with metadata
        """
        logger.info(f"Fetching posts from {site_name}...")
        posts = []
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/posts"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])

        # Determine which statuses to fetch
        statuses = ['publish', 'draft'] if include_drafts else ['publish']
        status_str = ', '.join(statuses).replace('publish', 'published').replace('draft', 'drafts')

        # Fetch each status separately to avoid 400 Bad Request on pagination
        for status in statuses:
            page = 1
            status_count = 0
            use_fields = True  # Try with _fields first, fallback without if 400

            while True:
                params = {
                    'page': page,
                    'per_page': 100,
                    'status': status,  # Single status per request
                }
                # Add _fields only if not getting 400 errors
                if use_fields:
                    params['_fields'] = 'id,title,slug,link,meta,status'

                try:
                    response = requests.get(api_url, params=params, auth=auth, timeout=10)
                    response.raise_for_status()
                    page_posts = response.json()
                    if not page_posts:
                        break
                    posts.extend(page_posts)
                    status_count += len(page_posts)
                    logger.info(f" ✓ Fetched {len(page_posts)} {status} posts (page {page})")
                    page += 1
                    time.sleep(Config.API_DELAY_SECONDS)
                except requests.exceptions.HTTPError as e:
                    # Use the response attached to the exception; the local
                    # `response` name is the same object, but e.response is
                    # unambiguous and survives refactoring.
                    code = e.response.status_code if e.response is not None else None
                    if code == 400 and use_fields and page == 1:
                        # Retry page 1 without _fields parameter
                        logger.info(" ⓘ Retrying without _fields parameter...")
                        use_fields = False
                        continue
                    elif code == 400:
                        # Pagination or API limit reached
                        logger.info(f" ⓘ API limit reached (fetched {status_count} {status} posts)")
                        break
                    else:
                        logger.error(f"Error fetching page {page} from {site_name}: {e}")
                        break
                except requests.exceptions.RequestException as e:
                    logger.error(f"Error fetching from {site_name}: {e}")
                    break

            if status_count > 0:
                logger.info(f" ✓ Total {status} posts: {status_count}")

        logger.info(f"✓ Total posts from {site_name} ({status_str}): {len(posts)}")
        return posts

    def extract_seo_data(self, post: Dict, site_name: str) -> Dict:
        """
        Extract SEO-relevant data from a post.

        Args:
            post: Post data from WordPress API
            site_name: Name of the site

        Returns:
            Dict with extracted SEO data
        """
        title = post.get('title', {})
        if isinstance(title, dict):
            title = title.get('rendered', '')

        # Get meta description from various SEO plugins.
        # Check multiple possible locations where different plugins store meta descriptions.
        meta_desc = ''
        if isinstance(post.get('meta'), dict):
            meta_dict = post['meta']
            # Try various SEO plugin fields (order matters - most specific first)
            meta_desc = (
                meta_dict.get('_yoast_wpseo_metadesc', '') or   # Yoast SEO
                meta_dict.get('_rank_math_description', '') or  # Rank Math
                meta_dict.get('_aioseo_description', '') or     # All in One SEO
                meta_dict.get('description', '') or             # Standard field
                meta_dict.get('_meta_description', '') or       # Alternative
                meta_dict.get('metadesc', '')                   # Alternative
            )

        # Get post status
        status = post.get('status', 'publish')

        return {
            'site': site_name,
            'post_id': post['id'],
            'title': title.strip(),
            'slug': post.get('slug', ''),
            'url': post.get('link', ''),
            'meta_description': meta_desc.strip(),
            'status': status,
        }

    def analyze_title(self, title: str) -> Dict:
        """
        Analyze title for SEO best practices.

        Args:
            title: Post title

        Returns:
            Dict with analysis results
        """
        length = len(title)

        # SEO best practices
        issues = []
        recommendations = []
        score = 100

        if length < 30:
            issues.append(f"Too short ({length})")
            recommendations.append("Expand title to 50-60 characters")
            score -= 20
        elif length < 50:
            recommendations.append("Could be slightly longer (target 50-60)")
            score -= 5
        elif length > 70:
            issues.append(f"Too long ({length})")
            recommendations.append("Consider shortening to 50-70 characters")
            score -= 15

        # Check for power words
        power_words = ['best', 'ultimate', 'complete', 'essential', 'proven',
                       'effective', 'powerful', 'expert', 'guide', 'tutorial',
                       'how to', 'step by step', 'top 10', 'ultimate guide']
        has_power_word = any(word.lower() in title.lower() for word in power_words)
        if not has_power_word:
            recommendations.append("Consider adding a power word (best, complete, guide, etc.)")
            score -= 10

        # Check for numbers
        if not any(c.isdigit() for c in title):
            recommendations.append("Consider adding a number (e.g., 'Top 5', '2025')")
            score -= 5

        # Check for emojis or special chars that might break rendering
        special_chars = set(title) - set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 -:')
        if special_chars:
            recommendations.append(f"Check special characters: {special_chars}")
            score -= 5

        return {
            'length': length,
            'issues': issues,
            'recommendations': recommendations,
            'score': max(0, score),
            'has_power_word': has_power_word,
            'has_number': any(c.isdigit() for c in title)
        }

    def analyze_meta_description(self, meta_desc: str) -> Dict:
        """
        Analyze meta description for SEO best practices.

        Args:
            meta_desc: Meta description text

        Returns:
            Dict with analysis results
        """
        length = len(meta_desc)
        issues = []
        recommendations = []
        score = 100

        if not meta_desc or length == 0:
            issues.append("Missing meta description")
            recommendations.append("Write a 120-160 character meta description")
            score = 0
        else:
            if length < 100:
                issues.append(f"Too short ({length})")
                recommendations.append("Expand to 120-160 characters")
                score -= 20
            elif length < 120:
                recommendations.append("Could be slightly longer (target 120-160)")
                score -= 5
            elif length > 160:
                issues.append(f"Too long ({length})")
                recommendations.append("Shorten to 120-160 characters")
                score -= 15

            # Check for CTA
            cta_words = ['learn', 'discover', 'read', 'explore', 'find', 'get',
                         'download', 'check', 'see', 'watch', 'try', 'start']
            has_cta = any(word.lower() in meta_desc.lower() for word in cta_words)
            if not has_cta:
                recommendations.append("Consider adding a call-to-action")
                score -= 5

        return {
            'length': length,
            'is_missing': not meta_desc,
            'issues': issues,
            'recommendations': recommendations,
            'score': max(0, score),
        }

    def calculate_overall_score(self, title_analysis: Dict, meta_analysis: Dict) -> float:
        """Calculate overall SEO score (0-100): 40% title + 60% meta description."""
        title_weight = 0.4
        meta_weight = 0.6
        return (title_analysis['score'] * title_weight) + (meta_analysis['score'] * meta_weight)

    def generate_ai_recommendations(self, post_data: Dict, title_analysis: Dict,
                                    meta_analysis: Dict) -> Optional[str]:
        """
        Use Claude AI to generate specific optimization recommendations.

        Args:
            post_data: Post data
            title_analysis: Title analysis results
            meta_analysis: Meta description analysis

        Returns:
            AI-generated recommendations or None if AI disabled
        """
        if not self.openrouter_api_key:
            return None

        prompt = f"""Analyze this blog post and provide specific SEO optimization recommendations:

Post Title: "{post_data['title']}"
Current Meta Description: "{post_data['meta_description'] or 'MISSING'}"
URL: {post_data['url']}

Title Analysis:
- Length: {title_analysis['length']} characters (target: 50-70)
- Issues: {', '.join(title_analysis['issues']) or 'None'}

Meta Description Analysis:
- Length: {meta_analysis['length']} characters (target: 120-160)
- Issues: {', '.join(meta_analysis['issues']) or 'None'}

Provide 2-3 specific, actionable recommendations to improve SEO. Focus on:
1. If title needs improvement: suggest a better title
2. If meta description is missing: write one
3. If both are weak: provide both improved versions

Format as:
- Recommendation 1: [specific action]
- Recommendation 2: [specific action]
etc.

Be concise and specific."""

        try:
            response = requests.post(
                "https://openrouter.ai/api/v1/chat/completions",
                headers={
                    "Authorization": f"Bearer {self.openrouter_api_key}",
                    "Content-Type": "application/json",
                },
                json={
                    "model": "anthropic/claude-3.5-sonnet",
                    "messages": [
                        {"role": "user", "content": prompt}
                    ],
                    "temperature": 0.7,
                },
                timeout=30
            )
            response.raise_for_status()
            result = response.json()
            self.api_calls += 1

            # Track cost (Claude 3.5 Sonnet: $3/$15 per 1M tokens)
            usage = result.get('usage', {})
            input_tokens = usage.get('prompt_tokens', 0)
            output_tokens = usage.get('completion_tokens', 0)
            self.ai_cost += (input_tokens * 3 + output_tokens * 15) / 1_000_000

            recommendations = result['choices'][0]['message']['content'].strip()
            return recommendations
        except Exception as e:
            # Best-effort: AI recommendations are optional, so log and continue.
            logger.warning(f"AI recommendation failed: {e}")
            return None

    def _setup_progressive_csv(self) -> Optional[Path]:
        """
        Setup CSV file for progressive writing.

        Opens the output file, writes the header row, and stores the handle
        and writer on self for use by _write_result_to_csv.

        Returns:
            Path of the created CSV file, or None if progressive_csv is False
        """
        if not self.progressive_csv:
            return None

        output_dir = Path(__file__).parent.parent / 'output'
        output_dir.mkdir(parents=True, exist_ok=True)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        csv_path = output_dir / f'seo_analysis_{timestamp}.csv'

        fieldnames = [
            'site', 'post_id', 'status', 'title', 'slug', 'url', 'meta_description',
            'title_score', 'title_issues', 'title_recommendations',
            'meta_score', 'meta_issues', 'meta_recommendations',
            'overall_score', 'ai_recommendations',
        ]

        csv_file = open(csv_path, 'w', newline='', encoding='utf-8')
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        csv_file.flush()
        logger.info(f"✓ CSV file created: {csv_path}")

        self.csv_file = csv_file
        self.csv_writer = writer
        return csv_path

    def _write_result_to_csv(self, result: Dict) -> None:
        """Write a single result row to CSV file."""
        if self.progressive_csv and self.csv_writer:
            self.csv_writer.writerow(result)
            self.csv_file.flush()

    def analyze_all_sites(self, use_ai: bool = True, top_n: int = 10,
                          include_drafts: bool = False):
        """
        Analyze all configured sites.

        Args:
            use_ai: Whether to use AI for recommendations
            top_n: Number of top priority posts to get AI recommendations for
            include_drafts: If True, include draft posts in analysis
        """
        logger.info(f"Starting analysis of {len(self.sites_config)} sites...")
        if include_drafts:
            logger.info("(Including draft posts)")
        logger.info("")

        all_posts = []

        # Fetch posts from all sites
        for site_name, config in self.sites_config.items():
            posts = self.fetch_posts_from_site(site_name, config, include_drafts=include_drafts)
            if posts:
                self.posts_data[site_name] = posts
                all_posts.extend(posts)

        if not all_posts:
            logger.error("No posts found on any site")
            return

        logger.info(f"\nAnalyzing {len(all_posts)} posts...\n")

        # Setup progressive CSV if enabled
        csv_path = self._setup_progressive_csv()

        # Analyze each post
        for site_name, posts in self.posts_data.items():
            logger.info(f"Analyzing {len(posts)} posts from {site_name}...")
            for idx, post in enumerate(posts, 1):
                seo_data = self.extract_seo_data(post, site_name)
                title_analysis = self.analyze_title(seo_data['title'])
                meta_analysis = self.analyze_meta_description(seo_data['meta_description'])
                overall_score = self.calculate_overall_score(title_analysis, meta_analysis)

                result = {
                    **seo_data,
                    'title_score': title_analysis['score'],
                    'title_issues': '|'.join(title_analysis['issues']) or 'None',
                    'title_recommendations': '|'.join(title_analysis['recommendations']),
                    'meta_score': meta_analysis['score'],
                    'meta_issues': '|'.join(meta_analysis['issues']) or 'None',
                    'meta_recommendations': '|'.join(meta_analysis['recommendations']),
                    'overall_score': overall_score,
                    'ai_recommendations': '',
                }
                self.analysis_results.append(result)

                # Write to CSV progressively (before AI recommendations)
                if self.progressive_csv:
                    self._write_result_to_csv(result)
                    logger.debug(f" [{idx}/{len(posts)}] Written: {seo_data['title'][:40]}")

        # Sort by priority (lowest scores first) and get AI recommendations for top posts
        if use_ai:
            self.analysis_results.sort(key=lambda x: x['overall_score'])
            logger.info(f"\nGenerating AI recommendations for top {top_n} posts...\n")
            for idx, result in enumerate(self.analysis_results[:top_n], 1):
                logger.info(f" [{idx}/{top_n}] {result['title'][:50]}...")
                ai_recs = self.generate_ai_recommendations(
                    result,
                    {
                        'score': result['title_score'],
                        'issues': result['title_issues'].split('|'),
                        'length': len(result['title'])
                    },
                    {
                        'score': result['meta_score'],
                        'issues': result['meta_issues'].split('|'),
                        'length': len(result['meta_description'])
                    }
                )
                result['ai_recommendations'] = ai_recs or ''

                # NOTE: progressive CSV rows cannot be updated in place;
                # AI recommendations land in the final export instead.
                time.sleep(0.5)  # Rate limiting

        # Sort by overall score for final export
        self.analysis_results.sort(key=lambda x: x['overall_score'])

        # Close progressive CSV if open (will be re-written with final data including AI recs)
        if self.progressive_csv and self.csv_file:
            self.csv_file.close()
            self.csv_file = None
            self.csv_writer = None

    def export_results(self, output_file: Optional[str] = None):
        """
        Export analysis results to CSV.

        Args:
            output_file: Output file path (optional)
        """
        if not output_file:
            output_dir = Path(__file__).parent.parent / 'output'
            output_dir.mkdir(parents=True, exist_ok=True)
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
            if self.progressive_csv:
                # Re-use the progressive file created during analysis (most recent one)
                files = sorted(output_dir.glob('seo_analysis_*.csv'))
                if files:
                    output_file = files[-1]
                else:
                    output_file = output_dir / f'seo_analysis_{timestamp}_final.csv'
            else:
                output_file = output_dir / f'seo_analysis_{timestamp}.csv'

        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        if not self.analysis_results:
            logger.error("No results to export")
            return

        fieldnames = [
            'site', 'post_id', 'status', 'title', 'slug', 'url', 'meta_description',
            'title_score', 'title_issues', 'title_recommendations',
            'meta_score', 'meta_issues', 'meta_recommendations',
            'overall_score', 'ai_recommendations',
        ]

        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            for result in self.analysis_results:
                writer.writerow({field: result.get(field, '') for field in fieldnames})

        if self.progressive_csv:
            logger.info(f"\n✓ Final results saved to: {output_file}")
        else:
            logger.info(f"\n✓ Results exported to: {output_file}")

        # Also export as a summary report
        self.export_summary_report(output_file)

    def export_summary_report(self, csv_file: Path):
        """Export a markdown summary report next to the given CSV file."""
        report_file = csv_file.parent / f"{csv_file.stem}_summary.md"

        # Group by site
        by_site = {}
        for result in self.analysis_results:
            site = result['site']
            if site not in by_site:
                by_site[site] = []
            by_site[site].append(result)

        with open(report_file, 'w', encoding='utf-8') as f:
            f.write("# Multi-Site SEO Analysis Report\n\n")
            f.write(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            # Summary stats
            total_posts = len(self.analysis_results)
            published = sum(1 for r in self.analysis_results if r['status'] == 'publish')
            drafts = sum(1 for r in self.analysis_results if r['status'] == 'draft')
            avg_score = sum(r['overall_score'] for r in self.analysis_results) / total_posts if total_posts > 0 else 0

            f.write("## Summary\n\n")
            f.write(f"- **Total Posts:** {total_posts}\n")
            if published > 0:
                f.write(f" - Published: {published}\n")
            if drafts > 0:
                f.write(f" - Drafts: {drafts}\n")
            f.write(f"- **Average SEO Score:** {avg_score:.1f}/100\n")
            f.write(f"- **API Calls Made:** {self.api_calls}\n")
            f.write(f"- **AI Cost:** ${self.ai_cost:.4f}\n")
            f.write(f"- **Sites Analyzed:** {len(by_site)}\n\n")

            # Priority issues
            missing_meta = sum(1 for r in self.analysis_results if r['meta_score'] == 0)
            weak_titles = sum(1 for r in self.analysis_results if r['title_score'] < 50)
            weak_meta = sum(1 for r in self.analysis_results
                            if r['meta_score'] < 50 and r['meta_score'] > 0)

            f.write("## Priority Issues\n\n")
            f.write(f"- **Missing Meta Descriptions:** {missing_meta} posts\n")
            f.write(f"- **Weak Titles (Score < 50):** {weak_titles} posts\n")
            f.write(f"- **Weak Meta (Score < 50):** {weak_meta} posts\n\n")

            # By site
            for site_name, posts in by_site.items():
                avg = sum(p['overall_score'] for p in posts) / len(posts)
                f.write(f"## {site_name}\n\n")
                f.write(f"- **Posts:** {len(posts)}\n")
                f.write(f"- **Avg Score:** {avg:.1f}/100\n")
                f.write(f"- **Missing Meta:** {sum(1 for p in posts if p['meta_score'] == 0)}\n\n")

                # Top 5 to optimize (analysis_results is already sorted worst-first)
                f.write("### Top 5 Posts to Optimize\n\n")
                for idx, post in enumerate(posts[:5], 1):
                    f.write(f"{idx}. **{post['title']}** (Score: {post['overall_score']:.0f})\n")
                    f.write(f" - URL: {post['url']}\n")
                    if post['meta_issues'] != 'None':
                        f.write(f" - Meta Issues: {post['meta_issues']}\n")
                    if post['ai_recommendations']:
                        f.write(f" - Recommendations: {post['ai_recommendations'].split(chr(10))[0]}\n")
                    f.write("\n")

            f.write("\n## Legend\n\n")
            f.write("- **Title Score:** Evaluates length, power words, numbers, readability\n")
            f.write("- **Meta Score:** Evaluates presence, length, call-to-action\n")
            f.write("- **Overall Score:** 40% title + 60% meta description\n")
            f.write("- **Optimal Ranges:**\n")
            f.write(" - Title: 50-70 characters\n")
            f.write(" - Meta: 120-160 characters\n")

        logger.info(f"✓ Summary report: {report_file}")

    def run(self, use_ai: bool = True, top_n: int = 10, include_drafts: bool = False):
        """Run complete analysis: fetch, analyze, export, and log a summary."""
        try:
            self.analyze_all_sites(use_ai=use_ai, top_n=top_n, include_drafts=include_drafts)
            self.export_results()

            logger.info("\n" + "="*60)
            logger.info("ANALYSIS COMPLETE")
            logger.info("="*60)
            logger.info(f"Total posts analyzed: {len(self.analysis_results)}")
            published = sum(1 for r in self.analysis_results if r['status'] == 'publish')
            drafts = sum(1 for r in self.analysis_results if r['status'] == 'draft')
            if published > 0:
                logger.info(f" - Published: {published}")
            if drafts > 0:
                logger.info(f" - Drafts: {drafts}")
            logger.info(f"AI recommendations: {sum(1 for r in self.analysis_results if r['ai_recommendations'])}")
            logger.info(f"AI cost: ${self.ai_cost:.4f}")
        except Exception as e:
            logger.error(f"Analysis failed: {e}", exc_info=True)
            sys.exit(1)


def check_meta_fields(site_url: str, username: str, password: str) -> None:
    """
    Diagnostic function to check what meta fields are available on a site.

    Args:
        site_url: WordPress site URL
        username: WordPress username
        password: WordPress app password
    """
    logger.info(f"\n{'='*60}")
    logger.info("META FIELD DIAGNOSTIC")
    logger.info(f"{'='*60}\n")
    logger.info(f"Site: {site_url}")
    logger.info("Checking available meta fields in first post...\n")

    base_url = site_url.rstrip('/')
    api_url = f"{base_url}/wp-json/wp/v2/posts"
    auth = HTTPBasicAuth(username, password)

    try:
        params = {
            'per_page': 1,
            'status': 'publish'
        }
        response = requests.get(api_url, params=params, auth=auth, timeout=10)
        response.raise_for_status()
        posts = response.json()
        if not posts:
            logger.error("No posts found")
            return

        post = posts[0]
        logger.info(f"Post: {post.get('title', {}).get('rendered', 'N/A')}")
        logger.info(f"\nAvailable meta fields:")
        if isinstance(post.get('meta'), dict):
            meta_dict = post['meta']
            if meta_dict:
                for key, value in sorted(meta_dict.items()):
                    preview = str(value)[:60]
                    logger.info(f" • {key}: {preview}")
            else:
                logger.info(" (No meta fields found)")
        else:
            logger.info(" (Meta is not a dictionary)")

        logger.info(f"\nFull meta object:")
        logger.info(json.dumps(post.get('meta', {}), indent=2)[:500])
    except Exception as e:
        logger.error(f"Error: {e}")


def main():
    """Main entry point."""
    import argparse

    parser = argparse.ArgumentParser(
        description='Analyze SEO across multiple WordPress sites'
    )
    parser.add_argument(
        '--no-ai', action='store_true',
        help='Skip AI recommendations to save cost'
    )
    parser.add_argument(
        '--top-n', type=int, default=10,
        help='Number of top posts to get AI recommendations for'
    )
    parser.add_argument(
        '--output',
        help='Output CSV file path'
    )
    parser.add_argument(
        '--include-drafts', action='store_true',
        help='Include draft posts in analysis (published + drafts)'
    )
    parser.add_argument(
        '--no-progressive', action='store_true',
        help='Disable real-time CSV writing (write only at end)'
    )
    parser.add_argument(
        '--diagnose',
        help='Diagnose meta fields for a site (URL). Example: --diagnose https://www.mistergeek.net'
    )
    args = parser.parse_args()

    # Diagnostic mode
    if args.diagnose:
        username = Config.WORDPRESS_USERNAME
        password = Config.WORDPRESS_APP_PASSWORD
        if not username or not password:
            logger.error("WORDPRESS_USERNAME and WORDPRESS_APP_PASSWORD must be set in .env")
            sys.exit(1)
        check_meta_fields(args.diagnose, username, password)
        sys.exit(0)

    analyzer = MultiSiteSEOAnalyzer(progressive_csv=not args.no_progressive)
    analyzer.run(use_ai=not args.no_ai, top_n=args.top_n,
                 include_drafts=args.include_drafts)


if __name__ == '__main__':
    main()