diff --git a/src/seo/app.py b/src/seo/app.py index 462d1db..cb5778c 100644 --- a/src/seo/app.py +++ b/src/seo/app.py @@ -14,6 +14,7 @@ from .category_manager import WordPressCategoryManager, CategoryAssignmentProces from .editorial_strategy import EditorialStrategyAnalyzer from .post_migrator import WordPressPostMigrator from .meta_description_generator import MetaDescriptionGenerator +from .meta_description_updater import MetaDescriptionUpdater logger = logging.getLogger(__name__) @@ -318,6 +319,46 @@ class SEOApp: only_poor_quality=only_poor_quality, limit=limit ) + + def update_meta_descriptions(self, site: str, + post_ids: Optional[List[int]] = None, + category_names: Optional[List[str]] = None, + category_ids: Optional[List[int]] = None, + limit: Optional[int] = None, + dry_run: bool = False, + skip_existing: bool = True, + force_regenerate: bool = False) -> Dict: + """ + Fetch posts from WordPress, generate AI meta descriptions, and update them. + + Args: + site: WordPress site name (REQUIRED) - mistergeek.net, webscroll.fr, hellogeek.net + post_ids: Specific post IDs to update + category_names: Filter by category names + category_ids: Filter by category IDs + limit: Maximum number of posts to process + dry_run: If True, preview changes without updating + skip_existing: If True, skip posts with existing good quality meta descriptions + force_regenerate: If True, regenerate even for good quality metas + + Returns: + Statistics dict + """ + logger.info(f"šŸ”„ Updating meta descriptions on {site}...") + + if not site: + raise ValueError("Site is required. 
def cmd_update_meta(app, args):
    """Fetch, generate, and update meta descriptions directly on WordPress.

    With ``--dry-run`` this only prints what would be done (site and every
    selected filter) and never touches ``app``. Otherwise it calls
    ``app.update_meta_descriptions`` and prints the returned statistics.

    Args:
        app: Application object exposing ``update_meta_descriptions(**kwargs)``.
        args: Parsed CLI namespace; reads ``dry_run``, ``site``, ``post_ids``,
            ``category``, ``category_id``, ``limit`` and ``force``.

    Returns:
        1 when ``--site`` is missing, 0 otherwise.
    """
    if args.dry_run:
        print("Would update meta descriptions on WordPress")
        if not args.site:
            print(" āŒ Site is required. Use --site mistergeek.net|webscroll.fr|hellogeek.net")
            return 1
        print(f" Site: {args.site}")
        # Same filter listing as the real run, so the preview also shows
        # --category-id (it used to be left out here).
        _print_update_meta_filters(args)
        return 0

    # Site is required
    if not args.site:
        print("āŒ Site is required. Use --site mistergeek.net|webscroll.fr|hellogeek.net")
        return 1

    print(f"Updating meta descriptions on {args.site}...")
    _print_update_meta_filters(args)
    print(f" Skip existing: {not args.force}")
    print(f" Dry run: {args.dry_run}")

    stats = app.update_meta_descriptions(
        site=args.site,
        post_ids=args.post_ids,
        category_names=args.category,
        category_ids=args.category_id,
        limit=args.limit,
        dry_run=args.dry_run,
        # --force is the single switch: it disables skipping and forces regeneration.
        skip_existing=not args.force,
        force_regenerate=args.force,
    )

    if stats:
        print("\nāœ… Meta description update completed!")
        print("\nšŸ“Š Summary:")
        print(f" Total posts: {stats.get('total_posts', 0)}")
        print(f" Updated: {stats.get('updated', 0)}")
        print(f" Failed: {stats.get('failed', 0)}")
        print(f" Skipped: {stats.get('skipped', 0)}")
        print(f" API calls: {stats.get('api_calls', 0)}")
    return 0


def _print_update_meta_filters(args):
    """Print every post filter present on ``args``, one per line."""
    if args.post_ids:
        print(f" Post IDs: {args.post_ids}")
    if args.category:
        print(f" Categories: {args.category}")
    if args.category_id:
        print(f" Category IDs: {args.category_id}")
    if args.limit:
        print(f" Limit: {args.limit} posts")
class MetaDescriptionUpdater:
    """Fetch posts from WordPress, generate AI meta descriptions, and update them.

    The usual entry point is :meth:`run`: it fetches the selected posts,
    requests one meta description per post from the OpenRouter chat-completions
    endpoint, writes it back through the WordPress REST API and saves a CSV
    report of the per-post outcomes.
    """

    # Page size used for every paginated WordPress request.
    PER_PAGE = 100
    # Meta keys read (in this order) to find an existing description.
    RANK_MATH_META_KEY = 'rank_math_description'
    YOAST_META_KEY = '_yoast_wpseo_metadesc'
    OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
    # Length window considered optimal by _validate_meta_description.
    MIN_LENGTH = 120
    MAX_LENGTH = 160
    # Existing descriptions scoring at least this are left untouched.
    GOOD_SCORE = 80
    CTA_WORDS = frozenset({
        'learn', 'discover', 'find', 'explore', 'read',
        'get', 'see', 'try', 'start',
    })

    def __init__(self, site_name: str):
        """
        Initialize the updater.

        Args:
            site_name: WordPress site name (e.g., 'mistergeek.net'); must be a
                key of ``Config.WORDPRESS_SITES``.

        Raises:
            ValueError: If ``site_name`` is not configured.
        """
        self.site_name = site_name
        self.sites = Config.WORDPRESS_SITES

        if site_name not in self.sites:
            raise ValueError(f"Site '{site_name}' not found in configuration")

        self.site_config = self.sites[site_name]
        self.base_url = self.site_config['url'].rstrip('/')
        self.auth = HTTPBasicAuth(
            self.site_config['username'],
            self.site_config['password']
        )

        self.openrouter_api_key = Config.OPENROUTER_API_KEY
        self.ai_model = Config.AI_MODEL

        self.posts = []
        self.update_results = []
        self.api_calls = 0
        self.stats = {
            'total_posts': 0,
            'updated': 0,
            'failed': 0,
            'skipped': 0,
            # Mirrored from self.api_calls so callers reading the returned
            # stats dict (the CLI summary does) see the real count.
            'api_calls': 0
        }

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _post_meta(post: Dict) -> Dict:
        """Return ``post['meta']`` when it is a dict, otherwise ``{}``.

        NOTE(review): WordPress can serialise an empty meta collection as a
        JSON list (``[]``); treating anything that is not a dict as "no meta"
        keeps ``.get`` lookups from raising on such posts.
        """
        meta = post.get('meta') if isinstance(post, dict) else None
        return meta if isinstance(meta, dict) else {}

    @staticmethod
    def _rendered(post: Dict, field: str) -> str:
        """Return ``post[field]['rendered']`` as a string, or '' if absent."""
        value = post.get(field)
        if isinstance(value, dict):
            return value.get('rendered') or ''
        return ''

    @classmethod
    def _select_meta_key(cls, meta: Dict) -> str:
        """Pick the meta key to write: RankMath first, then Yoast.

        The key that is present in the post's ``meta`` mapping wins; when
        neither is present the RankMath key is used.
        """
        if cls.RANK_MATH_META_KEY in meta:
            return cls.RANK_MATH_META_KEY
        if cls.YOAST_META_KEY in meta:
            return cls.YOAST_META_KEY
        return cls.RANK_MATH_META_KEY

    @staticmethod
    def _clean_html(fragment: str) -> str:
        """Strip tags, decode HTML entities and collapse whitespace."""
        import html
        import re

        text = re.sub('<[^<]+?>', '', fragment or '')
        return ' '.join(html.unescape(text).split())

    @staticmethod
    def _total_pages(response) -> Optional[int]:
        """Return the ``X-WP-TotalPages`` response header as int, or None."""
        try:
            return int(response.headers.get('X-WP-TotalPages'))
        except (TypeError, ValueError):
            return None

    @staticmethod
    def _match_category(name: str,
                        category_map: Dict[str, int]) -> Tuple[Optional[int], bool]:
        """Match one requested category name against ``category_map``.

        Args:
            name: Category name as typed by the user.
            category_map: Lower-cased category name -> category ID.

        Returns:
            ``(category_id, is_partial)``; ``(None, False)`` when nothing
            matches. Blank names never match (an empty string is a substring
            of every name and would otherwise select the first category).
        """
        wanted = (name or '').strip().lower()
        if not wanted:
            return None, False
        if wanted in category_map:
            return category_map[wanted], False
        for cat_name, cat_id in category_map.items():
            if cat_name and (wanted in cat_name or cat_name in wanted):
                return cat_id, True
        return None, False

    @staticmethod
    def _extract_completion(result) -> Optional[str]:
        """Pull the stripped message text out of a chat-completions payload.

        Returns None when the payload has no usable text. One pair of
        surrounding double quotes is removed if the model added them.
        """
        if not isinstance(result, dict):
            return None
        choices = result.get('choices')
        if not isinstance(choices, list) or not choices:
            return None
        first = choices[0]
        message = first.get('message') if isinstance(first, dict) else None
        content = message.get('content') if isinstance(message, dict) else None
        if not isinstance(content, str):
            return None
        text = content.strip()
        if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
            text = text[1:-1].strip()
        return text or None

    # ------------------------------------------------------------------
    # Fetching
    # ------------------------------------------------------------------

    def fetch_posts(self, post_ids: Optional[List[int]] = None,
                    category_ids: Optional[List[int]] = None,
                    category_names: Optional[List[str]] = None,
                    limit: Optional[int] = None,
                    status: Optional[List[str]] = None) -> List[Dict]:
        """
        Fetch posts from WordPress site.

        Args:
            post_ids: Specific post IDs to fetch. When given, the category
                filters and ``limit`` are not applied.
            category_ids: Filter by category IDs
            category_names: Filter by category names (resolved to IDs; ignored
                when ``category_ids`` is also given)
            limit: Maximum number of posts to fetch (0/None means no limit)
            status: Post statuses to fetch (default: ['publish'])

        Returns:
            List of post dicts (also stored in ``self.posts``). Empty when
            category names were requested but none could be resolved.
        """
        logger.info(f"Fetching posts from {self.site_name}...")

        if post_ids:
            logger.info(f" Post IDs: {post_ids}")
        if category_ids:
            logger.info(f" Category IDs: {category_ids}")
        if category_names:
            logger.info(f" Category names: {category_names}")
        if limit:
            logger.info(f" Limit: {limit}")

        if post_ids:
            self.posts = self._fetch_posts_by_id(post_ids)
        else:
            # Resolve category names to IDs if needed
            if category_names and not category_ids:
                category_ids = self._get_category_ids_by_names(category_names)
                if not category_ids:
                    # Without this guard the request below would carry no
                    # category filter and every post on the site would be
                    # fetched (and later rewritten).
                    logger.error(
                        "None of the requested categories could be resolved; "
                        "not falling back to all posts"
                    )
                    self.posts = []
                    return self.posts
            self.posts = self._fetch_posts_paginated(category_ids, limit, status)

        logger.info(f"āœ“ Fetched {len(self.posts)} posts from {self.site_name}")
        return self.posts

    def _fetch_posts_by_id(self, post_ids: List[int]) -> List[Dict]:
        """Fetch each post individually; posts that fail are logged and skipped."""
        posts = []
        for post_id in post_ids:
            try:
                response = requests.get(
                    f"{self.base_url}/wp-json/wp/v2/posts/{post_id}",
                    auth=self.auth,
                    timeout=10
                )
                if response.status_code == 200:
                    posts.append(response.json())
                else:
                    logger.warning(f" Post {post_id} not found or inaccessible")
            except (requests.RequestException, ValueError) as e:
                logger.error(f" Error fetching post {post_id}: {e}")
        return posts

    def _fetch_posts_paginated(self, category_ids: Optional[List[int]],
                               limit: Optional[int],
                               status: Optional[List[str]]) -> List[Dict]:
        """Page through ``/wp/v2/posts``; returns what was fetched before any error."""
        effective_limit = limit if limit and limit > 0 else None
        # Never ask for more rows per page than the caller wants in total.
        per_page = min(self.PER_PAGE, effective_limit) if effective_limit else self.PER_PAGE

        # '_embed' is intentionally not requested: nothing in this class reads
        # the '_embedded' payload, and the by-ID path never requested it.
        params = {
            'per_page': per_page,
            'page': 1,
            'status': ','.join(status) if status else 'publish',
        }
        if category_ids:
            params['categories'] = ','.join(map(str, category_ids))

        posts = []
        while True:
            try:
                response = requests.get(
                    f"{self.base_url}/wp-json/wp/v2/posts",
                    params=params,
                    auth=self.auth,
                    timeout=30
                )
                response.raise_for_status()
                page_posts = response.json()
            except (requests.RequestException, ValueError) as e:
                logger.error(f"Error fetching posts: {e}")
                break

            if not page_posts:
                break
            posts.extend(page_posts)

            if len(page_posts) < per_page:
                break
            if effective_limit and len(posts) >= effective_limit:
                break
            # Stop on the last page instead of requesting one page too many,
            # which WordPress answers with an HTTP 400.
            total_pages = self._total_pages(response)
            if total_pages is not None and params['page'] >= total_pages:
                break

            params['page'] += 1
            time.sleep(0.3)

        return posts[:effective_limit] if effective_limit else posts

    def _fetch_all_categories(self) -> List[Dict]:
        """Return every category of the site, following pagination.

        Raises:
            requests.RequestException: On network/HTTP errors.
            ValueError: If a response body is not valid JSON.
        """
        categories = []
        page = 1
        while True:
            response = requests.get(
                f"{self.base_url}/wp-json/wp/v2/categories",
                params={'per_page': self.PER_PAGE, 'page': page},
                auth=self.auth,
                timeout=10
            )
            response.raise_for_status()
            batch = response.json()
            if not isinstance(batch, list) or not batch:
                break
            categories.extend(batch)
            if len(batch) < self.PER_PAGE:
                break
            total_pages = self._total_pages(response)
            if total_pages is not None and page >= total_pages:
                break
            page += 1
        return categories

    def _get_category_ids_by_names(self, category_names: List[str]) -> List[int]:
        """
        Get category IDs by category names.

        Matching is case-insensitive: an exact name match is preferred, then
        the first category whose name contains (or is contained in) the
        requested name. Category names returned by WordPress are
        entity-decoded first so that e.g. ``Tips &amp; Tricks`` matches
        ``Tips & Tricks``.

        Args:
            category_names: List of category names

        Returns:
            List of category IDs without duplicates; empty when the lookup
            fails or nothing matches.
        """
        import html

        logger.info("Resolving category names to IDs...")

        try:
            categories = self._fetch_all_categories()
        except (requests.RequestException, ValueError) as e:
            logger.error(f"Error fetching categories: {e}")
            return []

        category_map = {}
        for cat in categories:
            if isinstance(cat, dict) and 'name' in cat and 'id' in cat:
                category_map[html.unescape(str(cat['name'])).lower()] = cat['id']

        category_ids = []
        for name in category_names:
            cat_id, is_partial = self._match_category(name, category_map)
            if cat_id is None:
                logger.warning(f" āœ— Category '{name}' not found")
                continue
            suffix = " (partial match)" if is_partial else ""
            logger.info(f" āœ“ '{name}' -> ID {cat_id}{suffix}")
            if cat_id not in category_ids:
                category_ids.append(cat_id)

        return category_ids

    # ------------------------------------------------------------------
    # Generation
    # ------------------------------------------------------------------

    def _build_prompt(self, post: Dict) -> str:
        """Build the user prompt from the post's title, excerpt and content preview."""
        # Entities and tags are removed so the model sees plain text rather
        # than sequences such as '&#8217;'.
        title = self._clean_html(self._rendered(post, 'title'))
        content_text = self._clean_html(self._rendered(post, 'content'))[:500]
        excerpt_text = self._clean_html(self._rendered(post, 'excerpt'))

        return f"""You are an SEO expert. Generate an optimized meta description for the following blog post.

**Post Title:** {title}

**Content Context:**
Excerpt: {excerpt_text}
Content preview: {content_text}...

**Requirements:**
1. Length: 120-160 characters (optimal for SEO)
2. Make it compelling and action-oriented
3. Clearly describe what the post is about
4. Use active voice
5. Include a call-to-action when appropriate
6. Avoid clickbait - be accurate and valuable

**Output Format:**
Return ONLY the meta description text, nothing else. No quotes, no explanations."""

    def _generate_meta_description(self, post: Dict) -> Optional[str]:
        """
        Generate meta description for a post using AI.

        Args:
            post: Post data dict

        Returns:
            Generated meta description, or None when no API key is configured,
            the request fails, or the response contains no text.
        """
        if not self.openrouter_api_key:
            logger.error("API call failed: OpenRouter API key is not configured")
            return None

        headers = {
            "Authorization": f"Bearer {self.openrouter_api_key}",
            "Content-Type": "application/json"
        }
        payload = {
            "model": self.ai_model,
            "messages": [
                {
                    "role": "system",
                    "content": "You are an SEO expert specializing in meta description optimization."
                },
                {
                    "role": "user",
                    "content": self._build_prompt(post)
                }
            ],
            "temperature": 0.7,
            "max_tokens": 100
        }

        try:
            response = requests.post(self.OPENROUTER_URL, json=payload,
                                     headers=headers, timeout=30)
            response.raise_for_status()
            result = response.json()
        except (requests.RequestException, ValueError) as e:
            logger.error(f"API call failed: {e}")
            return None

        self.api_calls += 1

        meta_description = self._extract_completion(result)
        if not meta_description:
            logger.warning("No AI response received")
            return None
        return meta_description

    # ------------------------------------------------------------------
    # Updating
    # ------------------------------------------------------------------

    def _update_post_meta(self, post_id: int, meta_description: str,
                          meta_key: Optional[str] = None) -> bool:
        """
        Update post meta description in WordPress.

        Only the description key is sent.
        NOTE(review): this relies on the REST API leaving meta keys that are
        absent from the request untouched, and on it silently ignoring keys
        that are not registered for REST — confirm against the WordPress
        REST API meta documentation.

        Args:
            post_id: Post ID to update
            meta_description: New meta description
            meta_key: Meta key to write; defaults to the RankMath key.

        Returns:
            True if WordPress answered 200 and echoed the key back in the
            post's ``meta``; False otherwise.
        """
        key = meta_key or self.RANK_MATH_META_KEY
        logger.info(f"Updating post {post_id}...")

        try:
            update_response = requests.post(
                f"{self.base_url}/wp-json/wp/v2/posts/{post_id}",
                json={'meta': {key: meta_description}},
                auth=self.auth,
                timeout=10
            )
        except requests.RequestException as e:
            logger.error(f" āœ— Error updating post {post_id}: {e}")
            return False

        if update_response.status_code != 200:
            logger.error(f" āœ— Failed to update post {post_id}: {update_response.status_code}")
            logger.error(f" Response: {update_response.text}")
            return False

        try:
            returned_meta = self._post_meta(update_response.json())
        except ValueError:
            returned_meta = {}

        # A 200 alone does not prove the value was stored: if the key is not
        # echoed back it was most likely ignored by WordPress.
        if key not in returned_meta:
            logger.error(
                f" āœ— Post {post_id}: '{key}' is not exposed through the REST API; "
                f"the description was probably not stored"
            )
            return False

        logger.info(f" āœ“ Updated post {post_id}")
        return True

    def _validate_meta_description(self, meta: str) -> Dict:
        """Validate meta description quality.

        Scoring: 100 inside the 120-160 character window; otherwise 50 minus
        the distance to the window (floored at 0), plus 5 for a trailing
        period and plus 5 when a call-to-action word is present (capped at
        100). CTA words are matched as whole words, so "together" does not
        count as "get".

        Args:
            meta: Meta description text (None is treated as empty).

        Returns:
            Dict with ``length``, ``is_optimal``, ``too_short``, ``too_long``
            and ``score``.
        """
        import re

        meta = meta or ''
        length = len(meta)

        validation = {
            'length': length,
            'is_optimal': self.MIN_LENGTH <= length <= self.MAX_LENGTH,
            'too_short': length < self.MIN_LENGTH,
            'too_long': length > self.MAX_LENGTH,
            'score': 0
        }

        if validation['is_optimal']:
            validation['score'] = 100
        elif validation['too_short']:
            validation['score'] = max(0, 50 - (self.MIN_LENGTH - length))
        else:
            validation['score'] = max(0, 50 - (length - self.MAX_LENGTH))

        # Bonus for ending with period
        if meta.endswith('.'):
            validation['score'] = min(100, validation['score'] + 5)

        # Bonus for CTA words
        words = set(re.findall(r"\w+", meta.lower()))
        if words & self.CTA_WORDS:
            validation['score'] = min(100, validation['score'] + 5)

        return validation

    def update_posts(self, dry_run: bool = False,
                     skip_existing: bool = False,
                     force_regenerate: bool = False) -> Dict:
        """
        Generate and update meta descriptions for fetched posts.

        A post is skipped when it already has a description and either
        ``skip_existing`` is set, or the existing description scores at least
        ``GOOD_SCORE``; ``force_regenerate`` disables both skips.

        Args:
            dry_run: If True, preview changes without updating
            skip_existing: If True, skip posts that already have meta descriptions
            force_regenerate: If True, regenerate even for posts with good meta descriptions

        Returns:
            Statistics dict (``total_posts``, ``updated``, ``failed``,
            ``skipped``, ``api_calls``).
        """
        logger.info("\n" + "="*70)
        logger.info("META DESCRIPTION UPDATE")
        logger.info("="*70)
        logger.info(f"Site: {self.site_name}")
        logger.info(f"Posts to process: {len(self.posts)}")
        logger.info(f"Dry run: {dry_run}")
        logger.info(f"Skip existing: {skip_existing}")
        logger.info(f"Force regenerate: {force_regenerate}")
        logger.info("="*70)

        self.stats['total_posts'] = len(self.posts)

        for i, post in enumerate(self.posts, 1):
            post_id = post.get('id')
            title = self._rendered(post, 'title')[:50]

            logger.info(f"\n[{i}/{len(self.posts)}] Processing post {post_id}: {title}...")

            # Check current meta description
            meta_dict = self._post_meta(post)
            current_meta = (
                meta_dict.get(self.RANK_MATH_META_KEY) or
                meta_dict.get(self.YOAST_META_KEY) or
                ''
            )

            # Skip if has existing meta and skip_existing is True
            if current_meta and skip_existing and not force_regenerate:
                logger.info(" Skipping: Already has meta description")
                self.stats['skipped'] += 1
                continue

            # Validate existing meta (if any)
            if current_meta and not force_regenerate:
                validation = self._validate_meta_description(current_meta)
                if validation['score'] >= self.GOOD_SCORE:
                    logger.info(f" Skipping: Existing meta is good quality (score: {validation['score']})")
                    self.stats['skipped'] += 1
                    continue

            # Generate new meta description
            logger.info(" Generating meta description...")
            generated_meta = self._generate_meta_description(post)

            if not generated_meta:
                logger.error(" āœ— Failed to generate meta description")
                self.stats['failed'] += 1
                continue

            # Validate generated meta
            validation = self._validate_meta_description(generated_meta)
            logger.info(f" Generated: {generated_meta[:80]}...")
            logger.info(f" Length: {validation['length']} chars, Score: {validation['score']}")

            result_row = {
                'post_id': post_id,
                'title': title,
                'current_meta': current_meta,
                'generated_meta': generated_meta,
                'validation_score': validation['score']
            }

            # Update post
            if dry_run:
                logger.info(f" [DRY RUN] Would update post {post_id}")
                result_row['status'] = 'dry_run'
            elif self._update_post_meta(post_id, generated_meta,
                                        meta_key=self._select_meta_key(meta_dict)):
                logger.info(f" āœ“ Successfully updated post {post_id}")
                self.stats['updated'] += 1
                result_row['status'] = 'updated'
            else:
                self.stats['failed'] += 1
                result_row['status'] = 'failed'
            self.update_results.append(result_row)

            # Rate limiting
            time.sleep(0.5)

        self.stats['api_calls'] = self.api_calls

        # Save results
        self._save_results()

        # Print summary
        logger.info("\n" + "="*70)
        logger.info("UPDATE SUMMARY")
        logger.info("="*70)
        logger.info(f"Total posts: {self.stats['total_posts']}")
        logger.info(f"Updated: {self.stats['updated']}")
        logger.info(f"Failed: {self.stats['failed']}")
        logger.info(f"Skipped: {self.stats['skipped']}")
        logger.info(f"API calls: {self.api_calls}")
        logger.info("="*70)

        return self.stats

    def _save_results(self):
        """Save update results to a timestamped CSV under ``<repo>/output``.

        Does nothing when there are no results. A failure to write the report
        is logged rather than raised, so the run summary is still produced.
        """
        if not self.update_results:
            return

        output_dir = Path(__file__).parent.parent.parent / 'output'
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        output_file = output_dir / f'meta_update_{self.site_name}_{timestamp}.csv'

        fieldnames = [
            'post_id', 'title', 'current_meta', 'generated_meta',
            'status', 'validation_score'
        ]

        try:
            output_dir.mkdir(parents=True, exist_ok=True)
            with open(output_file, 'w', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=fieldnames)
                writer.writeheader()
                writer.writerows(self.update_results)
        except OSError as e:
            logger.error(f"Could not save results to {output_file}: {e}")
            return

        logger.info(f"\nāœ“ Results saved to: {output_file}")

    def run(self, post_ids: Optional[List[int]] = None,
            category_ids: Optional[List[int]] = None,
            category_names: Optional[List[str]] = None,
            limit: Optional[int] = None,
            dry_run: bool = False,
            skip_existing: bool = False,
            force_regenerate: bool = False) -> Dict:
        """
        Run complete meta description update process.

        Args:
            post_ids: Specific post IDs to update
            category_ids: Filter by category IDs
            category_names: Filter by category names
            limit: Maximum number of posts to process
            dry_run: If True, preview changes without updating
            skip_existing: If True, skip posts with existing meta descriptions
            force_regenerate: If True, regenerate even for good quality metas

        Returns:
            Statistics dict; unchanged counters when no post matched.
        """
        # Fetch posts
        self.fetch_posts(
            post_ids=post_ids,
            category_ids=category_ids,
            category_names=category_names,
            limit=limit
        )

        if not self.posts:
            logger.warning("No posts found matching criteria")
            return self.stats

        # Update posts
        return self.update_posts(
            dry_run=dry_run,
            skip_existing=skip_existing,
            force_regenerate=force_regenerate
        )