Add AI-powered meta description generation

- Add meta_description command to generate SEO-optimized meta descriptions
- Use AI to generate compelling, length-optimized descriptions (120-160 chars)
- Support --only-missing flag for posts without meta descriptions
- Support --only-poor flag to improve low-quality meta descriptions
- Include quality validation scoring (0-100)
- Add call-to-action detection and optimization
- Generate detailed CSV reports with validation metrics
- Add comprehensive documentation (META_DESCRIPTION_GUIDE.md)

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
Kevin Bataille
2026-02-16 23:54:35 +01:00
parent 84f8fc6db5
commit ba8e39b5d8
4 changed files with 908 additions and 4 deletions

View File

@@ -5,7 +5,7 @@ SEO Application Core - Integrated SEO automation functionality
import logging
from pathlib import Path
from datetime import datetime
from typing import Optional, List, Tuple
from typing import Optional, List, Tuple, Dict
from .exporter import PostExporter
from .analyzer import EnhancedPostAnalyzer
@@ -13,6 +13,7 @@ from .category_proposer import CategoryProposer
from .category_manager import WordPressCategoryManager, CategoryAssignmentProcessor
from .editorial_strategy import EditorialStrategyAnalyzer
from .post_migrator import WordPressPostMigrator
from .meta_description_generator import MetaDescriptionGenerator
logger = logging.getLogger(__name__)
@@ -267,20 +268,56 @@ class SEOApp:
def status(self) -> dict:
    """Report the CSV files currently present in the output directory.

    Returns:
        dict with 'total_files' (count of *.csv files) and 'files', a list
        of up to 10 entries (newest first by creation time), each holding
        the file name, size in KB and last-modified timestamp.
    """
    csv_files = list(self.output_dir.glob('*.csv'))
    # Newest first by creation time; cap the detailed listing at ten entries.
    newest = sorted(csv_files, key=lambda p: p.stat().st_ctime, reverse=True)[:10]
    return {
        'total_files': len(csv_files),
        'files': [
            {
                'name': p.name,
                'size_kb': p.stat().st_size / 1024,
                'modified': datetime.fromtimestamp(p.stat().st_mtime).strftime('%Y-%m-%d %H:%M'),
            }
            for p in newest
        ],
    }
def generate_meta_descriptions(self, csv_file: Optional[str] = None,
                               output_file: Optional[str] = None,
                               only_missing: bool = False,
                               only_poor_quality: bool = False,
                               limit: Optional[int] = None) -> Tuple[str, Dict]:
    """Generate AI-optimized meta descriptions for exported posts.

    Args:
        csv_file: Path to CSV file with posts (latest export when omitted).
        output_file: Custom output file path for results.
        only_missing: Only generate for posts without meta descriptions.
        only_poor_quality: Only generate for posts with poor quality meta
            descriptions.
        limit: Maximum number of posts to process.

    Returns:
        Tuple of (output_file_path, summary_dict).

    Raises:
        FileNotFoundError: when no CSV is supplied and no prior export exists.
    """
    logger.info("✨ Generating AI-optimized meta descriptions...")
    # Fall back to the most recent export when no explicit file is given.
    source = csv_file or self._find_latest_export()
    if not source:
        raise FileNotFoundError("No exported posts found. Run export() first or provide a CSV file.")
    logger.info(f"Using file: {source}")
    return MetaDescriptionGenerator(source).run(
        output_file=output_file,
        only_missing=only_missing,
        only_poor_quality=only_poor_quality,
        limit=limit
    )
def _find_latest_export(self) -> Optional[str]:
"""Find the latest exported CSV file."""

View File

@@ -69,6 +69,10 @@ Examples:
parser.add_argument('--date-before', help='Migrate posts before this date (YYYY-MM-DD)')
parser.add_argument('--limit', type=int, help='Limit number of posts to migrate')
parser.add_argument('--ignore-original-date', action='store_true', help='Use current date instead of original post date')
# Meta description arguments
parser.add_argument('--only-missing', action='store_true', help='Only generate for posts without meta descriptions')
parser.add_argument('--only-poor', action='store_true', help='Only generate for posts with poor quality meta descriptions')
args = parser.parse_args()
@@ -95,6 +99,7 @@ Examples:
'category_create': cmd_category_create,
'editorial_strategy': cmd_editorial_strategy,
'migrate': cmd_migrate,
'meta_description': cmd_meta_description,
'status': cmd_status,
'help': cmd_help,
}
@@ -380,6 +385,48 @@ def cmd_migrate(app, args):
return 0
def cmd_meta_description(app, args):
    """Generate AI-optimized meta descriptions via the app facade."""
    if args.dry_run:
        # Dry run: describe what would happen, then bail out without work.
        print("Would generate AI-optimized meta descriptions")
        if args.only_missing:
            print(" Filter: Only posts without meta descriptions")
        if args.only_poor:
            print(" Filter: Only posts with poor quality meta descriptions")
        if args.limit:
            print(f" Limit: {args.limit} posts")
        return 0
    # Optional positional argument: a specific input CSV to work from.
    source_csv = args.args[0] if args.args else None
    print("Generating AI-optimized meta descriptions...")
    if args.only_missing:
        print(" Filter: Only posts without meta descriptions")
    elif args.only_poor:
        print(" Filter: Only posts with poor quality meta descriptions")
    if args.limit:
        print(f" Limit: {args.limit} posts")
    output_file, summary = app.generate_meta_descriptions(
        csv_file=source_csv,
        output_file=args.output,
        only_missing=args.only_missing,
        only_poor_quality=args.only_poor,
        limit=args.limit
    )
    if output_file and summary:
        # Echo the headline numbers from the generator's summary dict.
        print("\n✅ Meta description generation completed!")
        print(f" Results: {output_file}")
        print("\n📊 Summary:")
        print(f" Total processed: {summary.get('total_posts', 0)}")
        print(f" Improved: {summary.get('improved', 0)} ({summary.get('improvement_rate', 0):.1f}%)")
        print(f" Optimal length: {summary.get('optimal_length_count', 0)} ({summary.get('optimal_length_rate', 0):.1f}%)")
        print(f" Average score: {summary.get('average_score', 0):.1f}")
        print(f" API calls: {summary.get('api_calls', 0)}")
    return 0
def cmd_status(app, args):
"""Show status."""
if args.dry_run:
@@ -413,6 +460,8 @@ Export & Analysis:
analyze -f title Analyze specific fields (title, meta_description, categories, site)
analyze -u Update input CSV with new columns (creates backup)
category_propose [csv] Propose categories based on content
meta_description [csv] Generate AI-optimized meta descriptions
meta_description --only-missing Generate only for posts without meta descriptions
Category Management:
category_apply [csv] Apply AI category proposals to WordPress
@@ -437,6 +486,12 @@ Export Options:
--author-id Filter by author ID(s)
--site, -s Export from specific site only
Meta Description Options:
--only-missing Only generate for posts without meta descriptions
--only-poor Only generate for posts with poor quality meta descriptions
--limit Limit number of posts to process
--output, -o Custom output file path
Migration Options:
--destination, --to Destination site: mistergeek.net, webscroll.fr, hellogeek.net
--source, --from Source site for filtered migration
@@ -476,6 +531,9 @@ Examples:
seo migrate posts_to_migrate.csv --destination mistergeek.net
seo migrate --source webscroll.fr --destination mistergeek.net --category-filter VPN
seo migrate --source A --to B --date-after 2024-01-01 --limit 10 --keep-source
seo meta_description # Generate for all posts
seo meta_description --only-missing # Generate only for posts without meta
seo meta_description --only-poor --limit 10 # Fix 10 poor quality metas
seo status
""")
return 0

View File

@@ -0,0 +1,482 @@
"""
Meta Description Generator - AI-powered meta description generation and optimization
"""
import csv
import json
import logging
import time
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Tuple
import requests
from .config import Config
logger = logging.getLogger(__name__)
class MetaDescriptionGenerator:
"""AI-powered meta description generator and optimizer."""
def __init__(self, csv_file: str):
    """Initialize the generator.

    Args:
        csv_file: Path to the CSV file containing the posts to process.
    """
    self.csv_file = Path(csv_file)
    # AI backend configuration (OpenRouter).
    self.openrouter_api_key = Config.OPENROUTER_API_KEY
    self.ai_model = Config.AI_MODEL
    # Working state accumulated across a run.
    self.posts = []
    self.generated_results = []
    self.api_calls = 0
    self.ai_cost = 0.0
    # SEO best-practice bounds for meta description length (characters).
    self.min_length = 120
    self.max_length = 160  # optimal upper bound for search snippets
    self.include_keywords = True
def load_csv(self) -> bool:
    """Read all posts from ``self.csv_file`` into ``self.posts``.

    Returns:
        True when the file was read successfully, False otherwise
        (missing file or any read/parse error; the error is logged).
    """
    logger.info(f"Loading CSV: {self.csv_file}")
    if not self.csv_file.exists():
        logger.error(f"CSV file not found: {self.csv_file}")
        return False
    try:
        with open(self.csv_file, 'r', encoding='utf-8') as handle:
            self.posts = [row for row in csv.DictReader(handle)]
        logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")
        return True
    except Exception as e:
        logger.error(f"Error loading CSV: {e}")
        return False
def _build_prompt(self, post: Dict) -> str:
"""
Build AI prompt for meta description generation.
Args:
post: Post data dict
Returns:
AI prompt string
"""
title = post.get('title', '')
content_preview = post.get('content_preview', '')
excerpt = post.get('excerpt', '')
focus_keyword = post.get('focus_keyword', '')
current_meta = post.get('meta_description', '')
# Build context from available content
content_context = ""
if excerpt:
content_context += f"Excerpt: {excerpt}\n"
if content_preview:
content_context += f"Content preview: {content_preview[:300]}..."
prompt = f"""You are an SEO expert. Generate an optimized meta description for the following blog post.
**Post Title:** {title}
**Content Context:**
{content_context}
**Focus Keyword:** {focus_keyword if focus_keyword else 'Not specified'}
**Current Meta Description:** {current_meta if current_meta else 'None (needs to be created)'}
**Requirements:**
1. Length: 120-160 characters (optimal for SEO)
2. Include the focus keyword naturally if available
3. Make it compelling and action-oriented
4. Clearly describe what the post is about
5. Use active voice
6. Include a call-to-action when appropriate
7. Avoid clickbait - be accurate and valuable
8. Write in the same language as the content
**Output Format:**
Return ONLY the meta description text, nothing else. No quotes, no explanations."""
return prompt
def _call_ai_api(self, prompt: str) -> Optional[str]:
    """Send *prompt* to the OpenRouter chat-completions API.

    Args:
        prompt: Fully-built user prompt.

    Returns:
        The generated meta description text, or None on any failure
        (network error, HTTP error, or malformed response).
    """
    endpoint = "https://openrouter.ai/api/v1/chat/completions"
    request_headers = {
        "Authorization": f"Bearer {self.openrouter_api_key}",
        "Content-Type": "application/json"
    }
    body = {
        "model": self.ai_model,
        "messages": [
            {
                "role": "system",
                "content": "You are an SEO expert specializing in meta description optimization. You write compelling, concise, and search-engine optimized meta descriptions."
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        "temperature": 0.7,
        # Meta descriptions are short; 100 tokens is plenty.
        "max_tokens": 100
    }
    try:
        resp = requests.post(endpoint, json=body, headers=request_headers, timeout=30)
        resp.raise_for_status()
        data = resp.json()
        self.api_calls += 1
        choices = data.get('choices', [])
        if len(choices) > 0:
            text = choices[0]['message']['content'].strip()
            # The model sometimes wraps its answer in quotes; drop them.
            if text.startswith('"') and text.endswith('"'):
                text = text[1:-1]
            return text
        logger.warning("No AI response received")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"API call failed: {e}")
        return None
    except Exception as e:
        logger.error(f"Error processing AI response: {e}")
        return None
def _validate_meta_description(self, meta: str) -> Dict[str, any]:
"""
Validate meta description quality.
Args:
meta: Meta description text
Returns:
Validation results dict
"""
length = len(meta)
validation = {
'length': length,
'is_valid': False,
'too_short': False,
'too_long': False,
'optimal': False,
'score': 0
}
# Check length
if length < self.min_length:
validation['too_short'] = True
validation['score'] = max(0, 50 - (self.min_length - length))
elif length > self.max_length:
validation['too_long'] = True
validation['score'] = max(0, 50 - (length - self.max_length))
else:
validation['optimal'] = True
validation['score'] = 100
# Check if it ends with a period (good practice)
if meta.endswith('.'):
validation['score'] = min(100, validation['score'] + 5)
# Check for call-to-action words
cta_words = ['learn', 'discover', 'find', 'explore', 'read', 'get', 'see', 'try', 'start']
if any(word in meta.lower() for word in cta_words):
validation['score'] = min(100, validation['score'] + 5)
validation['is_valid'] = validation['score'] >= 70
return validation
def generate_for_post(self, post: Dict) -> Optional[Dict]:
    """Generate and validate a meta description for one post.

    Args:
        post: Post data dict.

    Returns:
        Result dict (identifiers, old/new meta, validation metrics), or
        None when the post has no title or the AI call failed.
    """
    post_id = post.get('post_id', '')
    title = post.get('title', '')
    existing_meta = post.get('meta_description', '')
    logger.info(f"Generating meta description for post {post_id}: {title[:50]}...")
    if not title:
        # Without a title there is nothing meaningful to prompt with.
        logger.warning(f"Skipping post {post_id}: No title")
        return None
    generated = self._call_ai_api(self._build_prompt(post))
    if not generated:
        logger.error(f"Failed to generate meta description for post {post_id}")
        return None
    checks = self._validate_meta_description(generated)
    # The new meta counts as an improvement when it beats the old score,
    # or when there was no meta description at all.
    if existing_meta:
        improved = checks['score'] > self._validate_meta_description(existing_meta)['score']
    else:
        improved = True
    entry = {
        'post_id': post_id,
        'site': post.get('site', ''),
        'title': title,
        'current_meta_description': existing_meta,
        'generated_meta_description': generated,
        'generated_length': checks['length'],
        'validation_score': checks['score'],
        'is_optimal_length': checks['optimal'],
        'improvement': improved,
        'status': 'generated'
    }
    logger.info(f"✓ Generated meta description (score: {checks['score']}, length: {checks['length']})")
    # Crude rate limit between API calls.
    time.sleep(0.5)
    return entry
def generate_batch(self, batch: List[Dict]) -> List[Dict]:
    """Run generation over every post in *batch*, skipping failures.

    Args:
        batch: List of post dicts.

    Returns:
        Result dicts for the posts that were generated successfully
        (failed/skipped posts are simply omitted).
    """
    outcomes = []
    for index, post in enumerate(batch, 1):
        logger.info(f"Processing post {index}/{len(batch)}")
        outcome = self.generate_for_post(post)
        if outcome:
            outcomes.append(outcome)
    return outcomes
def filter_posts_for_generation(self, posts: List[Dict],
                                only_missing: bool = False,
                                only_poor_quality: bool = False) -> List[Dict]:
    """Select which posts need a (re)generated meta description.

    Args:
        posts: List of post dicts.
        only_missing: Keep only posts that have no meta description.
        only_poor_quality: Keep only posts whose existing meta description
            scores below 70.

    Returns:
        The filtered list of posts (all posts when neither filter is set;
        only_missing takes precedence over only_poor_quality).
    """
    selected = []
    for entry in posts:
        existing = entry.get('meta_description', '')
        if only_missing:
            if not existing:
                selected.append(entry)
        elif only_poor_quality:
            # Posts with no meta at all are the only_missing filter's job.
            if existing and self._validate_meta_description(existing)['score'] < 70:
                selected.append(entry)
        else:
            selected.append(entry)
    return selected
def save_results(self, results: List[Dict], output_file: Optional[str] = None) -> str:
    """Write generation results to a CSV file.

    Args:
        results: List of result dicts (see generate_for_post).
        output_file: Optional custom destination; when omitted a
            timestamped file under the project 'output' directory is used.

    Returns:
        Path of the written file, as a string.
    """
    if not output_file:
        # Default: <project root>/output/meta_descriptions_<timestamp>.csv
        default_dir = Path(__file__).parent.parent.parent / 'output'
        default_dir.mkdir(parents=True, exist_ok=True)
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        output_file = default_dir / f'meta_descriptions_{stamp}.csv'
    target = Path(output_file)
    target.parent.mkdir(parents=True, exist_ok=True)
    # Column order of the report CSV.
    columns = [
        'post_id', 'site', 'title', 'current_meta_description',
        'generated_meta_description', 'generated_length',
        'validation_score', 'is_optimal_length', 'improvement', 'status'
    ]
    logger.info(f"Saving {len(results)} results to {target}...")
    with open(target, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerows(results)
    logger.info(f"✓ Results saved to: {target}")
    return str(target)
def generate_summary(self, results: List[Dict]) -> Dict:
    """Aggregate statistics over the generation results.

    Args:
        results: List of result dicts.

    Returns:
        Summary dict with totals, improvement/optimal-length rates, the
        average validation score, API call count and a per-site breakdown;
        empty dict when there are no results.
    """
    if not results:
        return {}
    total = len(results)
    improved_count = sum(1 for r in results if r.get('improvement', False))
    optimal_count = sum(1 for r in results if r.get('is_optimal_length', False))
    mean_score = sum(r.get('validation_score', 0) for r in results) / total
    # Per-site totals and improvement counts.
    by_site = {}
    for r in results:
        stats = by_site.setdefault(r.get('site', 'unknown'), {'total': 0, 'improved': 0})
        stats['total'] += 1
        if r.get('improvement', False):
            stats['improved'] += 1
    return {
        'total_posts': total,
        'improved': improved_count,
        'improvement_rate': (improved_count / total * 100) if total > 0 else 0,
        'optimal_length_count': optimal_count,
        'optimal_length_rate': (optimal_count / total * 100) if total > 0 else 0,
        'average_score': mean_score,
        'api_calls': self.api_calls,
        'by_site': by_site
    }
def run(self, output_file: Optional[str] = None,
        only_missing: bool = False,
        only_poor_quality: bool = False,
        limit: Optional[int] = None) -> Tuple[str, Dict]:
    """Execute the full pipeline: load, filter, generate, save, summarize.

    Args:
        output_file: Custom output file path.
        only_missing: Only generate for posts without meta descriptions.
        only_poor_quality: Only generate for posts with poor quality meta
            descriptions.
        limit: Maximum number of posts to process.

    Returns:
        Tuple of (output_file_path, summary_dict); ("", {}) when nothing
        could be loaded, matched the filters, or was generated.
    """
    banner = "=" * 70
    logger.info("\n" + banner)
    logger.info("AI META DESCRIPTION GENERATION")
    logger.info(banner)
    if not self.load_csv():
        return "", {}
    # Narrow down to the posts the caller asked for.
    candidates = self.filter_posts_for_generation(
        self.posts,
        only_missing=only_missing,
        only_poor_quality=only_poor_quality
    )
    logger.info(f"Posts to process: {len(candidates)}")
    if only_missing:
        logger.info("Filter: Only posts without meta descriptions")
    elif only_poor_quality:
        logger.info("Filter: Only posts with poor quality meta descriptions")
    if limit:
        candidates = candidates[:limit]
        logger.info(f"Limited to: {len(candidates)} posts")
    if not candidates:
        logger.warning("No posts to process")
        return "", {}
    results = self.generate_batch(candidates)
    if not results:
        logger.warning("No results generated")
        return "", {}
    saved_path = self.save_results(results, output_file)
    summary = self.generate_summary(results)
    logger.info("\n" + banner)
    logger.info("GENERATION SUMMARY")
    logger.info(banner)
    logger.info(f"Total posts processed: {summary['total_posts']}")
    logger.info(f"Improved: {summary['improved']} ({summary['improvement_rate']:.1f}%)")
    logger.info(f"Optimal length: {summary['optimal_length_count']} ({summary['optimal_length_rate']:.1f}%)")
    logger.info(f"Average validation score: {summary['average_score']:.1f}")
    logger.info(f"API calls made: {summary['api_calls']}")
    logger.info(banner)
    return saved_path, summary