Add AI-powered meta description generation

- Add meta_description command to generate SEO-optimized meta descriptions
- Use AI to generate compelling, length-optimized descriptions (120-160 chars)
- Support --only-missing flag for posts without meta descriptions
- Support --only-poor flag to improve low-quality meta descriptions
- Include quality validation scoring (0-100)
- Add call-to-action detection and optimization
- Generate detailed CSV reports with validation metrics
- Add comprehensive documentation (META_DESCRIPTION_GUIDE.md)

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
Kevin Bataille
2026-02-16 23:54:35 +01:00
parent 84f8fc6db5
commit ba8e39b5d8
4 changed files with 908 additions and 4 deletions

View File

@@ -5,7 +5,7 @@ SEO Application Core - Integrated SEO automation functionality
import logging
from pathlib import Path
from datetime import datetime
from typing import Optional, List, Tuple
from typing import Optional, List, Tuple, Dict
from .exporter import PostExporter
from .analyzer import EnhancedPostAnalyzer
@@ -13,6 +13,7 @@ from .category_proposer import CategoryProposer
from .category_manager import WordPressCategoryManager, CategoryAssignmentProcessor
from .editorial_strategy import EditorialStrategyAnalyzer
from .post_migrator import WordPressPostMigrator
from .meta_description_generator import MetaDescriptionGenerator
logger = logging.getLogger(__name__)
@@ -267,20 +268,56 @@ class SEOApp:
def status(self) -> dict:
    """Report the CSV files currently present in the output directory.

    Returns:
        dict with 'total_files' (count of *.csv files) and 'files', a list
        of up to 10 entries (newest first by creation time), each holding
        the file name, size in KB and last-modified timestamp.
    """
    csv_files = list(self.output_dir.glob('*.csv'))
    # Newest first by creation time; cap the detailed listing at ten entries.
    newest = sorted(csv_files, key=lambda p: p.stat().st_ctime, reverse=True)[:10]
    return {
        'total_files': len(csv_files),
        'files': [
            {
                'name': p.name,
                'size_kb': p.stat().st_size / 1024,
                'modified': datetime.fromtimestamp(p.stat().st_mtime).strftime('%Y-%m-%d %H:%M'),
            }
            for p in newest
        ],
    }
def generate_meta_descriptions(self, csv_file: Optional[str] = None,
                               output_file: Optional[str] = None,
                               only_missing: bool = False,
                               only_poor_quality: bool = False,
                               limit: Optional[int] = None) -> Tuple[str, Dict]:
    """Generate AI-optimized meta descriptions for exported posts.

    Args:
        csv_file: Path to CSV file with posts (latest export when omitted).
        output_file: Custom output file path for results.
        only_missing: Only generate for posts without meta descriptions.
        only_poor_quality: Only generate for posts with poor quality meta
            descriptions.
        limit: Maximum number of posts to process.

    Returns:
        Tuple of (output_file_path, summary_dict).

    Raises:
        FileNotFoundError: when no CSV is supplied and no prior export exists.
    """
    logger.info("✨ Generating AI-optimized meta descriptions...")
    # Fall back to the most recent export when no explicit file is given.
    source = csv_file or self._find_latest_export()
    if not source:
        raise FileNotFoundError("No exported posts found. Run export() first or provide a CSV file.")
    logger.info(f"Using file: {source}")
    return MetaDescriptionGenerator(source).run(
        output_file=output_file,
        only_missing=only_missing,
        only_poor_quality=only_poor_quality,
        limit=limit
    )
def _find_latest_export(self) -> Optional[str]:
"""Find the latest exported CSV file."""

View File

@@ -69,6 +69,10 @@ Examples:
parser.add_argument('--date-before', help='Migrate posts before this date (YYYY-MM-DD)')
parser.add_argument('--limit', type=int, help='Limit number of posts to migrate')
parser.add_argument('--ignore-original-date', action='store_true', help='Use current date instead of original post date')
# Meta description arguments
parser.add_argument('--only-missing', action='store_true', help='Only generate for posts without meta descriptions')
parser.add_argument('--only-poor', action='store_true', help='Only generate for posts with poor quality meta descriptions')
args = parser.parse_args()
@@ -95,6 +99,7 @@ Examples:
'category_create': cmd_category_create,
'editorial_strategy': cmd_editorial_strategy,
'migrate': cmd_migrate,
'meta_description': cmd_meta_description,
'status': cmd_status,
'help': cmd_help,
}
@@ -380,6 +385,48 @@ def cmd_migrate(app, args):
return 0
def cmd_meta_description(app, args):
    """Generate AI-optimized meta descriptions via the app facade."""
    if args.dry_run:
        # Dry run: describe what would happen, then bail out without work.
        print("Would generate AI-optimized meta descriptions")
        if args.only_missing:
            print(" Filter: Only posts without meta descriptions")
        if args.only_poor:
            print(" Filter: Only posts with poor quality meta descriptions")
        if args.limit:
            print(f" Limit: {args.limit} posts")
        return 0
    # Optional positional argument: a specific input CSV to work from.
    source_csv = args.args[0] if args.args else None
    print("Generating AI-optimized meta descriptions...")
    if args.only_missing:
        print(" Filter: Only posts without meta descriptions")
    elif args.only_poor:
        print(" Filter: Only posts with poor quality meta descriptions")
    if args.limit:
        print(f" Limit: {args.limit} posts")
    output_file, summary = app.generate_meta_descriptions(
        csv_file=source_csv,
        output_file=args.output,
        only_missing=args.only_missing,
        only_poor_quality=args.only_poor,
        limit=args.limit
    )
    if output_file and summary:
        # Echo the headline numbers from the generator's summary dict.
        print("\n✅ Meta description generation completed!")
        print(f" Results: {output_file}")
        print("\n📊 Summary:")
        print(f" Total processed: {summary.get('total_posts', 0)}")
        print(f" Improved: {summary.get('improved', 0)} ({summary.get('improvement_rate', 0):.1f}%)")
        print(f" Optimal length: {summary.get('optimal_length_count', 0)} ({summary.get('optimal_length_rate', 0):.1f}%)")
        print(f" Average score: {summary.get('average_score', 0):.1f}")
        print(f" API calls: {summary.get('api_calls', 0)}")
    return 0
def cmd_status(app, args):
"""Show status."""
if args.dry_run:
@@ -413,6 +460,8 @@ Export & Analysis:
analyze -f title Analyze specific fields (title, meta_description, categories, site)
analyze -u Update input CSV with new columns (creates backup)
category_propose [csv] Propose categories based on content
meta_description [csv] Generate AI-optimized meta descriptions
meta_description --only-missing Generate only for posts without meta descriptions
Category Management:
category_apply [csv] Apply AI category proposals to WordPress
@@ -437,6 +486,12 @@ Export Options:
--author-id Filter by author ID(s)
--site, -s Export from specific site only
Meta Description Options:
--only-missing Only generate for posts without meta descriptions
--only-poor Only generate for posts with poor quality meta descriptions
--limit Limit number of posts to process
--output, -o Custom output file path
Migration Options:
--destination, --to Destination site: mistergeek.net, webscroll.fr, hellogeek.net
--source, --from Source site for filtered migration
@@ -476,6 +531,9 @@ Examples:
seo migrate posts_to_migrate.csv --destination mistergeek.net
seo migrate --source webscroll.fr --destination mistergeek.net --category-filter VPN
seo migrate --source A --to B --date-after 2024-01-01 --limit 10 --keep-source
seo meta_description # Generate for all posts
seo meta_description --only-missing # Generate only for posts without meta
seo meta_description --only-poor --limit 10 # Fix 10 poor quality metas
seo status
""")
return 0

View File

@@ -0,0 +1,482 @@
"""
Meta Description Generator - AI-powered meta description generation and optimization
"""
import csv
import json
import logging
import time
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Tuple
import requests
from .config import Config
logger = logging.getLogger(__name__)
class MetaDescriptionGenerator:
"""AI-powered meta description generator and optimizer."""
def __init__(self, csv_file: str):
    """Initialize the generator.

    Args:
        csv_file: Path to the CSV file containing the posts to process.
    """
    self.csv_file = Path(csv_file)
    # AI backend configuration (OpenRouter).
    self.openrouter_api_key = Config.OPENROUTER_API_KEY
    self.ai_model = Config.AI_MODEL
    # Working state accumulated across a run.
    self.posts = []
    self.generated_results = []
    self.api_calls = 0
    self.ai_cost = 0.0
    # SEO best-practice bounds for meta description length (characters).
    self.min_length = 120
    self.max_length = 160  # optimal upper bound for search snippets
    self.include_keywords = True
def load_csv(self) -> bool:
    """Read all posts from ``self.csv_file`` into ``self.posts``.

    Returns:
        True when the file was read successfully, False otherwise
        (missing file or any read/parse error; the error is logged).
    """
    logger.info(f"Loading CSV: {self.csv_file}")
    if not self.csv_file.exists():
        logger.error(f"CSV file not found: {self.csv_file}")
        return False
    try:
        with open(self.csv_file, 'r', encoding='utf-8') as handle:
            self.posts = [row for row in csv.DictReader(handle)]
        logger.info(f"✓ Loaded {len(self.posts)} posts from CSV")
        return True
    except Exception as e:
        logger.error(f"Error loading CSV: {e}")
        return False
def _build_prompt(self, post: Dict) -> str:
"""
Build AI prompt for meta description generation.
Args:
post: Post data dict
Returns:
AI prompt string
"""
title = post.get('title', '')
content_preview = post.get('content_preview', '')
excerpt = post.get('excerpt', '')
focus_keyword = post.get('focus_keyword', '')
current_meta = post.get('meta_description', '')
# Build context from available content
content_context = ""
if excerpt:
content_context += f"Excerpt: {excerpt}\n"
if content_preview:
content_context += f"Content preview: {content_preview[:300]}..."
prompt = f"""You are an SEO expert. Generate an optimized meta description for the following blog post.
**Post Title:** {title}
**Content Context:**
{content_context}
**Focus Keyword:** {focus_keyword if focus_keyword else 'Not specified'}
**Current Meta Description:** {current_meta if current_meta else 'None (needs to be created)'}
**Requirements:**
1. Length: 120-160 characters (optimal for SEO)
2. Include the focus keyword naturally if available
3. Make it compelling and action-oriented
4. Clearly describe what the post is about
5. Use active voice
6. Include a call-to-action when appropriate
7. Avoid clickbait - be accurate and valuable
8. Write in the same language as the content
**Output Format:**
Return ONLY the meta description text, nothing else. No quotes, no explanations."""
return prompt
def _call_ai_api(self, prompt: str) -> Optional[str]:
    """Send *prompt* to the OpenRouter chat-completions API.

    Args:
        prompt: Fully-built user prompt.

    Returns:
        The generated meta description text, or None on any failure
        (network error, HTTP error, or malformed response).
    """
    endpoint = "https://openrouter.ai/api/v1/chat/completions"
    request_headers = {
        "Authorization": f"Bearer {self.openrouter_api_key}",
        "Content-Type": "application/json"
    }
    body = {
        "model": self.ai_model,
        "messages": [
            {
                "role": "system",
                "content": "You are an SEO expert specializing in meta description optimization. You write compelling, concise, and search-engine optimized meta descriptions."
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        "temperature": 0.7,
        # Meta descriptions are short; 100 tokens is plenty.
        "max_tokens": 100
    }
    try:
        resp = requests.post(endpoint, json=body, headers=request_headers, timeout=30)
        resp.raise_for_status()
        data = resp.json()
        self.api_calls += 1
        choices = data.get('choices', [])
        if len(choices) > 0:
            text = choices[0]['message']['content'].strip()
            # The model sometimes wraps its answer in quotes; drop them.
            if text.startswith('"') and text.endswith('"'):
                text = text[1:-1]
            return text
        logger.warning("No AI response received")
        return None
    except requests.exceptions.RequestException as e:
        logger.error(f"API call failed: {e}")
        return None
    except Exception as e:
        logger.error(f"Error processing AI response: {e}")
        return None
def _validate_meta_description(self, meta: str) -> Dict[str, any]:
"""
Validate meta description quality.
Args:
meta: Meta description text
Returns:
Validation results dict
"""
length = len(meta)
validation = {
'length': length,
'is_valid': False,
'too_short': False,
'too_long': False,
'optimal': False,
'score': 0
}
# Check length
if length < self.min_length:
validation['too_short'] = True
validation['score'] = max(0, 50 - (self.min_length - length))
elif length > self.max_length:
validation['too_long'] = True
validation['score'] = max(0, 50 - (length - self.max_length))
else:
validation['optimal'] = True
validation['score'] = 100
# Check if it ends with a period (good practice)
if meta.endswith('.'):
validation['score'] = min(100, validation['score'] + 5)
# Check for call-to-action words
cta_words = ['learn', 'discover', 'find', 'explore', 'read', 'get', 'see', 'try', 'start']
if any(word in meta.lower() for word in cta_words):
validation['score'] = min(100, validation['score'] + 5)
validation['is_valid'] = validation['score'] >= 70
return validation
def generate_for_post(self, post: Dict) -> Optional[Dict]:
    """Generate and validate a meta description for one post.

    Args:
        post: Post data dict.

    Returns:
        Result dict (identifiers, old/new meta, validation metrics), or
        None when the post has no title or the AI call failed.
    """
    post_id = post.get('post_id', '')
    title = post.get('title', '')
    existing_meta = post.get('meta_description', '')
    logger.info(f"Generating meta description for post {post_id}: {title[:50]}...")
    if not title:
        # Without a title there is nothing meaningful to prompt with.
        logger.warning(f"Skipping post {post_id}: No title")
        return None
    generated = self._call_ai_api(self._build_prompt(post))
    if not generated:
        logger.error(f"Failed to generate meta description for post {post_id}")
        return None
    checks = self._validate_meta_description(generated)
    # The new meta counts as an improvement when it beats the old score,
    # or when there was no meta description at all.
    if existing_meta:
        improved = checks['score'] > self._validate_meta_description(existing_meta)['score']
    else:
        improved = True
    entry = {
        'post_id': post_id,
        'site': post.get('site', ''),
        'title': title,
        'current_meta_description': existing_meta,
        'generated_meta_description': generated,
        'generated_length': checks['length'],
        'validation_score': checks['score'],
        'is_optimal_length': checks['optimal'],
        'improvement': improved,
        'status': 'generated'
    }
    logger.info(f"✓ Generated meta description (score: {checks['score']}, length: {checks['length']})")
    # Crude rate limit between API calls.
    time.sleep(0.5)
    return entry
def generate_batch(self, batch: List[Dict]) -> List[Dict]:
    """Run generation over every post in *batch*, skipping failures.

    Args:
        batch: List of post dicts.

    Returns:
        Result dicts for the posts that were generated successfully
        (failed/skipped posts are simply omitted).
    """
    outcomes = []
    for index, post in enumerate(batch, 1):
        logger.info(f"Processing post {index}/{len(batch)}")
        outcome = self.generate_for_post(post)
        if outcome:
            outcomes.append(outcome)
    return outcomes
def filter_posts_for_generation(self, posts: List[Dict],
                                only_missing: bool = False,
                                only_poor_quality: bool = False) -> List[Dict]:
    """Select which posts need a (re)generated meta description.

    Args:
        posts: List of post dicts.
        only_missing: Keep only posts that have no meta description.
        only_poor_quality: Keep only posts whose existing meta description
            scores below 70.

    Returns:
        The filtered list of posts (all posts when neither filter is set;
        only_missing takes precedence over only_poor_quality).
    """
    selected = []
    for entry in posts:
        existing = entry.get('meta_description', '')
        if only_missing:
            if not existing:
                selected.append(entry)
        elif only_poor_quality:
            # Posts with no meta at all are the only_missing filter's job.
            if existing and self._validate_meta_description(existing)['score'] < 70:
                selected.append(entry)
        else:
            selected.append(entry)
    return selected
def save_results(self, results: List[Dict], output_file: Optional[str] = None) -> str:
    """Write generation results to a CSV file.

    Args:
        results: List of result dicts (see generate_for_post).
        output_file: Optional custom destination; when omitted a
            timestamped file under the project 'output' directory is used.

    Returns:
        Path of the written file, as a string.
    """
    if not output_file:
        # Default: <project root>/output/meta_descriptions_<timestamp>.csv
        default_dir = Path(__file__).parent.parent.parent / 'output'
        default_dir.mkdir(parents=True, exist_ok=True)
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        output_file = default_dir / f'meta_descriptions_{stamp}.csv'
    target = Path(output_file)
    target.parent.mkdir(parents=True, exist_ok=True)
    # Column order of the report CSV.
    columns = [
        'post_id', 'site', 'title', 'current_meta_description',
        'generated_meta_description', 'generated_length',
        'validation_score', 'is_optimal_length', 'improvement', 'status'
    ]
    logger.info(f"Saving {len(results)} results to {target}...")
    with open(target, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=columns)
        writer.writeheader()
        writer.writerows(results)
    logger.info(f"✓ Results saved to: {target}")
    return str(target)
def generate_summary(self, results: List[Dict]) -> Dict:
    """Aggregate statistics over the generation results.

    Args:
        results: List of result dicts.

    Returns:
        Summary dict with totals, improvement/optimal-length rates, the
        average validation score, API call count and a per-site breakdown;
        empty dict when there are no results.
    """
    if not results:
        return {}
    total = len(results)
    improved_count = sum(1 for r in results if r.get('improvement', False))
    optimal_count = sum(1 for r in results if r.get('is_optimal_length', False))
    mean_score = sum(r.get('validation_score', 0) for r in results) / total
    # Per-site totals and improvement counts.
    by_site = {}
    for r in results:
        stats = by_site.setdefault(r.get('site', 'unknown'), {'total': 0, 'improved': 0})
        stats['total'] += 1
        if r.get('improvement', False):
            stats['improved'] += 1
    return {
        'total_posts': total,
        'improved': improved_count,
        'improvement_rate': (improved_count / total * 100) if total > 0 else 0,
        'optimal_length_count': optimal_count,
        'optimal_length_rate': (optimal_count / total * 100) if total > 0 else 0,
        'average_score': mean_score,
        'api_calls': self.api_calls,
        'by_site': by_site
    }
def run(self, output_file: Optional[str] = None,
        only_missing: bool = False,
        only_poor_quality: bool = False,
        limit: Optional[int] = None) -> Tuple[str, Dict]:
    """Execute the full pipeline: load, filter, generate, save, summarize.

    Args:
        output_file: Custom output file path.
        only_missing: Only generate for posts without meta descriptions.
        only_poor_quality: Only generate for posts with poor quality meta
            descriptions.
        limit: Maximum number of posts to process.

    Returns:
        Tuple of (output_file_path, summary_dict); ("", {}) when nothing
        could be loaded, matched the filters, or was generated.
    """
    banner = "=" * 70
    logger.info("\n" + banner)
    logger.info("AI META DESCRIPTION GENERATION")
    logger.info(banner)
    if not self.load_csv():
        return "", {}
    # Narrow down to the posts the caller asked for.
    candidates = self.filter_posts_for_generation(
        self.posts,
        only_missing=only_missing,
        only_poor_quality=only_poor_quality
    )
    logger.info(f"Posts to process: {len(candidates)}")
    if only_missing:
        logger.info("Filter: Only posts without meta descriptions")
    elif only_poor_quality:
        logger.info("Filter: Only posts with poor quality meta descriptions")
    if limit:
        candidates = candidates[:limit]
        logger.info(f"Limited to: {len(candidates)} posts")
    if not candidates:
        logger.warning("No posts to process")
        return "", {}
    results = self.generate_batch(candidates)
    if not results:
        logger.warning("No results generated")
        return "", {}
    saved_path = self.save_results(results, output_file)
    summary = self.generate_summary(results)
    logger.info("\n" + banner)
    logger.info("GENERATION SUMMARY")
    logger.info(banner)
    logger.info(f"Total posts processed: {summary['total_posts']}")
    logger.info(f"Improved: {summary['improved']} ({summary['improvement_rate']:.1f}%)")
    logger.info(f"Optimal length: {summary['optimal_length_count']} ({summary['optimal_length_rate']:.1f}%)")
    logger.info(f"Average validation score: {summary['average_score']:.1f}")
    logger.info(f"API calls made: {summary['api_calls']}")
    logger.info(banner)
    return saved_path, summary