Refactor into integrated Python package structure
Architecture Changes: - Created src/seo/ package with modular architecture - Main application class (SEOApp) with Rails-inspired API - Separated concerns into distinct modules: - app.py: Main application orchestrator - cli.py: Command-line interface - config.py: Configuration management - exporter.py: Post export functionality - analyzer.py: AI analysis - recategorizer.py: Recategorization - seo_checker.py: SEO quality checking - categories.py: Category management - approval.py: User approval system New Features: - Proper Python package structure (src layout) - setup.py and setup.cfg for installation - Can be installed with: pip install -e . - Entry point: seo = seo.cli:main - Cleaner imports and dependencies Benefits: - Better code organization - Easier to maintain and extend - Follows Python best practices - Proper package isolation - Can be imported as library - Testable components - Clear separation of concerns Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
36
README.md
36
README.md
@@ -193,23 +193,39 @@ The SEO automation tool provides a simple, intuitive CLI inspired by Ruby on Rai
|
||||
|
||||
```
|
||||
seo/
|
||||
├── seo # Main CLI application (executable)
|
||||
├── scripts/
|
||||
│ ├── config.py # Configuration loader
|
||||
│ ├── export_posts_for_ai_decision.py
|
||||
│ ├── ai_analyze_posts_for_decisions.py
|
||||
│ ├── multi_site_seo_analyzer.py
|
||||
│ ├── category_manager.py # Category management
|
||||
│ ├── user_approval.py # Approval system
|
||||
│ └── ...
|
||||
├── seo # Main CLI executable
|
||||
├── src/seo/ # Integrated application package
|
||||
│ ├── __init__.py # Package initialization
|
||||
│ ├── cli.py # Command-line interface
|
||||
│ ├── app.py # Main application class
|
||||
│ ├── config.py # Configuration management
|
||||
│ ├── exporter.py # Post export functionality
|
||||
│ ├── analyzer.py # AI analysis functionality
|
||||
│ ├── recategorizer.py # Recategorization functionality
|
||||
│ ├── seo_checker.py # SEO quality checking
|
||||
│ ├── categories.py # Category management
|
||||
│ └── approval.py # User approval system
|
||||
├── scripts/ # Legacy scripts (deprecated)
|
||||
├── config.yaml # YAML configuration
|
||||
├── .env # Environment variables
|
||||
├── .env.example # Template
|
||||
├── requirements.txt # Dependencies
|
||||
├── output/reports/ # Generated CSV files
|
||||
├── output/ # Generated files
|
||||
└── README.md
|
||||
```
|
||||
|
||||
## 📦 Installation
|
||||
|
||||
For development/installation:
|
||||
|
||||
```bash
|
||||
# Install in development mode
|
||||
pip install -e .
|
||||
|
||||
# Or just use the executable directly
|
||||
./seo help
|
||||
```
|
||||
|
||||
## 🎯 Typical Workflow
|
||||
|
||||
1. **Export posts** from all sites:
|
||||
|
||||
491
seo
491
seo
@@ -1,493 +1,18 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
SEO Automation CLI - Inspired by Ruby on Rails CLI
|
||||
Simple, intuitive commands for managing WordPress SEO
|
||||
SEO Automation CLI - Main executable
|
||||
Entry point for the SEO automation tool.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
# Add scripts directory to path
|
||||
SCRIPTS_DIR = Path(__file__).parent / 'scripts'
|
||||
sys.path.insert(0, str(SCRIPTS_DIR))
|
||||
|
||||
from config import Config
|
||||
from export_posts_for_ai_decision import PostExporter
|
||||
from ai_analyze_posts_for_decisions import PostAnalyzer
|
||||
from ai_recategorize_posts import PostRecategorizer
|
||||
from multi_site_seo_analyzer import MultiSiteSEOAnalyzer
|
||||
from analytics_importer import AnalyticsImporter
|
||||
from content_gap_analyzer import ContentGapAnalyzer
|
||||
from opportunity_analyzer import OpportunityAnalyzer
|
||||
from report_generator import ReportGenerator
|
||||
from category_manager import CategoryManager
|
||||
from user_approval import UserApprovalSystem
|
||||
|
||||
|
||||
def main():
    """Main CLI entry point.

    Parses the command line, validates configuration, then dispatches to
    the handler named by the first positional argument.  Returns a process
    exit code (0 on success, 1 on any error).
    """
    parser = argparse.ArgumentParser(
        prog='seo',
        description='SEO Automation CLI - Manage WordPress SEO with AI',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  seo help                       # Show this help
  seo export                     # Export all posts from WordPress sites
  seo analyze                    # Analyze posts with AI for recommendations
  seo recategorize               # Recategorize posts with AI
  seo seo_check                  # Check SEO quality of titles/descriptions
  seo categories                 # Manage categories across sites
  seo approve                    # Review and approve recommendations
  seo full_pipeline              # Run complete workflow: export → analyze → seo_check
  seo analytics ga4.csv gsc.csv  # Import analytics data
"""
    )

    # Free-form sub-command routing: the command and its arguments are plain
    # positionals; each handler parses its own `args` list.
    parser.add_argument('command', nargs='?', help='Command to run')
    parser.add_argument('args', nargs='*', help='Arguments for the command')

    # Global options, forwarded to every handler as keyword arguments.
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be done without doing it')

    args = parser.parse_args()

    # No command at all: show usage and succeed.
    if not args.command:
        parser.print_help()
        return 0

    # Validate configuration before doing any work; Config.validate() raises
    # ValueError on missing/invalid settings (behavior defined in config.py).
    try:
        Config.validate()
    except ValueError as e:
        print(f"❌ Configuration error: {e}")
        return 1

    # Route to appropriate command handler.  Every handler shares the
    # signature handler(args, verbose=..., dry_run=...) -> int.
    command_map = {
        'help': show_help,
        'export': export_posts,
        'analyze': analyze_posts,
        'recategorize': recategorize_posts,
        'seo_check': seo_check,
        'categories': manage_categories,
        'approve': approve_recommendations,
        'full_pipeline': run_full_pipeline,
        'analytics': import_analytics,
        'gaps': analyze_content_gaps,
        'opportunities': analyze_opportunities,
        'report': generate_report,
        'status': show_status,
    }

    if args.command not in command_map:
        print(f"❌ Unknown command: {args.command}")
        print("\nAvailable commands:")
        for cmd in sorted(command_map.keys()):
            print(f"  {cmd}")
        return 1

    # Execute the command; the handler's return value is the exit code.
    try:
        return command_map[args.command](args.args, verbose=args.verbose, dry_run=args.dry_run)
    except KeyboardInterrupt:
        print("\n⚠️ Operation cancelled by user")
        return 1
    except Exception as e:
        # Broad catch is deliberate at this top-level boundary: report and
        # exit non-zero instead of dumping a traceback at the user.
        print(f"❌ Error running command '{args.command}': {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        return 1
||||
|
||||
|
||||
def show_help(args, verbose=False, dry_run=False):
    """Show help message.

    Prints a static command reference.  NOTE(review): this text is kept in
    sync manually with command_map in main() — verify when adding commands.
    Always returns 0.
    """
    print("""
SEO Automation CLI - Available Commands

Basic Commands:
  export          Export all posts from WordPress sites
  analyze         Analyze posts with AI for recommendations
  recategorize    Recategorize posts with AI suggestions
  seo_check       Check SEO quality of titles/descriptions
  categories      Manage categories across all sites
  approve         Review and approve recommendations
  full_pipeline   Run complete workflow: export → analyze → seo_check

Advanced Commands:
  analytics <ga_file> <gsc_file>  Import analytics data
  gaps            Analyze content gaps
  opportunities   Analyze keyword opportunities
  report          Generate SEO optimization report
  status          Show output files status

Utility:
  help            Show this help message

Examples:
  seo export
  seo analyze
  seo full_pipeline
  seo analytics ga4.csv gsc.csv
""")
    return 0
||||
|
||||
|
||||
def export_posts(args, verbose=False, dry_run=False):
    """Export all posts from WordPress sites.

    Args:
        args: Positional CLI arguments (unused by this command).
        verbose: Verbose-output flag (unused here).
        dry_run: When True, only describe the action and exit.

    Returns:
        Process exit code (always 0).
    """
    if dry_run:
        print("Would export all posts from WordPress sites")
        return 0

    print("📦 Exporting all posts from WordPress sites...")
    PostExporter().run()
    print("✅ Export completed!")
    return 0
||||
|
||||
|
||||
def analyze_posts(args, verbose=False, dry_run=False):
    """Analyze posts with AI for recommendations.

    Args:
        args: Optional CSV path as first element; otherwise the newest
            all_posts_*.csv from output/ is used.
        verbose: Verbose-output flag (unused here).
        dry_run: When True, only describe the action and exit.

    Returns:
        0 on success, 1 when no input CSV can be located.
    """
    if dry_run:
        print("Would analyze posts with AI for recommendations")
        return 0

    print("🤖 Analyzing posts with AI for recommendations...")

    if args:
        csv_file = args[0]
    else:
        # No explicit file: pick the most recently created export.
        exported = list((Path(__file__).parent / 'output').glob('all_posts_*.csv'))
        if not exported:
            print("❌ No exported posts found. Run 'seo export' first or provide a CSV file.")
            print(" Usage: seo analyze <csv_file>")
            return 1
        csv_file = str(max(exported, key=os.path.getctime))

    print(f"Using file: {csv_file}")

    PostAnalyzer(csv_file).run()
    print("✅ AI analysis completed!")
    return 0
||||
|
||||
|
||||
def recategorize_posts(args, verbose=False, dry_run=False):
    """Recategorize posts with AI suggestions.

    Args:
        args: Optional CSV path as first element; otherwise the newest
            all_posts_*.csv from output/ is used.
        verbose: Verbose-output flag (unused here).
        dry_run: When True, only describe the action and exit.

    Returns:
        0 on success, 1 when no input CSV can be located.
    """
    if dry_run:
        print("Would recategorize posts with AI suggestions")
        return 0

    print("🏷️ Recategorizing posts with AI suggestions...")

    if args:
        csv_file = args[0]
    else:
        # No explicit file: pick the most recently created export.
        exported = list((Path(__file__).parent / 'output').glob('all_posts_*.csv'))
        if not exported:
            print("❌ No exported posts found. Run 'seo export' first or provide a CSV file.")
            print(" Usage: seo recategorize <csv_file>")
            return 1
        csv_file = str(max(exported, key=os.path.getctime))

    print(f"Using file: {csv_file}")

    PostRecategorizer(csv_file).run()
    print("✅ Recategorization completed!")
    return 0
||||
|
||||
|
||||
def seo_check(args, verbose=False, dry_run=False):
    """Check SEO quality of titles/descriptions.

    Args:
        args: Optional flags; supports '--top-n=N' to set how many top
            posts receive AI recommendations (default 10).
        verbose: Verbose-output flag (unused here).
        dry_run: When True, only describe the action and exit.

    Returns:
        0 on success, 1 on an invalid --top-n value.
    """
    if dry_run:
        print("Would check SEO quality of titles/descriptions")
        return 0

    print("🔍 Checking SEO quality of titles/descriptions...")

    # Parse optional arguments.
    # Bug fix: the previous condition ("... or '=' in arg") parsed ANY
    # argument containing '=' as a top-n value; only --top-n= is accepted now.
    top_n = 10  # Default
    for arg in args:
        if arg.startswith('--top-n='):
            try:
                top_n = int(arg.split('=', 1)[1])
            except ValueError:
                print(f"❌ Invalid top-n value: {arg}")
                return 1

    analyzer = MultiSiteSEOAnalyzer()
    analyzer.run(use_ai=True, top_n=top_n)
    print("✅ SEO check completed!")
    return 0
||||
|
||||
|
||||
def manage_categories(args, verbose=False, dry_run=False):
    """Manage categories across all sites.

    Args:
        args: Positional CLI arguments (unused by this command).
        verbose: Verbose-output flag (unused here).
        dry_run: When True, only describe the action and exit.

    Returns:
        Process exit code (always 0).
    """
    if dry_run:
        print("Would manage categories across all sites")
        return 0

    print("🗂️ Managing categories across all sites...")
    CategoryManager().run()
    print("✅ Category management completed!")
    return 0
||||
|
||||
|
||||
def approve_recommendations(args, verbose=False, dry_run=False):
    """Review and approve recommendations interactively.

    Args:
        args: Optional list of CSV files to review; when empty, known
            recommendation files are auto-discovered in output/.
        verbose: Verbose-output flag (unused here).
        dry_run: When True, only describe the action and exit.

    Returns:
        0 on success, 1 when no reviewable files are found.
    """
    if dry_run:
        print("Would review and approve recommendations")
        return 0

    print("✅ Reviewing and approving recommendations...")

    if args:
        # Keep only the provided paths that actually exist.
        csv_files = [Path(f) for f in args if Path(f).exists()]
        if not csv_files:
            print("❌ None of the provided files exist.")
            return 1
    else:
        # Auto-discover recommendation files produced by other commands.
        output_dir = Path(__file__).parent / 'output'
        patterns = (
            'category_assignments_*.csv',
            'posts_with_ai_recommendations_*.csv',
            'posts_to_move_*.csv',
            'posts_to_consolidate_*.csv',
            'posts_to_delete_*.csv',
        )
        csv_files = [f for pattern in patterns for f in output_dir.glob(pattern)]
        if not csv_files:
            print("❌ No recommendation files found. Run 'seo analyze' or 'seo categories' first.")
            print(" Or provide a CSV file: seo approve <file1.csv> [file2.csv] ...")
            return 1

    print(f"Found {len(csv_files)} recommendation files to review:")
    for csv_file in csv_files:
        print(f" - {csv_file.name}")

    UserApprovalSystem().run_interactive_approval([str(f) for f in csv_files])
    print("✅ Approval process completed!")
    return 0
||||
|
||||
|
||||
def run_full_pipeline(args, verbose=False, dry_run=False):
    """Run complete workflow: export → analyze → seo_check.

    Args:
        args: Positional CLI arguments (unused by this command).
        verbose: Verbose-output flag (unused here).
        dry_run: When True, only describe the action and exit.

    Returns:
        Process exit code (always 0).
    """
    if dry_run:
        print("Would run full pipeline: export → analyze → seo_check")
        return 0

    print("🚀 Running full SEO automation pipeline...")

    # Step 1: export every site's posts to CSV.
    print("\n📦 Step 1/3: Exporting posts...")
    PostExporter().run()

    # Step 2: analyze the freshest export, if one was produced.
    print("\n🤖 Step 2/3: Analyzing with AI...")
    exports = list((Path(__file__).parent / 'output').glob('all_posts_*.csv'))
    if exports:
        newest = max(exports, key=os.path.getctime)
        PostAnalyzer(str(newest)).run()

    # Step 3: SEO quality pass over titles/descriptions.
    print("\n🔍 Step 3/3: Checking SEO quality...")
    MultiSiteSEOAnalyzer().run(use_ai=True, top_n=10)

    print("\n✅ Full pipeline completed!")
    return 0
||||
|
||||
|
||||
def import_analytics(args, verbose=False, dry_run=False):
    """Import analytics data (GA4 + GSC) and join it onto exported posts.

    Args:
        args: Exactly two paths expected: <ga_file> <gsc_file>.
        verbose: Verbose-output flag (unused here).
        dry_run: When True, only describe the action and exit.

    Returns:
        0 on success, 1 on missing arguments or missing export.
    """
    if dry_run:
        print("Would import analytics data")
        return 0

    if len(args) < 2:
        print("❌ Usage: seo analytics <ga_file> <gsc_file>")
        return 1

    ga_file, gsc_file = args[0], args[1]

    # The analytics join needs an existing posts export to attach to.
    output_dir = Path(__file__).parent / 'output'
    posts_files = list(output_dir.glob('all_posts_*.csv'))
    if not posts_files:
        print("❌ No exported posts found. Run 'seo export' first.")
        return 1

    latest_posts = max(posts_files, key=os.path.getctime)

    print("📊 Importing analytics data...")
    print(f"GA4 file: {ga_file}")
    print(f"GSC file: {gsc_file}")
    print(f"Posts file: {latest_posts.name}")

    AnalyticsImporter().run(
        ga_csv=Path(ga_file),
        gsc_csv=Path(gsc_file),
        posts_csv=latest_posts,
        output_csv=output_dir / 'posts_with_analytics.csv',
    )

    print("✅ Analytics import completed!")
    return 0
||||
|
||||
|
||||
def analyze_content_gaps(args, verbose=False, dry_run=False):
    """Analyze content gaps from analytics-enriched posts and GSC queries.

    Args:
        args: Positional CLI arguments (unused by this command).
        verbose: Verbose-output flag (unused here).
        dry_run: When True, only describe the action and exit.

    Returns:
        0 on success, 1 when a required input file is missing.
    """
    if dry_run:
        print("Would analyze content gaps")
        return 0

    print("🕳️ Analyzing content gaps...")

    base_dir = Path(__file__).parent
    output_dir = base_dir / 'output'

    # Requires the analytics join produced by 'seo analytics'.
    posts_file = output_dir / 'results' / 'posts_with_analytics.csv'
    if not posts_file.exists():
        print("❌ Posts with analytics not found. Run 'seo analytics' first.")
        return 1

    # Primary GSC export location, with a legacy fallback.
    gsc_file = base_dir / 'input' / 'analytics' / 'gsc' / 'Requêtes.csv'
    if not gsc_file.exists():
        gsc_file = output_dir / 'gsc_queries.csv'  # fallback
    if not gsc_file.exists():
        print("❌ GSC queries file not found. Expected at input/analytics/gsc/Requêtes.csv")
        return 1

    ContentGapAnalyzer().run(
        posts_csv=posts_file,
        gsc_csv=gsc_file,
        output_csv=output_dir / 'results' / 'content_gaps.csv',
    )

    print("✅ Content gap analysis completed!")
    return 0
||||
|
||||
|
||||
def analyze_opportunities(args, verbose=False, dry_run=False):
    """Analyze keyword opportunities from analytics-enriched posts.

    Args:
        args: Positional CLI arguments (unused by this command).
        verbose: Verbose-output flag (unused here).
        dry_run: When True, only describe the action and exit.

    Returns:
        0 on success, 1 when the analytics join is missing.
    """
    if dry_run:
        print("Would analyze keyword opportunities")
        return 0

    print("🎯 Analyzing keyword opportunities...")

    results_dir = Path(__file__).parent / 'output' / 'results'
    posts_file = results_dir / 'posts_with_analytics.csv'
    if not posts_file.exists():
        print("❌ Posts with analytics not found. Run 'seo analytics' first.")
        return 1

    OpportunityAnalyzer().run(
        posts_csv=posts_file,
        output_csv=results_dir / 'keyword_opportunities.csv',
    )

    print("✅ Opportunity analysis completed!")
    return 0
||||
|
||||
|
||||
def generate_report(args, verbose=False, dry_run=False):
    """Generate the SEO optimization report (markdown + prioritized CSV).

    Args:
        args: Positional CLI arguments (unused by this command).
        verbose: Verbose-output flag (unused here).
        dry_run: When True, only describe the action and exit.

    Returns:
        0 on success, 1 when the analytics join is missing.
    """
    if dry_run:
        print("Would generate SEO optimization report")
        return 0

    print("📋 Generating SEO optimization report...")

    results_dir = Path(__file__).parent / 'output' / 'results'
    posts_file = results_dir / 'posts_with_analytics.csv'

    # Only the posts file is mandatory; the generator tolerates missing
    # opportunity/gap inputs (behavior defined in report_generator).
    if not posts_file.exists():
        print("❌ Posts with analytics not found. Run 'seo analytics' first.")
        return 1

    ReportGenerator().run(
        posts_csv=posts_file,
        opportunities_csv=results_dir / 'keyword_opportunities.csv',
        gaps_csv=results_dir / 'content_gaps.csv',
        output_md=results_dir / 'seo_optimization_report.md',
        output_prioritized_csv=results_dir / 'posts_prioritized.csv',
    )

    print("✅ Report generation completed!")
    return 0
||||
|
||||
|
||||
def show_status(args, verbose=False, dry_run=False):
    """Show output files status.

    Lists up to the 10 most recently created CSV files in output/ with
    size and modification time.

    Args:
        args: Positional CLI arguments (unused by this command).
        verbose: Verbose-output flag (unused here).
        dry_run: When True, only describe the action and exit.

    Returns:
        Process exit code (always 0; a missing output/ dir is reported,
        not treated as an error).
    """
    # Fix: this import previously sat inside the per-file loop body.
    from datetime import datetime

    if dry_run:
        print("Would show output files status")
        return 0

    print("📊 Output files status:")

    output_dir = Path(__file__).parent / 'output'
    if not output_dir.exists():
        print(" output/ directory not found")
        return 0

    files = list(output_dir.glob('*.csv'))
    if not files:
        print(" No CSV files found in output/")
        return 0

    print(f"\nFound {len(files)} CSV files in output/:")
    for file in sorted(files, key=os.path.getctime, reverse=True)[:10]:  # Show latest 10
        size = file.stat().st_size / 1024  # KB
        date = datetime.fromtimestamp(file.stat().st_mtime).strftime('%Y-%m-%d %H:%M')
        print(f" {file.name} ({size:.1f}KB, {date})")

    return 0
||||
# Add src to path so the integrated package (src/seo/) resolves when the
# executable is run from a checkout without 'pip install -e .'.
src_dir = Path(__file__).parent / 'src'
sys.path.insert(0, str(src_dir))

# Import and run CLI
from seo.cli import main

if __name__ == '__main__':
    # Bug fix: sys.exit(main()) was duplicated; the second call was dead
    # code (sys.exit never returns) and has been removed.
    sys.exit(main())
||||
|
||||
37
setup.cfg
Normal file
37
setup.cfg
Normal file
@@ -0,0 +1,37 @@
|
||||
# Declarative setuptools packaging metadata (src layout).
# NOTE(review): this duplicates setup.py — keep the two in sync, or drop
# one of them to avoid version/metadata drift.
[metadata]
name = seo-automation
version = 1.0.0
description = WordPress SEO automation with AI-powered recommendations
long_description = file: README.md
long_description_content_type = text/markdown
license = MIT
author = SEO Automation Team
url = https://github.com/example/seo-automation
classifiers =
    Development Status :: 4 - Beta
    Intended Audience :: Developers
    Topic :: Internet :: WWW/HTTP
    License :: OSI Approved :: MIT License
    Programming Language :: Python :: 3
    Programming Language :: Python :: 3.8
    Programming Language :: Python :: 3.9
    Programming Language :: Python :: 3.10
    Programming Language :: Python :: 3.11
    Programming Language :: Python :: 3.12

[options]
# src layout: packages live under src/ and are discovered there.
package_dir =
    = src
packages = find:
python_requires = >=3.8
install_requires =
    requests>=2.31.0
    python-dotenv>=1.0.0
    PyYAML>=6.0

[options.packages.find]
where = src

# Installs the 'seo' console command mapped to seo.cli:main.
[options.entry_points]
console_scripts =
    seo = seo.cli:main
||||
55
setup.py
Normal file
55
setup.py
Normal file
@@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env python3
"""
Setup script for SEO Automation Tool

NOTE(review): this duplicates setup.cfg — keep the two in sync, or drop
one of them to avoid version/metadata drift.
"""

from setuptools import setup, find_packages
from pathlib import Path

# Read README for long description (empty string when README.md is absent).
readme_path = Path(__file__).parent / 'README.md'
long_description = readme_path.read_text(encoding='utf-8') if readme_path.exists() else ''

# Read requirements.  NOTE(review): does not handle '-r'/'-e' lines or
# inline comments; acceptable for a flat requirements.txt.
requirements_path = Path(__file__).parent / 'requirements.txt'
requirements = [
    line.strip()
    for line in requirements_path.read_text().splitlines()
    if line.strip() and not line.startswith('#')
] if requirements_path.exists() else []

setup(
    name='seo-automation',
    # Keep in sync with src/seo/__init__.py.__version__ and setup.cfg.
    version='1.0.0',
    author='SEO Automation Team',
    author_email='seo@example.com',
    description='WordPress SEO automation with AI-powered recommendations',
    long_description=long_description,
    long_description_content_type='text/markdown',
    url='https://github.com/example/seo-automation',
    # src layout: the importable 'seo' package lives under src/.
    packages=find_packages(where='src'),
    package_dir={'': 'src'},
    classifiers=[
        'Development Status :: 4 - Beta',
        'Intended Audience :: Developers',
        'Topic :: Internet :: WWW/HTTP',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.8',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
    ],
    python_requires='>=3.8',
    install_requires=requirements,
    # Installs the 'seo' console command mapped to seo.cli:main.
    entry_points={
        'console_scripts': [
            'seo=seo.cli:main',
        ],
    },
    include_package_data=True,
    package_data={
        'seo': ['py.typed'],
    },
)
||||
7
src/seo/__init__.py
Normal file
7
src/seo/__init__.py
Normal file
@@ -0,0 +1,7 @@
|
||||
"""
|
||||
SEO Automation Tool - Integrated Application
|
||||
A comprehensive WordPress SEO automation suite.
|
||||
"""
|
||||
|
||||
__version__ = '1.0.0'
|
||||
__author__ = 'SEO Automation Team'
|
||||
14
src/seo/analyzer.py
Normal file
14
src/seo/analyzer.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
Analyzer Module - AI-powered post analysis
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Import from scripts directory
|
||||
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
|
||||
from ai_analyze_posts_for_decisions import PostAnalyzer
|
||||
|
||||
__all__ = ['PostAnalyzer']
|
||||
255
src/seo/app.py
Normal file
255
src/seo/app.py
Normal file
@@ -0,0 +1,255 @@
|
||||
"""
|
||||
SEO Application Core - Integrated SEO automation functionality
|
||||
"""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
|
||||
from .exporter import PostExporter
|
||||
from .analyzer import PostAnalyzer
|
||||
from .recategorizer import PostRecategorizer
|
||||
from .seo_checker import MultiSiteSEOAnalyzer
|
||||
from .categories import CategoryManager
|
||||
from .approval import UserApprovalSystem
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class SEOApp:
    """
    Main SEO Application class.

    Provides a unified interface for all SEO automation tasks.
    Inspired by Ruby on Rails' Active Record pattern.

    Usage:
        app = SEOApp()
        app.export()
        app.analyze()
        app.seo_check()
    """

    def __init__(self, verbose: bool = False):
        """
        Initialize the SEO application.

        Args:
            verbose: Enable verbose (DEBUG) logging
        """
        self.verbose = verbose
        # output/ lives at the repository root, two levels above src/seo/.
        self.output_dir = Path(__file__).parent.parent.parent / 'output'
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Components are created lazily by the methods that use them.
        self.exporter = None
        self.analyzer = None
        self.recategorizer = None
        self.seo_checker = None
        self.category_manager = None
        self.approval_system = None

        # NOTE(review): basicConfig mutates process-wide logging state —
        # fine for an application entry point, surprising for library use.
        if verbose:
            logging.basicConfig(level=logging.DEBUG)
        else:
            logging.basicConfig(level=logging.INFO)

    def export(self) -> str:
        """
        Export all posts from WordPress sites.

        Returns:
            Path to exported CSV file
        """
        logger.info("📦 Exporting all posts from WordPress sites...")
        self.exporter = PostExporter()
        self.exporter.run()

        # Bug fix: previously the returned path was guessed from today's
        # date without checking it exists (and without verifying the
        # exporter's actual naming).  Prefer the newest real export on
        # disk; fall back to the date-based guess only if none is found.
        csv_file = self._find_latest_export()
        if csv_file is None:
            date_str = datetime.now().strftime('%Y-%m-%d')
            csv_file = str(self.output_dir / f'all_posts_{date_str}.csv')

        logger.info(f"✅ Export completed: {csv_file}")
        return csv_file

    def analyze(self, csv_file: Optional[str] = None) -> str:
        """
        Analyze posts with AI for recommendations.

        Args:
            csv_file: Path to CSV file (uses latest export if not provided)

        Returns:
            Path to the analyzed CSV file

        Raises:
            FileNotFoundError: If no exported CSV can be located.
        """
        logger.info("🤖 Analyzing posts with AI for recommendations...")

        if not csv_file:
            csv_file = self._find_latest_export()

        if not csv_file:
            raise FileNotFoundError("No exported posts found. Run export() first or provide a CSV file.")

        logger.info(f"Using file: {csv_file}")

        self.analyzer = PostAnalyzer(csv_file)
        self.analyzer.run()

        logger.info("✅ AI analysis completed!")
        return csv_file

    def recategorize(self, csv_file: Optional[str] = None) -> str:
        """
        Recategorize posts with AI suggestions.

        Args:
            csv_file: Path to CSV file (uses latest export if not provided)

        Returns:
            Path to the recategorized CSV file

        Raises:
            FileNotFoundError: If no exported CSV can be located.
        """
        logger.info("🏷️ Recategorizing posts with AI suggestions...")

        if not csv_file:
            csv_file = self._find_latest_export()

        if not csv_file:
            raise FileNotFoundError("No exported posts found. Run export() first or provide a CSV file.")

        logger.info(f"Using file: {csv_file}")

        self.recategorizer = PostRecategorizer(csv_file)
        self.recategorizer.run()

        logger.info("✅ Recategorization completed!")
        return csv_file

    def seo_check(self, top_n: int = 10) -> None:
        """
        Check SEO quality of titles and descriptions.

        Args:
            top_n: Number of top posts to get AI recommendations for
        """
        logger.info("🔍 Checking SEO quality of titles/descriptions...")

        self.seo_checker = MultiSiteSEOAnalyzer()
        self.seo_checker.run(use_ai=True, top_n=top_n)

        logger.info("✅ SEO check completed!")

    def categories(self) -> None:
        """Manage categories across all sites."""
        logger.info("🗂️ Managing categories across all sites...")

        self.category_manager = CategoryManager()
        self.category_manager.run()

        logger.info("✅ Category management completed!")

    def approve(self, files: Optional[List[str]] = None) -> None:
        """
        Review and approve recommendations.

        Args:
            files: List of CSV files to review (auto-detects if not provided)

        Raises:
            FileNotFoundError: If no recommendation files can be located.
        """
        logger.info("✅ Reviewing and approving recommendations...")

        self.approval_system = UserApprovalSystem()

        if not files:
            files = self._find_recommendation_files()

        if not files:
            raise FileNotFoundError("No recommendation files found. Run analyze() or categories() first.")

        logger.info(f"Found {len(files)} recommendation files to review")
        self.approval_system.run_interactive_approval(files)

        logger.info("✅ Approval process completed!")

    def full_pipeline(self) -> None:
        """
        Run complete workflow: export → analyze → seo_check
        """
        logger.info("🚀 Running full SEO automation pipeline...")

        logger.info("\n📦 Step 1/3: Exporting posts...")
        self.export()

        logger.info("\n🤖 Step 2/3: Analyzing with AI...")
        self.analyze()

        logger.info("\n🔍 Step 3/3: Checking SEO quality...")
        self.seo_check()

        logger.info("\n✅ Full pipeline completed!")

    def _find_latest_export(self) -> Optional[str]:
        """
        Find the latest exported CSV file.

        Returns:
            Path to latest CSV file or None if not found
        """
        csv_files = list(self.output_dir.glob('all_posts_*.csv'))

        if not csv_files:
            return None

        # "Latest" by creation time (platform-dependent semantics of ctime).
        latest = max(csv_files, key=lambda f: f.stat().st_ctime)
        return str(latest)

    def _find_recommendation_files(self) -> List[str]:
        """
        Find recommendation files in the output directory.

        Returns:
            List of paths to recommendation files
        """
        patterns = [
            'category_assignments_*.csv',
            'posts_with_ai_recommendations_*.csv',
            'posts_to_move_*.csv',
            'posts_to_consolidate_*.csv',
            'posts_to_delete_*.csv'
        ]

        files = []
        for pattern in patterns:
            files.extend(self.output_dir.glob(pattern))

        return [str(f) for f in files]

    def status(self) -> dict:
        """
        Get status of output files.

        Returns:
            Dictionary with 'total_files' and up to 10 newest 'files'
            entries (name, size_kb, modified timestamp string).
        """
        files = list(self.output_dir.glob('*.csv'))

        status_info = {
            'total_files': len(files),
            'files': []
        }

        for file in sorted(files, key=lambda f: f.stat().st_ctime, reverse=True)[:10]:
            status_info['files'].append({
                'name': file.name,
                'size_kb': file.stat().st_size / 1024,
                'modified': datetime.fromtimestamp(file.stat().st_mtime).strftime('%Y-%m-%d %H:%M')
            })

        return status_info
||||
14
src/seo/approval.py
Normal file
14
src/seo/approval.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
Approval System Module - User approval for recommendations
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Import from scripts directory
|
||||
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
|
||||
sys.path.insert(0, str(scripts_dir))
|
||||
|
||||
from user_approval import UserApprovalSystem
|
||||
|
||||
__all__ = ['UserApprovalSystem']
|
||||
14
src/seo/categories.py
Normal file
14
src/seo/categories.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
Category Manager Module - Category management across sites
|
||||
"""
|
||||
|
||||
import sys
from pathlib import Path

# The real implementation still lives in the top-level scripts/ directory;
# expose it from this package without requiring an install of scripts/.
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
# Every seo.* shim module performs this same path setup, so guard against
# prepending a duplicate sys.path entry on each import.
if str(scripts_dir) not in sys.path:
    sys.path.insert(0, str(scripts_dir))

from category_manager import CategoryManager

__all__ = ['CategoryManager']
|
||||
223
src/seo/cli.py
Normal file
223
src/seo/cli.py
Normal file
@@ -0,0 +1,223 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
SEO Automation CLI - Main entry point
|
||||
Unified command-line interface for SEO automation.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import argparse
|
||||
from pathlib import Path
|
# Make the `seo` package importable when this file is executed directly
# (e.g. `python src/seo/cli.py`) rather than via the installed console
# script.  This file lives at src/seo/cli.py, so the directory containing
# the package is two levels up (src/), not a `src` subdirectory of this one.
src_dir = Path(__file__).parent.parent
if str(src_dir) not in sys.path:
    sys.path.insert(0, str(src_dir))

from seo.app import SEOApp
from seo.config import Config
|
||||
|
||||
|
||||
def main():
    """Main CLI entry point.

    Parses top-level flags, validates configuration, then routes the
    free-form positional sub-command through a dispatch table.

    Returns:
        Process exit code: 0 on success (or when help is shown), 1 on any
        configuration error, unknown command, interrupt, or handler failure.
    """
    parser = argparse.ArgumentParser(
        prog='seo',
        description='SEO Automation CLI - Manage WordPress SEO with AI',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  seo export                Export all posts from WordPress sites
  seo analyze               Analyze posts with AI for recommendations
  seo analyze posts.csv     Analyze specific CSV file
  seo recategorize          Recategorize posts with AI
  seo seo_check             Check SEO quality of titles/descriptions
  seo categories            Manage categories across sites
  seo approve               Review and approve recommendations
  seo full_pipeline         Run complete workflow: export → analyze → seo_check
  seo status                Show output files status
"""
    )

    # The sub-command is a plain optional positional (not argparse
    # subparsers); extra positionals are collected into `args.args` and
    # interpreted per-command by the handlers below.
    parser.add_argument('command', nargs='?', help='Command to run')
    parser.add_argument('args', nargs='*', help='Arguments for the command')
    parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')
    parser.add_argument('--dry-run', action='store_true', help='Show what would be done')
    parser.add_argument('--top-n', type=int, default=10, help='Number of top posts for AI analysis')

    args = parser.parse_args()

    # No sub-command given: show usage and exit successfully.
    if not args.command:
        parser.print_help()
        return 0

    # Fail fast on missing credentials/API key before touching the network.
    # Validate configuration
    try:
        Config.validate()
    except ValueError as e:
        print(f"❌ Configuration error: {e}")
        return 1

    # Create application instance
    app = SEOApp(verbose=args.verbose)

    # Route to appropriate command: name -> handler(app, args) -> exit code.
    commands = {
        'export': cmd_export,
        'analyze': cmd_analyze,
        'recategorize': cmd_recategorize,
        'seo_check': cmd_seo_check,
        'categories': cmd_categories,
        'approve': cmd_approve,
        'full_pipeline': cmd_full_pipeline,
        'status': cmd_status,
        'help': cmd_help,
    }

    if args.command not in commands:
        print(f"❌ Unknown command: {args.command}")
        print("\nAvailable commands:")
        for cmd in sorted(commands.keys()):
            print(f"  {cmd}")
        return 1

    try:
        return commands[args.command](app, args)
    except KeyboardInterrupt:
        print("\n⚠️ Operation cancelled by user")
        return 1
    except FileNotFoundError as e:
        print(f"❌ File not found: {e}")
        return 1
    except Exception as e:
        # Last-resort boundary: report and exit non-zero; full traceback
        # only when the user asked for verbosity.
        print(f"❌ Error: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        return 1
|
||||
|
||||
|
||||
def cmd_export(app, args):
    """Handle the `export` sub-command: dump all WordPress posts to CSV."""
    if not args.dry_run:
        app.export()
    else:
        print("Would export all posts from WordPress sites")
    return 0
|
||||
|
||||
|
||||
def cmd_analyze(app, args):
    """Handle the `analyze` sub-command: run AI analysis on posts.

    An optional first positional argument names a specific CSV file;
    otherwise the application picks its default input.
    """
    if args.dry_run:
        print("Would analyze posts with AI for recommendations")
        return 0

    target = None
    if args.args:
        target = args.args[0]
    app.analyze(target)
    return 0
|
||||
|
||||
|
||||
def cmd_recategorize(app, args):
    """Handle the `recategorize` sub-command: AI-driven category moves.

    An optional first positional argument names a specific CSV input file.
    """
    if args.dry_run:
        print("Would recategorize posts with AI suggestions")
        return 0

    target = None
    if args.args:
        target = args.args[0]
    app.recategorize(target)
    return 0
|
||||
|
||||
|
||||
def cmd_seo_check(app, args):
    """Handle the `seo_check` sub-command: audit titles/descriptions.

    `--top-n` controls how many posts are analyzed.
    """
    if args.dry_run:
        print("Would check SEO quality of titles/descriptions")
        return 0

    app.seo_check(top_n=args.top_n)
    return 0
|
||||
|
||||
|
||||
def cmd_categories(app, args):
    """Handle the `categories` sub-command: manage categories on all sites."""
    if not args.dry_run:
        app.categories()
    else:
        print("Would manage categories across all sites")
    return 0
|
||||
|
||||
|
||||
def cmd_approve(app, args):
    """Handle the `approve` sub-command: review recommendation files.

    Positional arguments, when present, select specific files; an empty
    list is normalized to None so the app falls back to its defaults.
    """
    if args.dry_run:
        print("Would review and approve recommendations")
        return 0

    app.approve(args.args or None)
    return 0
|
||||
|
||||
|
||||
def cmd_full_pipeline(app, args):
    """Handle the `full_pipeline` sub-command: export → analyze → seo_check."""
    if not args.dry_run:
        app.full_pipeline()
    else:
        print("Would run full pipeline: export → analyze → seo_check")
    return 0
|
||||
|
||||
|
||||
def cmd_status(app, args):
    """Handle the `status` sub-command: list recent output CSV files."""
    if args.dry_run:
        print("Would show output files status")
        return 0

    report = app.status()

    print("📊 Output files status:")
    total = report['total_files']
    if total > 0:
        print(f"\nFound {total} CSV files in output/:")
        for entry in report['files']:
            name = entry['name']
            size_kb = entry['size_kb']
            modified = entry['modified']
            print(f"  {name} ({size_kb:.1f}KB, {modified})")
    else:
        print("  No CSV files found in output/")

    return 0
|
||||
|
||||
|
||||
def cmd_help(app, args):
    """Show help.

    Prints the full command reference.  `app` and `args` are unused; they
    are accepted only so this handler matches the (app, args) signature the
    dispatch table in main() expects.
    """
    print("""
SEO Automation CLI - Available Commands

Basic Commands:
  export                    Export all posts from WordPress sites
  analyze [csv_file]        Analyze posts with AI (optional CSV input)
  recategorize [csv_file]   Recategorize posts with AI (optional CSV input)
  seo_check                 Check SEO quality of titles/descriptions
  categories                Manage categories across all sites
  approve [files...]        Review and approve recommendations
  full_pipeline             Run complete workflow: export → analyze → seo_check

Utility:
  status                    Show output files status
  help                      Show this help message

Options:
  --verbose, -v             Enable verbose logging
  --dry-run                 Show what would be done without doing it
  --top-n N                 Number of top posts for AI analysis (default: 10)

Examples:
  seo export
  seo analyze
  seo analyze output/all_posts_2026-02-16.csv
  seo approve output/category_assignments_*.csv
  seo full_pipeline
  seo status
""")
    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Support running this module directly (outside the installed `seo`
    # console-script entry point); propagate the handler's exit code.
    sys.exit(main())
|
||||
70
src/seo/config.py
Normal file
70
src/seo/config.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""
|
||||
Configuration module for SEO application.
|
||||
Loads configuration from environment variables and YAML.
|
||||
"""
|
||||
|
||||
import os
|
||||
import yaml
|
||||
from dotenv import load_dotenv
|
||||
from pathlib import Path
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
class Config:
    """Configuration class for SEO automation.

    All settings are class attributes computed once at import time.
    Precedence for every value: environment variable first, then the
    matching key in config.yaml, then a hard-coded default.
    """

    # NOTE(review): this file lives at src/seo/config.py, so parent.parent
    # resolves to src/config.yaml — confirm that is the intended location
    # (a repo-root config.yaml would need one more .parent).
    CONFIG_FILE = Path(__file__).parent.parent / 'config.yaml'

    # This if/else runs in the class body at import time; YAML_CONFIG is a
    # class attribute either way.
    # NOTE(review): yaml.safe_load returns None for an empty file, which
    # would break the .get(...) chains below — confirm config.yaml is
    # always a non-empty mapping when present.
    if CONFIG_FILE.exists():
        with open(CONFIG_FILE, 'r', encoding='utf-8') as f:
            YAML_CONFIG = yaml.safe_load(f)
    else:
        YAML_CONFIG = {}

    # WordPress Settings (primary site); env vars override YAML.
    WORDPRESS_URL = os.getenv('WORDPRESS_URL', YAML_CONFIG.get('primary_site', {}).get('url', '')).rstrip('/')
    WORDPRESS_USERNAME = os.getenv('WORDPRESS_USERNAME', YAML_CONFIG.get('primary_site', {}).get('username', ''))
    WORDPRESS_APP_PASSWORD = os.getenv('WORDPRESS_APP_PASSWORD', YAML_CONFIG.get('primary_site', {}).get('password', ''))

    # Multi-site Configuration: per-site credentials fall back to the
    # shared WORDPRESS_USERNAME / WORDPRESS_APP_PASSWORD env vars.
    WORDPRESS_SITES = {
        'mistergeek.net': {
            'url': os.getenv('WORDPRESS_MISTERGEEK_URL', YAML_CONFIG.get('wordpress_sites', {}).get('mistergeek.net', {}).get('url', 'https://www.mistergeek.net')),
            'username': os.getenv('WORDPRESS_MISTERGEEK_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_MISTERGEEK_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        },
        'webscroll.fr': {
            'url': os.getenv('WORDPRESS_WEBSCROLL_URL', YAML_CONFIG.get('wordpress_sites', {}).get('webscroll.fr', {}).get('url', 'https://www.webscroll.fr')),
            'username': os.getenv('WORDPRESS_WEBSCROLL_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_WEBSCROLL_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        },
        'hellogeek.net': {
            'url': os.getenv('WORDPRESS_HELLOGEEK_URL', YAML_CONFIG.get('wordpress_sites', {}).get('hellogeek.net', {}).get('url', 'https://www.hellogeek.net')),
            'username': os.getenv('WORDPRESS_HELLOGEEK_USERNAME', os.getenv('WORDPRESS_USERNAME', '')),
            'password': os.getenv('WORDPRESS_HELLOGEEK_PASSWORD', os.getenv('WORDPRESS_APP_PASSWORD', '')),
        }
    }

    # OpenRouter API Settings
    OPENROUTER_API_KEY = os.getenv('OPENROUTER_API_KEY', YAML_CONFIG.get('ai_model', {}).get('api_key', ''))
    AI_MODEL = os.getenv('AI_MODEL', YAML_CONFIG.get('ai_model', {}).get('name', 'anthropic/claude-3.5-sonnet'))

    @classmethod
    def validate(cls) -> bool:
        """Validate configuration.

        Checks that the primary-site credentials and the OpenRouter API key
        are all present, collecting every missing value before reporting.

        Returns:
            True when all required settings are set.

        Raises:
            ValueError: listing every missing required setting.
        """
        errors = []

        if not cls.WORDPRESS_URL:
            errors.append("WORDPRESS_URL is required")
        if not cls.WORDPRESS_USERNAME:
            errors.append("WORDPRESS_USERNAME is required")
        if not cls.WORDPRESS_APP_PASSWORD:
            errors.append("WORDPRESS_APP_PASSWORD is required")
        if not cls.OPENROUTER_API_KEY:
            errors.append("OPENROUTER_API_KEY is required")

        if errors:
            raise ValueError("Configuration errors:\n" + "\n".join(f"  - {e}" for e in errors))

        return True
|
||||
226
src/seo/exporter.py
Normal file
226
src/seo/exporter.py
Normal file
@@ -0,0 +1,226 @@
|
||||
"""
|
||||
Post Exporter Module - Export posts from WordPress sites
|
||||
"""
|
||||
|
||||
import csv
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
import re
|
||||
|
||||
from .config import Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class PostExporter:
    """Export posts from WordPress sites to CSV.

    Workflow (see run()): for each configured site, fetch the category
    id→name map and all posts via the WP REST API, flatten each post into a
    CSV row, then write one combined CSV under output/.
    """

    def __init__(self):
        """Initialize the exporter."""
        # Site name -> {'url', 'username', 'password'} from configuration.
        self.sites = Config.WORDPRESS_SITES
        # Flattened rows across all sites, filled by run().
        self.all_posts = []
        # Per-site category maps, so each site is queried at most once.
        self.category_cache = {}

    def fetch_category_names(self, site_name: str, site_config: Dict) -> Dict[int, Dict[str, str]]:
        """Fetch category names from a WordPress site.

        Returns a map of category id -> {'name': ..., 'slug': ...}; cached
        per site.  On any request failure the (possibly empty) partial map
        is cached and returned — category lookup is best-effort.

        NOTE(review): only a single page of up to 100 categories is
        requested; sites with more categories are truncated — confirm.
        """
        if site_name in self.category_cache:
            return self.category_cache[site_name]

        logger.info(f"  Fetching categories from {site_name}...")
        categories = {}
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/categories"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])

        try:
            response = requests.get(api_url, params={'per_page': 100}, auth=auth, timeout=10)
            response.raise_for_status()

            for cat in response.json():
                categories[cat['id']] = {'name': cat.get('name', ''), 'slug': cat.get('slug', '')}
            logger.info(f"  ✓ Fetched {len(categories)} categories")
        except Exception as e:
            logger.warning(f"  Could not fetch categories from {site_name}: {e}")

        self.category_cache[site_name] = categories
        return categories

    def fetch_posts_from_site(self, site_name: str, site_config: Dict) -> List[Dict]:
        """Fetch all posts from a WordPress site.

        Pages through the /wp/v2/posts endpoint once per status
        ('publish', then 'draft'), 100 posts per page, sleeping 0.5s
        between pages to be polite to the remote API.
        """
        logger.info(f"\nFetching posts from {site_name}...")

        posts = []
        base_url = site_config['url'].rstrip('/')
        api_url = f"{base_url}/wp-json/wp/v2/posts"
        auth = HTTPBasicAuth(site_config['username'], site_config['password'])

        for status in ['publish', 'draft']:
            page = 1
            status_count = 0

            while True:
                try:
                    logger.info(f"  Fetching page {page} ({status} posts)...")
                    response = requests.get(
                        api_url,
                        params={'page': page, 'per_page': 100, 'status': status},
                        auth=auth,
                        timeout=10
                    )
                    response.raise_for_status()

                    # An empty page means pagination for this status is done.
                    page_posts = response.json()
                    if not page_posts:
                        break

                    posts.extend(page_posts)
                    status_count += len(page_posts)
                    logger.info(f"  ✓ Got {len(page_posts)} posts (total: {len(posts)})")

                    page += 1
                    time.sleep(0.5)

                except requests.exceptions.HTTPError as e:
                    # HTTP 400 past the last page is treated as the normal
                    # end-of-pagination signal — presumably the WP REST
                    # invalid-page response; TODO confirm for these sites.
                    if response.status_code == 400:
                        logger.info(f"  ℹ API limit reached (got {status_count} {status} posts)")
                        break
                    else:
                        logger.error(f"Error on page {page}: {e}")
                        break
                except requests.exceptions.RequestException as e:
                    # Network-level failure: give up on this status, keep
                    # whatever was fetched so far.
                    logger.error(f"Error fetching from {site_name}: {e}")
                    break

        logger.info(f"✓ Total posts from {site_name}: {len(posts)}\n")
        return posts

    def extract_post_details(self, post: Dict, site_name: str, category_map: Dict) -> Dict:
        """Extract post details for CSV export.

        Flattens one raw REST post object into the flat dict written as a
        CSV row.  HTML tags are stripped naively via regex; the content
        preview is truncated to 500 characters BEFORE tag stripping.
        """
        # REST responses wrap rendered fields as {'rendered': ...}.
        title = post.get('title', {})
        if isinstance(title, dict):
            title = title.get('rendered', '')

        content = post.get('content', {})
        if isinstance(content, dict):
            content = content.get('rendered', '')
        content_text = re.sub('<[^<]+?>', '', content)[:500]

        excerpt = post.get('excerpt', {})
        if isinstance(excerpt, dict):
            excerpt = excerpt.get('rendered', '')
        excerpt_text = re.sub('<[^<]+?>', '', excerpt)

        # SEO meta: Rank Math first, falling back to Yoast.
        meta_dict = post.get('meta', {}) if isinstance(post.get('meta'), dict) else {}
        meta_description = (
            meta_dict.get('rank_math_description', '') or
            meta_dict.get('_yoast_wpseo_metadesc', '') or ''
        )

        # Resolve category ids to names; unknown ids fall back to the raw id.
        category_ids = post.get('categories', [])
        category_names = ', '.join([
            category_map.get(cat_id, {}).get('name', str(cat_id))
            for cat_id in category_ids
        ]) if category_ids else ''

        return {
            'site': site_name,
            'post_id': post['id'],
            'status': post.get('status', 'publish'),
            'title': title.strip(),
            'slug': post.get('slug', ''),
            'url': post.get('link', ''),
            'author_id': post.get('author', ''),
            'date_published': post.get('date', ''),
            'date_modified': post.get('modified', ''),
            'categories': category_names,
            'tags': ', '.join([str(t) for t in post.get('tags', [])]),
            'excerpt': excerpt_text.strip(),
            'content_preview': content_text.strip(),
            'seo_title': meta_dict.get('rank_math_title', ''),
            'meta_description': meta_description,
            'focus_keyword': meta_dict.get('rank_math_focus_keyword', ''),
            # Word count of the (truncated) 500-char preview, not the full post.
            'word_count': len(content_text.split()),
        }

    def export_to_csv(self, output_file: Optional[str] = None) -> Optional[str]:
        """Export all posts to CSV.

        Args:
            output_file: Destination path; defaults to
                output/all_posts_<YYYY-MM-DD>.csv at the project root.

        Returns:
            The path written as a string, or None when there are no posts.
        """
        if not output_file:
            output_dir = Path(__file__).parent.parent.parent / 'output'
            output_dir.mkdir(parents=True, exist_ok=True)
            date_str = datetime.now().strftime('%Y-%m-%d')
            output_file = output_dir / f'all_posts_{date_str}.csv'

        output_file = Path(output_file)
        output_file.parent.mkdir(parents=True, exist_ok=True)

        if not self.all_posts:
            logger.error("No posts to export")
            return None

        fieldnames = [
            'site', 'post_id', 'status', 'title', 'slug', 'url', 'author_id',
            'date_published', 'date_modified', 'categories', 'tags', 'excerpt',
            'content_preview', 'seo_title', 'meta_description', 'focus_keyword', 'word_count',
        ]

        logger.info(f"Exporting {len(self.all_posts)} posts to CSV...")

        with open(output_file, 'w', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(self.all_posts)

        logger.info(f"✓ CSV exported to: {output_file}")
        return str(output_file)

    def run(self) -> None:
        """Run the complete export process.

        Fetches categories and posts from every configured site, flattens
        them into self.all_posts, writes the combined CSV, and logs a
        per-site published/draft summary.
        """
        logger.info("="*70)
        logger.info("EXPORTING ALL POSTS FOR AI DECISION MAKING")
        logger.info("="*70)
        logger.info("Sites configured: " + ", ".join(self.sites.keys()))

        for site_name, config in self.sites.items():
            categories = self.fetch_category_names(site_name, config)
            posts = self.fetch_posts_from_site(site_name, config)

            if posts:
                for post in posts:
                    post_details = self.extract_post_details(post, site_name, categories)
                    self.all_posts.append(post_details)

        if not self.all_posts:
            logger.error("No posts found on any site")
            return

        # Stable ordering: group by site, then by post id.
        self.all_posts.sort(key=lambda x: (x['site'], x['post_id']))
        self.export_to_csv()

        # Print summary
        logger.info("\n" + "="*70)
        logger.info("EXPORT SUMMARY")
        logger.info("="*70)

        by_site = {}
        for post in self.all_posts:
            site = post['site']
            if site not in by_site:
                by_site[site] = {'total': 0, 'published': 0, 'draft': 0}
            by_site[site]['total'] += 1
            if post['status'] == 'publish':
                by_site[site]['published'] += 1
            else:
                by_site[site]['draft'] += 1

        for site, stats in sorted(by_site.items()):
            logger.info(f"\n{site}:")
            logger.info(f"  Total: {stats['total']}")
            logger.info(f"  Published: {stats['published']}")
            logger.info(f"  Drafts: {stats['draft']}")

        logger.info(f"\n✓ Export complete!")
||||
14
src/seo/recategorizer.py
Normal file
14
src/seo/recategorizer.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
Recategorizer Module - AI-powered post recategorization
|
||||
"""
|
||||
|
||||
import sys
from pathlib import Path

# The real implementation still lives in the top-level scripts/ directory;
# expose it from this package without requiring an install of scripts/.
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
# Every seo.* shim module performs this same path setup, so guard against
# prepending a duplicate sys.path entry on each import.
if str(scripts_dir) not in sys.path:
    sys.path.insert(0, str(scripts_dir))

from ai_recategorize_posts import PostRecategorizer

__all__ = ['PostRecategorizer']
|
||||
14
src/seo/seo_checker.py
Normal file
14
src/seo/seo_checker.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
SEO Checker Module - SEO quality analysis
|
||||
"""
|
||||
|
||||
import sys
from pathlib import Path

# The real implementation still lives in the top-level scripts/ directory;
# expose it from this package without requiring an install of scripts/.
scripts_dir = Path(__file__).parent.parent.parent / 'scripts'
# Every seo.* shim module performs this same path setup, so guard against
# prepending a duplicate sys.path entry on each import.
if str(scripts_dir) not in sys.path:
    sys.path.insert(0, str(scripts_dir))

from multi_site_seo_analyzer import MultiSiteSEOAnalyzer

__all__ = ['MultiSiteSEOAnalyzer']
|
||||
Reference in New Issue
Block a user