def import_media(self, migration_report: str,
                 source_site: str = 'mistergeek.net',
                 destination_site: str = 'hellogeek.net',
                 dry_run: bool = True) -> Dict:
    """
    Copy media for already-migrated posts from one site to another.

    Builds a WordPressMediaImporter for the two sites and hands it the
    migration report; the importer does all the work.

    Args:
        migration_report: Path to migration report CSV
        source_site: Name of the site the media is read from
        destination_site: Name of the site the media is written to
        dry_run: If True, preview without importing

    Returns:
        Statistics dict produced by the importer
    """
    logger.info(f"šŸ“ø Importing media from {source_site} to {destination_site}...")

    # The importer constructor validates both site names.
    media_importer = WordPressMediaImporter(source_site, destination_site)
    import_stats = media_importer.run_from_migration_report(
        migration_report,
        dry_run=dry_run,
    )
    return import_stats
def cmd_import_media(app, args):
    """Import media from source to destination site for migrated posts.

    Args:
        app: Application object exposing ``import_media(migration_report,
            source_site, destination_site, dry_run)``.
        args: Parsed CLI namespace. Reads ``dry_run``, ``from_site``,
            ``to_site`` and the positional list ``args`` (first item is the
            migration report CSV path).

    Returns:
        Process exit code: 0 on success or dry run, 1 when the migration
        report argument is missing.
    """
    # Resolve the defaults once so the dry-run preview and the real run
    # can never drift apart.
    source_site = args.from_site or 'mistergeek.net'
    dest_site = args.to_site or 'hellogeek.net'
    migration_report = args.args[0] if args.args else None

    if args.dry_run:
        print("Would import media")
        print(f" Source: {source_site}")
        print(f" Destination: {dest_site}")
        if args.args:
            print(f" Migration report: {migration_report}")
        return 0

    if not migration_report:
        print("❌ Migration report CSV required")
        # Name the missing positional argument in the usage hint.
        print(" Usage: seo import_media <migration_report.csv>")
        return 1

    print(f"Importing media from {source_site} to {dest_site}...")
    print(f"Migration report: {migration_report}")

    stats = app.import_media(
        migration_report=migration_report,
        source_site=source_site,
        destination_site=dest_site,
        dry_run=False,
    )

    if stats:
        print("\n✅ Media import completed!")
        print("\n📊 Summary:")
        print(f" Total posts: {stats.get('total_posts', 0)}")
        print(f" Posts with media: {stats.get('posts_with_media', 0)}")
        print(f" Images uploaded: {stats.get('images_uploaded', 0)}")
        print(f" Featured images set: {stats.get('featured_images_set', 0)}")
        print(f" Errors: {stats.get('errors', 0)}")
    return 0
logger = logging.getLogger(__name__)


class WordPressMediaImporter:
    """Import media from source WordPress site to destination site.

    Only featured images are handled; inline content images are not yet
    imported. Site URLs and credentials are read from
    ``Config.WORDPRESS_SITES`` keyed by site name. ``stats`` accumulates
    counters for the lifetime of the instance and ``media_cache`` remembers
    which source media IDs were already copied.
    """

    # Extension to use when a source media item exposes no usable file name.
    _MIME_EXTENSIONS: Dict[str, str] = {
        'image/jpeg': '.jpg',
        'image/png': '.png',
        'image/gif': '.gif',
        'image/webp': '.webp',
        'image/svg+xml': '.svg',
    }

    def __init__(self, source_site: str = 'mistergeek.net',
                 destination_site: str = 'hellogeek.net'):
        """
        Initialize media importer.

        Args:
            source_site: Source site name
            destination_site: Destination site name

        Raises:
            ValueError: If either site name is not configured
        """
        self.source_site = source_site
        self.destination_site = destination_site
        self.sites = Config.WORDPRESS_SITES

        # Validate sites
        if source_site not in self.sites:
            raise ValueError(f"Source site '{source_site}' not found")
        if destination_site not in self.sites:
            raise ValueError(f"Destination site '{destination_site}' not found")

        # Setup source
        self.source_config = self.sites[source_site]
        self.source_url = self.source_config['url'].rstrip('/')
        self.source_auth = HTTPBasicAuth(
            self.source_config['username'],
            self.source_config['password']
        )

        # Setup destination
        self.dest_config = self.sites[destination_site]
        self.dest_url = self.dest_config['url'].rstrip('/')
        self.dest_auth = HTTPBasicAuth(
            self.dest_config['username'],
            self.dest_config['password']
        )

        self.media_cache = {}  # Cache source media ID -> dest media ID
        self.stats = {
            'total_posts': 0,
            'posts_with_media': 0,
            'images_downloaded': 0,
            'images_uploaded': 0,
            'featured_images_set': 0,
            'errors': 0
        }

    def fetch_migrated_posts(self, post_ids: Optional[List[int]] = None) -> List[Dict]:
        """
        Fetch posts that need media imported from the destination site.

        Args:
            post_ids: Specific post IDs to process. When omitted, a single
                page of up to 100 published/draft posts is returned.

        Returns:
            List of post dicts (posts that could not be fetched are left out)
        """
        logger.info(f"Fetching posts from {self.destination_site}...")

        if post_ids:
            posts = []
            for post_id in post_ids:
                try:
                    response = requests.get(
                        f"{self.dest_url}/wp-json/wp/v2/posts/{post_id}",
                        auth=self.dest_auth,
                        timeout=10
                    )
                    if response.status_code == 200:
                        posts.append(response.json())
                    else:
                        # Previously skipped without any trace in the log.
                        logger.warning(
                            f"Post {post_id} not fetched: HTTP {response.status_code}"
                        )
                except (requests.RequestException, ValueError) as e:
                    logger.error(f"Error fetching post {post_id}: {e}")
            return posts

        # Fetch recent posts (assuming migrated posts are recent)
        try:
            response = requests.get(
                f"{self.dest_url}/wp-json/wp/v2/posts",
                params={
                    'per_page': 100,
                    'status': 'publish,draft',
                    '_embed': True
                },
                auth=self.dest_auth,
                timeout=30
            )
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError) as e:
            logger.error(f"Error fetching posts: {e}")
            return []

    def get_source_post(self, post_id: int) -> Optional[Dict]:
        """
        Fetch corresponding post from source site.

        Args:
            post_id: Post ID on source site

        Returns:
            Post dict or None
        """
        try:
            response = requests.get(
                f"{self.source_url}/wp-json/wp/v2/posts/{post_id}",
                auth=self.source_auth,
                timeout=10,
                params={'_embed': True}
            )
            if response.status_code != 200:
                logger.warning(f"Source post {post_id} not found")
                return None
            return response.json()
        except (requests.RequestException, ValueError) as e:
            logger.error(f"Error fetching source post {post_id}: {e}")
            return None

    def download_media(self, media_url: str) -> Optional[bytes]:
        """
        Download media file from source site.

        Args:
            media_url: URL of media file

        Returns:
            File content bytes or None
        """
        if not media_url:
            logger.error("Error downloading media: source item has no URL")
            return None

        try:
            response = requests.get(media_url, timeout=30)
            response.raise_for_status()
            return response.content
        except requests.RequestException as e:
            logger.error(f"Error downloading {media_url}: {e}")
            return None

    def upload_media(self, file_content: bytes, filename: str,
                     mime_type: str = 'image/jpeg',
                     alt_text: str = '',
                     caption: str = '') -> Optional[int]:
        """
        Upload media to destination site.

        Args:
            file_content: File content bytes
            filename: Filename for the media
            mime_type: MIME type of the file
            alt_text: Alt text for the image
            caption: Caption for the image

        Returns:
            Media ID on destination site or None
        """
        safe_name = filename.replace('"', '')
        try:
            # Send the raw bytes as the request body. Combining ``files=``
            # (multipart encoding) with an explicit image Content-Type makes
            # the server store the multipart envelope as the image itself.
            response = requests.post(
                f"{self.dest_url}/wp-json/wp/v2/media",
                data=file_content,
                auth=self.dest_auth,
                headers={
                    'Content-Disposition': f'attachment; filename="{safe_name}"',
                    'Content-Type': mime_type
                },
                timeout=30
            )
            if response.status_code != 201:
                logger.error(f"Error uploading {filename}: {response.status_code}")
                return None
            media_id = response.json()['id']
        except (requests.RequestException, ValueError, KeyError) as e:
            logger.error(f"Error uploading {filename}: {e}")
            return None

        logger.info(f"✓ Uploaded {filename} (ID: {media_id})")
        # Best effort: a failed text update must not discard the upload.
        self._update_media_text(media_id, alt_text, caption)
        return media_id

    def _update_media_text(self, media_id: int, alt_text: str, caption: str) -> None:
        """Set alt text / caption on an uploaded destination media item."""
        fields = {}
        if alt_text:
            fields['alt_text'] = alt_text
        if caption:
            fields['caption'] = caption
        if not fields:
            return

        try:
            response = requests.post(
                f"{self.dest_url}/wp-json/wp/v2/media/{media_id}",
                json=fields,
                auth=self.dest_auth,
                timeout=10
            )
        except requests.RequestException as e:
            logger.warning(f"Could not update text for media {media_id}: {e}")
            return

        if response.status_code != 200:
            logger.warning(
                f"Could not update text for media {media_id}: {response.status_code}"
            )

    @classmethod
    def _media_filename(cls, media_data: Dict, media_url: str) -> str:
        """Pick an upload filename whose extension matches the real file."""
        # Prefer the name the source site serves the file under.
        url_path = media_url.split('?', 1)[0].split('#', 1)[0]
        name = url_path.rsplit('/', 1)[-1]
        if name and '.' in name.strip('.'):
            return name

        # Otherwise build one from the slug and the reported MIME type.
        mime_type = media_data.get('mime_type', 'image/jpeg')
        extension = cls._MIME_EXTENSIONS.get(mime_type, '.jpg')
        return (media_data.get('slug') or 'image') + extension

    def _copy_media_item(self, source_media_id: int) -> Optional[int]:
        """Download one source media item and upload it to the destination."""
        try:
            media_response = requests.get(
                f"{self.source_url}/wp-json/wp/v2/media/{source_media_id}",
                auth=self.source_auth,
                timeout=10
            )
            if media_response.status_code != 200:
                logger.error(f"Could not fetch media {source_media_id}")
                return None
            media_data = media_response.json()
        except (requests.RequestException, ValueError) as e:
            logger.error(f"Error importing featured image: {e}")
            return None

        # Fall back to the GUID when the item exposes no source_url.
        media_url = (media_data.get('source_url')
                     or (media_data.get('guid') or {}).get('rendered', ''))

        file_content = self.download_media(media_url)
        if not file_content:
            return None
        self.stats['images_downloaded'] += 1

        dest_media_id = self.upload_media(
            file_content,
            self._media_filename(media_data, media_url),
            media_data.get('mime_type', 'image/jpeg'),
            media_data.get('alt_text', ''),
            (media_data.get('caption') or {}).get('rendered', ''),
        )
        if not dest_media_id:
            return None

        # Cache the mapping so a shared image is only copied once.
        self.media_cache[source_media_id] = dest_media_id
        self.stats['images_uploaded'] += 1
        return dest_media_id

    def import_featured_image(self, source_post: Dict, dest_post_id: int) -> bool:
        """
        Import featured image from source post to destination post.

        Args:
            source_post: Source post dict
            dest_post_id: Destination post ID

        Returns:
            True if successful
        """
        featured_media_id = source_post.get('featured_media')
        if not featured_media_id:
            logger.info(" No featured image on source post")
            return False

        dest_media_id = self.media_cache.get(featured_media_id)
        if dest_media_id:
            logger.info(f" Using cached media ID: {dest_media_id}")
        else:
            dest_media_id = self._copy_media_item(featured_media_id)
            if not dest_media_id:
                return False

        # Set featured image on destination post
        try:
            response = requests.post(
                f"{self.dest_url}/wp-json/wp/v2/posts/{dest_post_id}",
                json={'featured_media': dest_media_id},
                auth=self.dest_auth,
                timeout=10
            )
        except requests.RequestException as e:
            logger.error(f"Error setting featured image: {e}")
            return False

        if response.status_code != 200:
            logger.error(f"Error setting featured image: {response.status_code}")
            return False

        logger.info(f"✓ Set featured image on post {dest_post_id}")
        self.stats['featured_images_set'] += 1
        return True

    def import_post_media(self, source_post: Dict, dest_post_id: int) -> int:
        """
        Import all media from a post (currently the featured image only).

        Args:
            source_post: Source post dict
            dest_post_id: Destination post ID

        Returns:
            Number of images imported
        """
        images_imported = 0

        # Import featured image
        if self.import_featured_image(source_post, dest_post_id):
            images_imported += 1

        # TODO: Import inline images from content
        # This would require parsing the content for <img> tags
        # and replacing source URLs with destination URLs

        return images_imported

    def process_posts(self, post_mappings: List[Tuple[int, int]],
                      dry_run: bool = False) -> Dict:
        """
        Process media import for mapped posts.

        Args:
            post_mappings: List of (source_post_id, dest_post_id) tuples
            dry_run: If True, preview without importing

        Returns:
            Statistics dict
        """
        logger.info("\n" + "="*70)
        logger.info("MEDIA IMPORTER")
        logger.info("="*70)
        logger.info(f"Source: {self.source_site}")
        logger.info(f"Destination: {self.destination_site}")
        logger.info(f"Posts to process: {len(post_mappings)}")
        logger.info(f"Dry run: {dry_run}")
        logger.info("="*70)

        self.stats['total_posts'] = len(post_mappings)

        for i, (source_id, dest_id) in enumerate(post_mappings, 1):
            logger.info(f"\n[{i}/{len(post_mappings)}] Processing post mapping:")
            logger.info(f" Source: {source_id} → Destination: {dest_id}")

            # Fetch source post
            source_post = self.get_source_post(source_id)
            if not source_post:
                logger.warning(" Skipping: Source post not found")
                self.stats['errors'] += 1
                continue

            # Check if source has media
            if not source_post.get('featured_media'):
                logger.info(" No featured image to import")
                continue

            self.stats['posts_with_media'] += 1

            if dry_run:
                logger.info(" [DRY RUN] Would import featured image")
                self.stats['images_downloaded'] += 1
                self.stats['images_uploaded'] += 1
                self.stats['featured_images_set'] += 1
                continue

            # Batch boundary: one malformed post must not abort the run.
            try:
                imported = self.import_post_media(source_post, dest_id)
            except Exception:
                logger.exception(f" Unexpected error importing media for post {dest_id}")
                imported = 0

            # The post has a featured image, so nothing imported is a failure.
            if imported == 0:
                self.stats['errors'] += 1

        # Print summary
        logger.info("\n" + "="*70)
        logger.info("IMPORT SUMMARY")
        logger.info("="*70)
        logger.info(f"Total posts: {self.stats['total_posts']}")
        logger.info(f"Posts with media: {self.stats['posts_with_media']}")
        logger.info(f"Images downloaded: {self.stats['images_downloaded']}")
        logger.info(f"Images uploaded: {self.stats['images_uploaded']}")
        logger.info(f"Featured images set: {self.stats['featured_images_set']}")
        logger.info(f"Errors: {self.stats['errors']}")
        logger.info("="*70)

        return self.stats

    @staticmethod
    def _parse_post_id(value) -> Optional[int]:
        """Return a positive post ID parsed from a CSV cell, else None."""
        try:
            post_id = int(str(value).strip())
        except (TypeError, ValueError):
            return None
        return post_id if post_id > 0 else None

    def _load_post_mappings(self, csv_file: str) -> List[Tuple[int, int]]:
        """Read (source_post_id, destination_post_id) pairs from a CSV file.

        Rows with a blank, missing or non-numeric ID are skipped instead of
        aborting the whole load. ``utf-8-sig`` tolerates a leading BOM, which
        would otherwise corrupt the first header name.
        """
        mappings = []
        skipped = 0
        with open(csv_file, 'r', encoding='utf-8-sig', newline='') as f:
            for row in csv.DictReader(f):
                source_id = self._parse_post_id(row.get('source_post_id'))
                dest_id = self._parse_post_id(row.get('destination_post_id'))
                if source_id is None or dest_id is None:
                    skipped += 1
                    continue
                mappings.append((source_id, dest_id))

        if skipped:
            logger.warning(f"Skipped {skipped} row(s) without valid post IDs")
        return mappings

    def run_from_csv(self, csv_file: str, dry_run: bool = False) -> Dict:
        """
        Import media for posts listed in CSV file.

        CSV should have columns: source_post_id, destination_post_id

        Args:
            csv_file: Path to CSV file with post mappings
            dry_run: If True, preview without importing

        Returns:
            Statistics dict
        """
        logger.info(f"Loading post mappings from: {csv_file}")

        try:
            mappings = self._load_post_mappings(csv_file)
        except (OSError, ValueError, csv.Error) as e:
            logger.error(f"Error loading CSV: {e}")
            return self.stats

        logger.info(f"✓ Loaded {len(mappings)} post mappings")
        return self.process_posts(mappings, dry_run=dry_run)

    def run_from_migration_report(self, report_file: str,
                                  dry_run: bool = False) -> Dict:
        """
        Import media using migration report CSV.

        The report is read with the same column names as ``run_from_csv``:
        source_post_id, destination_post_id.

        Args:
            report_file: Path to migration report CSV
            dry_run: If True, preview without importing

        Returns:
            Statistics dict
        """
        logger.info(f"Loading migration report: {report_file}")

        try:
            mappings = self._load_post_mappings(report_file)
        except (OSError, ValueError, csv.Error) as e:
            logger.error(f"Error loading migration report: {e}")
            return self.stats

        logger.info(f"✓ Loaded {len(mappings)} post mappings from migration report")
        return self.process_posts(mappings, dry_run=dry_run)