Add media importer for migrated posts
- Add import_media command to import featured images
- Fetch media from source site (mistergeek.net)
- Upload to destination site (hellogeek.net)
- Map source media IDs to destination media IDs
- Set featured images on migrated posts
- Use migration report CSV as input
- Support dry-run mode
- Cache media mappings to avoid duplicate uploads

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
@@ -17,6 +17,7 @@ from .meta_description_generator import MetaDescriptionGenerator
|
||||
from .meta_description_updater import MetaDescriptionUpdater
|
||||
from .performance_tracker import SEOPerformanceTracker
|
||||
from .performance_analyzer import PerformanceAnalyzer
|
||||
from .media_importer import WordPressMediaImporter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -486,3 +487,24 @@ class SEOApp:
|
||||
report.append("4. Monitor keyword rankings regularly\n")
|
||||
|
||||
return "\n".join(report)
|
||||
|
||||
def import_media(self, migration_report: str,
                 source_site: str = 'mistergeek.net',
                 destination_site: str = 'hellogeek.net',
                 dry_run: bool = True) -> Dict:
    """
    Copy featured images for migrated posts from one site to another.

    Builds a WordPressMediaImporter for the two sites and hands it the
    migration report; all of the actual work happens in the importer.

    Args:
        migration_report: Path to migration report CSV
        source_site: Name of the site the media is read from
        destination_site: Name of the site the media is uploaded to
        dry_run: If True, preview without importing

    Returns:
        Statistics dict produced by the importer
    """
    logger.info(f"📸 Importing media from {source_site} to {destination_site}...")

    # The importer constructor validates both site names before any work starts.
    return WordPressMediaImporter(
        source_site, destination_site
    ).run_from_migration_report(migration_report, dry_run=dry_run)
|
||||
|
||||
@@ -86,6 +86,10 @@ Examples:
|
||||
parser.add_argument('--start-date', help='Start date YYYY-MM-DD (for API mode)')
|
||||
parser.add_argument('--end-date', help='End date YYYY-MM-DD (for API mode)')
|
||||
|
||||
# Media import arguments
|
||||
parser.add_argument('--from-site', help='Source site for media import (default: mistergeek.net)')
|
||||
parser.add_argument('--to-site', help='Destination site for media import (default: hellogeek.net)')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if not args.command:
|
||||
@@ -116,6 +120,7 @@ Examples:
|
||||
'performance': cmd_performance,
|
||||
'keywords': cmd_keywords,
|
||||
'report': cmd_report,
|
||||
'import_media': cmd_import_media,
|
||||
'status': cmd_status,
|
||||
'help': cmd_help,
|
||||
}
|
||||
@@ -598,6 +603,47 @@ def cmd_report(app, args):
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_import_media(app, args):
    """Import media from source to destination site for migrated posts.

    Args:
        app: Application object exposing ``import_media(migration_report,
            source_site, destination_site, dry_run)``.
        args: Parsed CLI namespace. Reads ``args.args`` (positional
            arguments; the first one is the migration report CSV),
            ``args.from_site``, ``args.to_site`` and ``args.dry_run``.

    Returns:
        Process exit code: 0 on success (or a valid dry run), 1 when the
        migration report argument is missing or the import is rejected
        because a site name is invalid.
    """
    # Resolve the defaults once so the dry-run preview and the real run
    # can never disagree about which sites are used.
    source_site = args.from_site or 'mistergeek.net'
    dest_site = args.to_site or 'hellogeek.net'
    migration_report = args.args[0] if args.args else None

    # Validate before the dry-run branch: a preview must fail the same
    # way the real run would, otherwise it reports a plan that cannot run.
    if not migration_report:
        print("❌ Migration report CSV required")
        print(" Usage: seo import_media <migration_report.csv>")
        return 1

    if args.dry_run:
        print("Would import media")
        print(f" Source: {source_site}")
        print(f" Destination: {dest_site}")
        print(f" Migration report: {migration_report}")
        return 0

    print(f"Importing media from {source_site} to {dest_site}...")
    print(f"Migration report: {migration_report}")

    try:
        stats = app.import_media(
            migration_report=migration_report,
            source_site=source_site,
            destination_site=dest_site,
            dry_run=False
        )
    except ValueError as e:
        # Raised when a site name is not configured; show the message
        # instead of a traceback.
        print(f"❌ {e}")
        return 1

    if stats:
        print("\n✅ Media import completed!")
        print("\n📊 Summary:")
        print(f" Total posts: {stats.get('total_posts', 0)}")
        print(f" Posts with media: {stats.get('posts_with_media', 0)}")
        print(f" Images uploaded: {stats.get('images_uploaded', 0)}")
        print(f" Featured images set: {stats.get('featured_images_set', 0)}")
        print(f" Errors: {stats.get('errors', 0)}")

    return 0
|
||||
|
||||
|
||||
def cmd_help(app, args):
|
||||
"""Show help."""
|
||||
print("""
|
||||
@@ -638,6 +684,7 @@ Utility:
|
||||
performance --ga4 analytics.csv --gsc search.csv Analyze with both sources
|
||||
keywords <gsc.csv> Show keyword opportunities
|
||||
report Generate SEO performance report
|
||||
import_media <report.csv> Import media for migrated posts
|
||||
help Show this help message
|
||||
|
||||
Export Options:
|
||||
|
||||
467
src/seo/media_importer.py
Normal file
467
src/seo/media_importer.py
Normal file
@@ -0,0 +1,467 @@
|
||||
"""
|
||||
Media Importer - Import media from one WordPress site to another
|
||||
Specifically designed for migrated posts
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import tempfile
|
||||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
import csv
|
||||
|
||||
from .config import Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class WordPressMediaImporter:
    """Import media from source WordPress site to destination site.

    Works through the WordPress REST API (``/wp-json/wp/v2``) of both
    sites using HTTP basic auth. For each (source post, destination post)
    pair it copies the source post's featured image to the destination
    site and sets it as the destination post's featured image.

    Attributes set by ``__init__``:
        media_cache: maps source media ID -> destination media ID so the
            same image is uploaded only once per importer instance.
        stats: running counters returned by the ``process_posts`` /
            ``run_from_*`` entry points.
    """

    def __init__(self, source_site: str = 'mistergeek.net',
                 destination_site: str = 'hellogeek.net'):
        """
        Initialize media importer.

        Args:
            source_site: Source site name (key of Config.WORDPRESS_SITES)
            destination_site: Destination site name (key of
                Config.WORDPRESS_SITES)

        Raises:
            ValueError: If either site name is not configured.
        """
        self.source_site = source_site
        self.destination_site = destination_site
        self.sites = Config.WORDPRESS_SITES

        # Validate sites
        if source_site not in self.sites:
            raise ValueError(f"Source site '{source_site}' not found")
        if destination_site not in self.sites:
            raise ValueError(f"Destination site '{destination_site}' not found")

        # Setup source
        self.source_config = self.sites[source_site]
        self.source_url = self.source_config['url'].rstrip('/')
        self.source_auth = HTTPBasicAuth(
            self.source_config['username'],
            self.source_config['password']
        )

        # Setup destination
        self.dest_config = self.sites[destination_site]
        self.dest_url = self.dest_config['url'].rstrip('/')
        self.dest_auth = HTTPBasicAuth(
            self.dest_config['username'],
            self.dest_config['password']
        )

        self.media_cache = {}  # Cache source media ID -> dest media ID
        self.stats = {
            'total_posts': 0,
            'posts_with_media': 0,
            'images_downloaded': 0,
            'images_uploaded': 0,
            'featured_images_set': 0,
            'errors': 0
        }

    def fetch_migrated_posts(self, post_ids: Optional[List[int]] = None) -> List[Dict]:
        """
        Fetch posts from the destination site that need media imported.

        Args:
            post_ids: Specific post IDs to fetch. When omitted (or empty),
                one page of up to 100 published/draft posts is fetched.

        Returns:
            List of post dicts. Posts that cannot be fetched are logged and
            left out; a failure of the listing request yields an empty list.
        """
        logger.info(f"Fetching posts from {self.destination_site}...")

        if post_ids:
            # Fetch specific posts
            posts = []
            for post_id in post_ids:
                try:
                    response = requests.get(
                        f"{self.dest_url}/wp-json/wp/v2/posts/{post_id}",
                        auth=self.dest_auth,
                        timeout=10
                    )
                    if response.status_code == 200:
                        posts.append(response.json())
                    else:
                        # Previously dropped silently; make the gap visible.
                        logger.warning(
                            f"Post {post_id} not fetched (HTTP {response.status_code})"
                        )
                except Exception as e:
                    # Best effort: one bad post must not abort the batch.
                    logger.error(f"Error fetching post {post_id}: {e}")
            return posts

        # Fetch recent posts (assuming migrated posts are recent)
        try:
            response = requests.get(
                f"{self.dest_url}/wp-json/wp/v2/posts",
                params={
                    'per_page': 100,
                    'status': 'publish,draft',
                    '_embed': True
                },
                auth=self.dest_auth,
                timeout=30
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.error(f"Error fetching posts: {e}")
            return []

    def get_source_post(self, post_id: int) -> Optional[Dict]:
        """
        Fetch corresponding post from source site.

        Args:
            post_id: Post ID on source site

        Returns:
            Post dict, or None when the request fails or does not return
            HTTP 200.
        """
        try:
            response = requests.get(
                f"{self.source_url}/wp-json/wp/v2/posts/{post_id}",
                auth=self.source_auth,
                timeout=10,
                params={'_embed': True}
            )
            if response.status_code == 200:
                return response.json()

            logger.warning(f"Source post {post_id} not found")
            return None
        except Exception as e:
            logger.error(f"Error fetching source post {post_id}: {e}")
            return None

    def download_media(self, media_url: str) -> Optional[bytes]:
        """
        Download media file from source site.

        The request is made without authentication.

        Args:
            media_url: URL of media file

        Returns:
            File content bytes, or None on any failure (including an empty
            URL).
        """
        if not media_url:
            logger.error("Error downloading media: source URL is empty")
            return None

        try:
            response = requests.get(media_url, timeout=30)
            response.raise_for_status()
            return response.content
        except Exception as e:
            logger.error(f"Error downloading {media_url}: {e}")
            return None

    def upload_media(self, file_content: bytes, filename: str,
                     mime_type: str = 'image/jpeg',
                     alt_text: str = '',
                     caption: str = '') -> Optional[int]:
        """
        Upload media to destination site.

        The file is sent as the raw request body with Content-Disposition
        and Content-Type headers. It must NOT be combined with a multipart
        body: overriding Content-Type on a multipart request drops the
        boundary, and the server would then store the multipart envelope
        as the file.

        Args:
            file_content: File content bytes
            filename: Filename for the media
            mime_type: MIME type of the file
            alt_text: Alt text for the image
            caption: Caption for the image

        Returns:
            Media ID on destination site, or None if the upload itself
            failed. A failure to set alt text / caption afterwards is
            logged but still returns the ID, because the file is already
            on the destination site.
        """
        try:
            # Keep the header value well-formed even for odd filenames.
            header_name = filename.replace('"', '').replace('\r', '').replace('\n', '')

            response = requests.post(
                f"{self.dest_url}/wp-json/wp/v2/media",
                data=file_content,
                auth=self.dest_auth,
                headers={
                    'Content-Disposition': f'attachment; filename="{header_name}"',
                    'Content-Type': mime_type
                },
                timeout=30
            )

            if response.status_code != 201:
                logger.error(f"Error uploading {filename}: {response.status_code}")
                return None

            media_id = response.json()['id']
        except Exception as e:
            logger.error(f"Error uploading {filename}: {e}")
            return None

        # Update alt text and caption using the REST API field names.
        fields = {}
        if alt_text:
            fields['alt_text'] = alt_text
        if caption:
            fields['caption'] = caption

        if fields:
            try:
                meta_response = requests.post(
                    f"{self.dest_url}/wp-json/wp/v2/media/{media_id}",
                    json=fields,
                    auth=self.dest_auth,
                    timeout=10
                )
                if meta_response.status_code != 200:
                    logger.warning(
                        f"Could not set alt text/caption on media {media_id}: "
                        f"{meta_response.status_code}"
                    )
            except Exception as e:
                # The upload succeeded; losing the metadata must not make
                # the caller think the file is missing and upload it again.
                logger.warning(
                    f"Could not set alt text/caption on media {media_id}: {e}"
                )

        logger.info(f"✓ Uploaded {filename} (ID: {media_id})")
        return media_id

    @staticmethod
    def _media_filename(media_data: Dict, media_url: str) -> str:
        """Pick an upload filename that keeps the file's real extension.

        Prefers the last path segment of ``media_url`` when it has an
        extension; otherwise falls back to the media slug plus an extension
        guessed from its MIME type ('.jpg' when nothing can be guessed).
        """
        import mimetypes
        from urllib.parse import urlparse

        url_name = os.path.basename(urlparse(media_url or '').path)
        if url_name and os.path.splitext(url_name)[1]:
            return url_name

        slug = media_data.get('slug') or 'image'
        extension = mimetypes.guess_extension(media_data.get('mime_type') or '') or '.jpg'
        return slug + extension

    def _copy_media_to_destination(self, source_media_id: int) -> Optional[int]:
        """Fetch one media item from the source, upload it, and cache the mapping.

        Returns the destination media ID, or None if any step failed.
        """
        try:
            media_response = requests.get(
                f"{self.source_url}/wp-json/wp/v2/media/{source_media_id}",
                auth=self.source_auth,
                timeout=10
            )
            if media_response.status_code != 200:
                logger.error(f"Could not fetch media {source_media_id}")
                return None

            media_data = media_response.json()

            # Download media file; fall back to the guid when source_url is absent.
            media_url = media_data.get('source_url', '')
            if not media_url:
                media_url = media_data.get('guid', {}).get('rendered', '')

            file_content = self.download_media(media_url)
            if not file_content:
                return None
            self.stats['images_downloaded'] += 1

            dest_media_id = self.upload_media(
                file_content,
                self._media_filename(media_data, media_url),
                media_data.get('mime_type', 'image/jpeg'),
                media_data.get('alt_text', ''),
                media_data.get('caption', {}).get('rendered', '')
            )
            if not dest_media_id:
                return None

            # Cache the mapping
            self.media_cache[source_media_id] = dest_media_id
            self.stats['images_uploaded'] += 1
            return dest_media_id
        except Exception as e:
            logger.error(f"Error importing featured image: {e}")
            return None

    def import_featured_image(self, source_post: Dict, dest_post_id: int) -> bool:
        """
        Import featured image from source post to destination post.

        Args:
            source_post: Source post dict (its 'featured_media' is read)
            dest_post_id: Destination post ID

        Returns:
            True if the destination post's featured image was set.
        """
        # Check if source has featured image
        featured_media_id = source_post.get('featured_media')
        if not featured_media_id:
            logger.info(f" No featured image on source post")
            return False

        # Check if already imported
        dest_media_id = self.media_cache.get(featured_media_id)
        if dest_media_id is not None:
            logger.info(f" Using cached media ID: {dest_media_id}")
        else:
            dest_media_id = self._copy_media_to_destination(featured_media_id)
            if not dest_media_id:
                return False

        # Set featured image on destination post
        try:
            response = requests.post(
                f"{self.dest_url}/wp-json/wp/v2/posts/{dest_post_id}",
                json={'featured_media': dest_media_id},
                auth=self.dest_auth,
                timeout=10
            )
            if response.status_code == 200:
                logger.info(f"✓ Set featured image on post {dest_post_id}")
                self.stats['featured_images_set'] += 1
                return True

            logger.error(f"Error setting featured image: {response.status_code}")
            return False
        except Exception as e:
            logger.error(f"Error setting featured image: {e}")
            return False

    def import_post_media(self, source_post: Dict, dest_post_id: int) -> int:
        """
        Import all media from a post (currently only the featured image).

        Args:
            source_post: Source post dict
            dest_post_id: Destination post ID

        Returns:
            Number of images imported
        """
        images_imported = 0

        # Import featured image
        if self.import_featured_image(source_post, dest_post_id):
            images_imported += 1

        # TODO: Import inline images from content
        # This would require parsing the content for <img> tags
        # and replacing source URLs with destination URLs

        return images_imported

    def process_posts(self, post_mappings: List[Tuple[int, int]],
                      dry_run: bool = False) -> Dict:
        """
        Process media import for mapped posts.

        Args:
            post_mappings: List of (source_post_id, dest_post_id) tuples
            dry_run: If True, preview without importing. The source posts
                are still fetched to see whether they have a featured image.

        Returns:
            Statistics dict (the importer's running ``stats``)
        """
        total = len(post_mappings)

        logger.info("\n" + "="*70)
        logger.info("MEDIA IMPORTER")
        logger.info("="*70)
        logger.info(f"Source: {self.source_site}")
        logger.info(f"Destination: {self.destination_site}")
        logger.info(f"Posts to process: {total}")
        logger.info(f"Dry run: {dry_run}")
        logger.info("="*70)

        self.stats['total_posts'] = total

        for i, (source_id, dest_id) in enumerate(post_mappings, 1):
            logger.info(f"\n[{i}/{total}] Processing post mapping:")
            logger.info(f" Source: {source_id} → Destination: {dest_id}")

            # Fetch source post
            source_post = self.get_source_post(source_id)
            if not source_post:
                logger.warning(f" Skipping: Source post not found")
                self.stats['errors'] += 1
                continue

            # Check if source has media
            if not source_post.get('featured_media'):
                logger.info(f" No featured image to import")
                continue

            self.stats['posts_with_media'] += 1

            if dry_run:
                logger.info(f" [DRY RUN] Would import featured image")
                self.stats['images_downloaded'] += 1
                self.stats['images_uploaded'] += 1
                self.stats['featured_images_set'] += 1
            elif self.import_post_media(source_post, dest_id) == 0:
                # The post had a featured image but nothing was imported:
                # count it so the summary does not hide the failure.
                # (Download/upload counters are maintained where those
                # steps actually happen.)
                self.stats['errors'] += 1

        # Print summary
        logger.info("\n" + "="*70)
        logger.info("IMPORT SUMMARY")
        logger.info("="*70)
        logger.info(f"Total posts: {self.stats['total_posts']}")
        logger.info(f"Posts with media: {self.stats['posts_with_media']}")
        logger.info(f"Images downloaded: {self.stats['images_downloaded']}")
        logger.info(f"Images uploaded: {self.stats['images_uploaded']}")
        logger.info(f"Featured images set: {self.stats['featured_images_set']}")
        logger.info(f"Errors: {self.stats['errors']}")
        logger.info("="*70)

        return self.stats

    @staticmethod
    def _read_post_mappings(csv_path: str) -> Tuple[List[Tuple[int, int]], int]:
        """Read (source_post_id, destination_post_id) pairs from a CSV file.

        Rows whose IDs are missing, blank, non-numeric or zero are skipped
        instead of aborting the whole load.

        Returns:
            (mappings, skipped_row_count)
        """
        mappings = []
        skipped = 0

        # utf-8-sig reads plain UTF-8 too, and strips a BOM that would
        # otherwise become part of the first column name.
        with open(csv_path, 'r', encoding='utf-8-sig', newline='') as f:
            for row in csv.DictReader(f):
                try:
                    source_id = int(row.get('source_post_id') or 0)
                    dest_id = int(row.get('destination_post_id') or 0)
                except (TypeError, ValueError):
                    skipped += 1
                    continue

                if source_id and dest_id:
                    mappings.append((source_id, dest_id))
                else:
                    skipped += 1

        return mappings, skipped

    def run_from_csv(self, csv_file: str, dry_run: bool = False) -> Dict:
        """
        Import media for posts listed in CSV file.

        CSV should have columns: source_post_id, destination_post_id

        Args:
            csv_file: Path to CSV file with post mappings
            dry_run: If True, preview without importing

        Returns:
            Statistics dict (unchanged stats if the file cannot be read)
        """
        logger.info(f"Loading post mappings from: {csv_file}")

        try:
            mappings, skipped = self._read_post_mappings(csv_file)
        except (OSError, csv.Error, ValueError) as e:
            logger.error(f"Error loading CSV: {e}")
            return self.stats

        if skipped:
            logger.warning(f"Skipped {skipped} rows without valid post IDs")
        logger.info(f"✓ Loaded {len(mappings)} post mappings")

        return self.process_posts(mappings, dry_run=dry_run)

    def run_from_migration_report(self, report_file: str,
                                  dry_run: bool = False) -> Dict:
        """
        Import media using migration report CSV.

        The report is expected to have the columns source_post_id and
        destination_post_id.

        Args:
            report_file: Path to migration report CSV
            dry_run: If True, preview without importing

        Returns:
            Statistics dict (unchanged stats if the file cannot be read)
        """
        logger.info(f"Loading migration report: {report_file}")

        try:
            mappings, skipped = self._read_post_mappings(report_file)
        except (OSError, csv.Error, ValueError) as e:
            logger.error(f"Error loading migration report: {e}")
            return self.stats

        if skipped:
            logger.warning(f"Skipped {skipped} rows without valid post IDs")
        logger.info(f"✓ Loaded {len(mappings)} post mappings from migration report")

        return self.process_posts(mappings, dry_run=dry_run)
|
||||
Reference in New Issue
Block a user