""" Wix Blog Manager This module provides high-level functions for managing blog content on Wix, including content creation, SEO optimization, and media management. """ import os import re import logging import tempfile import requests from typing import Dict, List, Optional, Union, Any, Tuple from datetime import datetime from pathlib import Path import markdown import html2text from bs4 import BeautifulSoup from .wix_api_client import WixAPIClient # Configure logging logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' ) logger = logging.getLogger('wix_blog_manager') class WixBlogManager: """ High-level manager for Wix blog content. This class provides convenient methods for common blog management tasks, building on the lower-level WixAPIClient. """ def __init__( self, api_key: Optional[str] = None, refresh_token: Optional[str] = None, site_id: Optional[str] = None ): """ Initialize the Wix Blog Manager. Args: api_key: Wix API key (optional if using refresh token) refresh_token: Wix refresh token for OAuth authentication site_id: Wix site ID """ self.client = WixAPIClient(api_key, refresh_token, site_id) def publish_markdown_post( self, title: str, markdown_content: str, featured_image_path: Optional[str] = None, featured_image_url: Optional[str] = None, excerpt: Optional[str] = None, tags: Optional[List[str]] = None, categories: Optional[List[str]] = None, seo_title: Optional[str] = None, seo_description: Optional[str] = None, seo_keywords: Optional[List[str]] = None, publish: bool = False ) -> Dict: """ Publish a blog post from markdown content. Args: title: Post title markdown_content: Post content in markdown format featured_image_path: Local path to featured image (optional) featured_image_url: URL of featured image to download (optional) excerpt: Post excerpt/summary (optional) tags: List of tags (optional) categories: List of category names (optional) seo_title: SEO title (optional) seo_description: SEO description (optional) seo_keywords: SEO keywords (optional) publish: Whether to publish the post immediately (optional) Returns: Published blog post data """ # Convert markdown to HTML html_content = self._markdown_to_html(markdown_content) # Process images in the content html_content, embedded_images = self._process_content_images(html_content) # Handle featured image featured_image_id = None temp_image_path = None if featured_image_url and not featured_image_path: # Download the image from URL try: temp_image_path = self._download_image(featured_image_url) featured_image_path = temp_image_path except Exception as e: logger.error(f"Failed to download featured image: {str(e)}") if featured_image_path: try: image_response = self.client.upload_image( file_path=featured_image_path, title=title, alt_text=title ) featured_image_id = image_response.get("file", {}).get("id") logger.info(f"Uploaded featured image with ID: {featured_image_id}") except Exception as e: logger.error(f"Failed to upload featured image: {str(e)}") # Clean up temporary file if created if temp_image_path and os.path.exists(temp_image_path): try: os.remove(temp_image_path) except: pass # Process categories - convert names to IDs category_ids = [] if categories: for category_name in categories: try: category_id = self.client.get_or_create_category(category_name) if category_id: category_ids.append(category_id) except Exception as e: logger.error(f"Failed to process category '{category_name}': {str(e)}") # Generate excerpt if not provided if not excerpt: excerpt = self._generate_excerpt(markdown_content) # Prepare SEO data seo_data = None if seo_title or seo_description or seo_keywords: seo_data = { "title": seo_title or title, "description": seo_description or excerpt or "", "keywords": seo_keywords or tags or [] } # Create the blog post response = self.client.create_post( title=title, content=html_content, excerpt=excerpt, featured_image_id=featured_image_id, tags=tags, categories=category_ids, seo_data=seo_data, publish=publish ) # Update SEO settings if the post was published if publish and response.get("post", {}).get("id"): post_id = response["post"]["id"] post_url = self.client.get_post_url(post_id) try: self.client.update_seo_settings( page_url=post_url, title=seo_title or title, description=seo_description or excerpt or "", keywords=seo_keywords or tags, og_image_url=featured_image_url ) except Exception as e: logger.error(f"Failed to update SEO settings: {str(e)}") return response def update_markdown_post( self, post_id: str, title: Optional[str] = None, markdown_content: Optional[str] = None, featured_image_path: Optional[str] = None, featured_image_url: Optional[str] = None, excerpt: Optional[str] = None, tags: Optional[List[str]] = None, categories: Optional[List[str]] = None, seo_title: Optional[str] = None, seo_description: Optional[str] = None, seo_keywords: Optional[List[str]] = None, publish: bool = False ) -> Dict: """ Update an existing blog post with markdown content. Args: post_id: ID of the post to update title: New post title (optional) markdown_content: New post content in markdown format (optional) featured_image_path: Local path to new featured image (optional) featured_image_url: URL of new featured image to download (optional) excerpt: New post excerpt/summary (optional) tags: New list of tags (optional) categories: New list of category names (optional) seo_title: New SEO title (optional) seo_description: New SEO description (optional) seo_keywords: New SEO keywords (optional) publish: Whether to publish the post after updating (optional) Returns: Updated blog post data """ # Get current post data current_post = self.client.get_post(post_id) if "post" not in current_post: raise ValueError(f"Post with ID {post_id} not found") # Convert markdown to HTML if provided html_content = None if markdown_content: html_content = self._markdown_to_html(markdown_content) # Process images in the content html_content, embedded_images = self._process_content_images(html_content) # Handle featured image featured_image_id = None temp_image_path = None if featured_image_url and not featured_image_path: # Download the image from URL try: temp_image_path = self._download_image(featured_image_url) featured_image_path = temp_image_path except Exception as e: logger.error(f"Failed to download featured image: {str(e)}") if featured_image_path: try: image_response = self.client.upload_image( file_path=featured_image_path, title=title or current_post["post"].get("title", ""), alt_text=title or current_post["post"].get("title", "") ) featured_image_id = image_response.get("file", {}).get("id") logger.info(f"Uploaded featured image with ID: {featured_image_id}") except Exception as e: logger.error(f"Failed to upload featured image: {str(e)}") # Clean up temporary file if created if temp_image_path and os.path.exists(temp_image_path): try: os.remove(temp_image_path) except: pass # Process categories - convert names to IDs category_ids = None if categories: category_ids = [] for category_name in categories: try: category_id = self.client.get_or_create_category(category_name) if category_id: category_ids.append(category_id) except Exception as e: logger.error(f"Failed to process category '{category_name}': {str(e)}") # Generate excerpt if not provided but markdown is if not excerpt and markdown_content: excerpt = self._generate_excerpt(markdown_content) # Prepare SEO data seo_data = None if seo_title or seo_description or seo_keywords: seo_data = { "title": seo_title or title or current_post["post"].get("title", ""), "description": seo_description or excerpt or current_post["post"].get("excerpt", ""), "keywords": seo_keywords or tags or current_post["post"].get("tags", []) } # Update the blog post response = self.client.update_post( post_id=post_id, title=title, content=html_content, excerpt=excerpt, featured_image_id=featured_image_id, tags=tags, categories=category_ids, seo_data=seo_data, publish=publish ) # Update SEO settings if needed if (seo_title or seo_description or seo_keywords or featured_image_url): post_url = self.client.get_post_url(post_id) try: self.client.update_seo_settings( page_url=post_url, title=seo_title or title, description=seo_description or excerpt, keywords=seo_keywords or tags, og_image_url=featured_image_url ) except Exception as e: logger.error(f"Failed to update SEO settings: {str(e)}") return response def find_post_by_title(self, title: str) -> Optional[Dict]: """ Find a post by its title (exact match). Args: title: Post title to search for Returns: Post data or None if not found """ # List all posts (this is inefficient but Wix API doesn't support filtering by title) # In a production environment, you might want to implement pagination response = self.client.list_posts(limit=100) posts = response.get("posts", []) for post in posts: if post.get("title") == title: return post return None def publish_or_update_markdown_post( self, title: str, markdown_content: str, featured_image_path: Optional[str] = None, featured_image_url: Optional[str] = None, excerpt: Optional[str] = None, tags: Optional[List[str]] = None, categories: Optional[List[str]] = None, seo_title: Optional[str] = None, seo_description: Optional[str] = None, seo_keywords: Optional[List[str]] = None, publish: bool = False, update_if_exists: bool = True ) -> Dict: """ Publish a new post or update an existing one with the same title. Args: title: Post title markdown_content: Post content in markdown format featured_image_path: Local path to featured image (optional) featured_image_url: URL of featured image to download (optional) excerpt: Post excerpt/summary (optional) tags: List of tags (optional) categories: List of category names (optional) seo_title: SEO title (optional) seo_description: SEO description (optional) seo_keywords: SEO keywords (optional) publish: Whether to publish the post immediately (optional) update_if_exists: Whether to update an existing post with the same title (optional) Returns: Published or updated blog post data """ # Check if a post with this title already exists existing_post = self.find_post_by_title(title) if existing_post and update_if_exists: # Update existing post logger.info(f"Updating existing post with title: {title}") return self.update_markdown_post( post_id=existing_post["id"], title=title, markdown_content=markdown_content, featured_image_path=featured_image_path, featured_image_url=featured_image_url, excerpt=excerpt, tags=tags, categories=categories, seo_title=seo_title, seo_description=seo_description, seo_keywords=seo_keywords, publish=publish ) else: # Create new post logger.info(f"Creating new post with title: {title}") return self.publish_markdown_post( title=title, markdown_content=markdown_content, featured_image_path=featured_image_path, featured_image_url=featured_image_url, excerpt=excerpt, tags=tags, categories=categories, seo_title=seo_title, seo_description=seo_description, seo_keywords=seo_keywords, publish=publish ) def optimize_seo_for_post( self, post_id: str, seo_title: Optional[str] = None, seo_description: Optional[str] = None, seo_keywords: Optional[List[str]] = None, og_image_url: Optional[str] = None, structured_data: Optional[Dict] = None ) -> Dict: """ Optimize SEO settings for an existing blog post. Args: post_id: ID of the blog post seo_title: SEO title (optional) seo_description: SEO description (optional) seo_keywords: SEO keywords (optional) og_image_url: Open Graph image URL (optional) structured_data: Structured data (JSON-LD) (optional) Returns: Updated SEO settings data """ # Get the post URL post_url = self.client.get_post_url(post_id) # Update SEO settings return self.client.update_seo_settings( page_url=post_url, title=seo_title, description=seo_description, keywords=seo_keywords, og_image_url=og_image_url, structured_data=structured_data ) def generate_structured_data( self, post_id: str, author_name: str, publisher_name: str, publisher_logo_url: str ) -> Dict: """ Generate structured data (JSON-LD) for a blog post. Args: post_id: ID of the blog post author_name: Name of the author publisher_name: Name of the publisher publisher_logo_url: URL of the publisher's logo Returns: Structured data as a dictionary """ # Get post data post_data = self.client.get_post(post_id) post = post_data.get("post", {}) # Get post URL post_url = self.client.get_post_url(post_id) # Create structured data structured_data = { "@context": "https://schema.org", "@type": "BlogPosting", "headline": post.get("title", ""), "description": post.get("excerpt", ""), "author": { "@type": "Person", "name": author_name }, "publisher": { "@type": "Organization", "name": publisher_name, "logo": { "@type": "ImageObject", "url": publisher_logo_url } }, "datePublished": post.get("publishedDate", ""), "dateModified": post.get("lastPublishedDate", "") } # Add featured image if available if post.get("featuredImageId"): try: media_item = self.client.get_media_item(post["featuredImageId"]) image_url = media_item.get("file", {}).get("url", "") if image_url: structured_data["image"] = image_url except: pass return structured_data def apply_structured_data_to_post( self, post_id: str, author_name: str, publisher_name: str, publisher_logo_url: str ) -> Dict: """ Generate and apply structured data to a blog post. Args: post_id: ID of the blog post author_name: Name of the author publisher_name: Name of the publisher publisher_logo_url: URL of the publisher's logo Returns: Updated SEO settings data """ # Generate structured data structured_data = self.generate_structured_data( post_id=post_id, author_name=author_name, publisher_name=publisher_name, publisher_logo_url=publisher_logo_url ) # Get the post URL post_url = self.client.get_post_url(post_id) # Update SEO settings with structured data return self.client.update_seo_settings( page_url=post_url, structured_data=structured_data ) # Helper methods def _markdown_to_html(self, markdown_content: str) -> str: """ Convert markdown content to HTML. Args: markdown_content: Content in markdown format Returns: HTML content """ # Use the markdown library to convert to HTML html = markdown.markdown( markdown_content, extensions=['extra', 'codehilite', 'tables', 'toc'] ) return html def _html_to_markdown(self, html_content: str) -> str: """ Convert HTML content to markdown. Args: html_content: Content in HTML format Returns: Markdown content """ # Use html2text to convert HTML to markdown h = html2text.HTML2Text() h.ignore_links = False h.ignore_images = False h.ignore_tables = False h.ignore_emphasis = False return h.handle(html_content) def _process_content_images(self, html_content: str) -> Tuple[str, List[Dict]]: """ Process images in HTML content, uploading them to Wix and replacing URLs. Args: html_content: HTML content with image tags Returns: Tuple of (updated HTML content, list of uploaded image data) """ soup = BeautifulSoup(html_content, 'html.parser') img_tags = soup.find_all('img') uploaded_images = [] for img in img_tags: src = img.get('src', '') alt = img.get('alt', '') # Skip images that are already hosted on Wix if 'wixstatic.com' in src: continue # Handle images with data URLs if src.startswith('data:image'): logger.info("Skipping data URL image - not supported in this implementation") continue # Handle remote images if src.startswith('http://') or src.startswith('https://'): try: # Download the image temp_path = self._download_image(src) # Upload to Wix image_response = self.client.upload_image( file_path=temp_path, title=alt or "Blog image", alt_text=alt or "Blog image" ) # Get the new URL new_url = image_response.get("file", {}).get("url", "") if new_url: # Replace the src attribute img['src'] = new_url uploaded_images.append({ 'original_url': src, 'wix_url': new_url, 'wix_id': image_response.get("file", {}).get("id", "") }) # Clean up temp file if os.path.exists(temp_path): os.remove(temp_path) except Exception as e: logger.error(f"Failed to process image {src}: {str(e)}") # Handle local images (not implemented in this version) else: logger.info(f"Skipping local image {src} - not supported in this implementation") # Return the updated HTML return str(soup), uploaded_images def _download_image(self, url: str) -> str: """ Download an image from a URL to a temporary file. Args: url: URL of the image Returns: Path to the downloaded temporary file """ response = requests.get(url, stream=True) response.raise_for_status() # Determine file extension content_type = response.headers.get('content-type', '') extension = '.jpg' # Default if 'image/jpeg' in content_type: extension = '.jpg' elif 'image/png' in content_type: extension = '.png' elif 'image/gif' in content_type: extension = '.gif' elif 'image/webp' in content_type: extension = '.webp' # Create a temporary file fd, temp_path = tempfile.mkstemp(suffix=extension) os.close(fd) # Write the image data to the file with open(temp_path, 'wb') as f: for chunk in response.iter_content(chunk_size=8192): f.write(chunk) return temp_path def _generate_excerpt(self, markdown_content: str, max_length: int = 160) -> str: """ Generate an excerpt from markdown content. Args: markdown_content: Content in markdown format max_length: Maximum length of the excerpt Returns: Generated excerpt """ # Convert markdown to plain text h = html2text.HTML2Text() h.ignore_links = True h.ignore_images = True h.ignore_tables = True h.ignore_emphasis = True # First convert markdown to HTML, then HTML to plain text html = markdown.markdown(markdown_content) plain_text = h.handle(html) # Clean up the text plain_text = re.sub(r'\s+', ' ', plain_text).strip() # Truncate to max_length if len(plain_text) <= max_length: return plain_text # Try to truncate at a sentence boundary sentences = re.split(r'(?<=[.!?])\s+', plain_text) excerpt = "" for sentence in sentences: if len(excerpt + sentence) <= max_length: excerpt += sentence + " " else: break # If we couldn't get a full sentence, just truncate if not excerpt: excerpt = plain_text[:max_length-3] + "..." return excerpt.strip()