Files
ALwrity/ToBeMigrated/blog_metadata/get_blog_metadata.py
2025-08-06 16:29:49 +05:30

436 lines
16 KiB
Python

import os
import time
import datetime
import sys
import streamlit as st
from loguru import logger
import random
import asyncio
import re
# Drop the handlers currently registered on the loguru logger and log to
# stdout instead, with colorized "LEVEL|file:line:function| message" records.
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
async def blog_metadata(blog_article):
    """
    Build the full set of SEO metadata for a blog article with Streamlit progress feedback.

    Six steps run one after another. Each step shows a status message, waits a
    short random interval, calls its generator, and then advances the progress
    bar. The first five steps work from the article text; the slug is derived
    from the title produced in step 1.

    Args:
        blog_article (str): The content of the blog article

    Returns:
        tuple: (blog_title, blog_meta_desc, blog_tags, blog_categories, blog_hashtags, blog_slug).
            If any step raises, generic placeholder values are returned instead
            so the caller can carry on.
    """
    logger.info("Generating comprehensive blog metadata")
    progress_bar = st.progress(0)
    total_steps = 6
    status_container = st.empty()

    async def _run_step(step_number, status_message, generator, source_text):
        # One step: announce, pause, generate, then advance the progress bar.
        status_container.info(status_message)
        await asyncio.sleep(random.uniform(0.5, 1.5))
        generated = generator(source_text)
        progress_bar.progress(step_number / total_steps)
        return generated

    try:
        # Steps 1-5, in execution order; each one consumes the article text.
        article_steps = (
            ("Generating SEO-optimized blog title...", generate_blog_title),
            ("Creating compelling meta description...", generate_blog_description),
            ("Extracting relevant blog tags...", get_blog_tags),
            ("Identifying primary blog categories...", get_blog_categories),
            ("Creating social media hashtags...", generate_blog_hashtags),
        )
        generated_values = []
        for step_number, (status_message, generator) in enumerate(article_steps, start=1):
            generated_values.append(
                await _run_step(step_number, status_message, generator, blog_article)
            )
        blog_title, blog_meta_desc, blog_tags, blog_categories, blog_hashtags = generated_values

        # Step 6 works from the generated title rather than the article.
        blog_slug = await _run_step(
            6, "Generating SEO-friendly URL slug...", generate_blog_slug, blog_title
        )

        status_container.success("✅ Blog SEO Metadata generation complete")
        return (
            blog_title,
            blog_meta_desc,
            blog_tags,
            blog_categories,
            blog_hashtags,
            blog_slug,
        )
    except Exception as e:
        status_container.error(f"Error generating metadata: {str(e)}")
        logger.error(f"Failed to generate metadata: {str(e)}")
        # Placeholder values keep the blog generation process going.
        return (
            "Blog Article",
            "An informative blog post",
            "content, blog",
            "General, Information",
            "#content #blog",
            "blog-article",
        )
def generate_blog_title(blog_article):
    """
    Generate an SEO-optimized and engaging title for a blog article.

    Args:
        blog_article (str): The content of the blog article

    Returns:
        str: An SEO-optimized title, or "Blog Article" when generation fails
            or the model returns nothing usable.
    """
    logger.info("Generating SEO-optimized blog title")
    fallback_title = "Blog Article"
    # Only the first 3000 characters are sent; slicing is a no-op on shorter text.
    snippet = blog_article[:3000]
    prompt = f"""As an expert SEO copywriter, create the perfect blog title based on this content.
REQUIREMENTS:
1. Make it compelling, specific, and actionable
2. Include primary keywords naturally near the beginning
3. Keep it between 50-60 characters (10-12 words maximum)
4. Make it promise clear value to the reader
5. Use power words that evoke emotion where appropriate
AVOID:
- Clickbait tactics or false promises
- Generic titles that could apply to any article
- Using words like "unveiling", "unleash", "power of", "ultimate guide", or "complete"
- ALL CAPS or excessive punctuation!!!!
EXAMPLES OF GREAT TITLES:
- "7 Proven Strategies to Improve Your Email Marketing ROI"
- "Why Remote Work Improves Productivity: New Research Findings"
- "How to Build a Personal Budget That Actually Works"
CONTENT TO ANALYZE:
"{snippet}"
Reply with ONLY the title and no other text or explanation.
"""
    try:
        title = llm_text_gen(prompt)
        # Trim whitespace first: a reply such as '"Title"\n' would otherwise
        # keep its closing quote, because strip() stops at the newline.
        title = title.strip().strip('"\'').strip()
        if not title:
            logger.warning("LLM returned an empty blog title; using fallback")
            return fallback_title
        logger.info(f"Generated title: {title}")
        return title
    except Exception as err:
        logger.error(f"Failed to generate blog title: {err}")
        return fallback_title
def generate_blog_description(blog_content):
    """
    Generate an SEO-optimized meta description for the blog.

    Args:
        blog_content (str): The content of the blog article

    Returns:
        str: An SEO-optimized meta description, or a generic description when
            generation fails or the model returns nothing usable.
    """
    logger.info("Generating SEO-optimized meta description")
    fallback_description = "An informative blog post about this topic."
    # Only the first 2000 characters are sent; slicing is a no-op on shorter text.
    snippet = blog_content[:2000]
    prompt = f"""As an SEO expert, write the perfect meta description for this blog content.
REQUIREMENTS:
1. Exactly 150-160 characters (this is critical for SEO)
2. Include primary keywords naturally
3. Compelling value proposition that makes readers want to click
4. Clear indication of what the reader will learn/gain
5. End with an implicit call-to-action when possible
EXAMPLES OF EXCELLENT META DESCRIPTIONS:
- "Learn how to increase email open rates by 43% with these 5 proven strategies from industry experts. Implement today for immediate results."
- "Discover why 67% of professionals struggle with work-life balance and explore research-backed techniques to reclaim your time and energy."
CONTENT TO SUMMARIZE:
"{snippet}"
Reply with ONLY the meta description and no other text. Keep it between 150-160 characters exactly.
"""
    try:
        description = llm_text_gen(prompt)
        # Trim whitespace first so quotes next to a trailing newline or
        # space are actually removed.
        description = description.strip().strip('"\'').strip()
        if not description:
            logger.warning("LLM returned an empty meta description; using fallback")
            return fallback_description
        logger.info(f"Generated meta description: {description}")
        return description
    except Exception as err:
        logger.error(f"Failed to generate blog description: {err}")
        return fallback_description
def get_blog_tags(blog_article):
    """
    Generate relevant SEO tags for a blog article.

    Args:
        blog_article (str): The content of the blog article

    Returns:
        str: Comma-separated list of relevant tags, or "content, blog" when
            generation fails or the model returns nothing usable.
    """
    logger.info("Generating SEO-optimized blog tags")
    fallback_tags = "content, blog"
    # Only the first 3000 characters are sent; slicing is a no-op on shorter text.
    snippet = blog_article[:3000]
    prompt = f"""As an SEO specialist, extract the 4-6 most relevant tags for this blog post.
REQUIREMENTS:
1. Choose specific, targeted keywords that accurately represent the content
2. Include a mix of broad and specific tags
3. Focus on terms users would actually search for
4. Include at least one long-tail keyword phrase
5. Ensure all tags are directly addressed in the content
CONTENT TO ANALYZE:
"{snippet}"
Reply with ONLY the tags as a comma-separated list (e.g., "keyword1, keyword2, keyword3, keyword phrase"). Provide 4-6 tags total.
"""
    try:
        tags = llm_text_gen(prompt)
        # Trim whitespace first so quotes next to a trailing newline are
        # removed, then drop any run of trailing commas/spaces.
        tags = tags.strip().strip('"\'').strip()
        tags = tags.rstrip(', ')
        if not tags:
            logger.warning("LLM returned no blog tags; using fallback")
            return fallback_tags
        logger.info(f"Generated tags: {tags}")
        return tags
    except Exception as err:
        logger.error(f"Failed to generate blog tags: {err}")
        return fallback_tags
def get_blog_categories(blog_article):
    """
    Identify the most appropriate blog categories for the article.

    Args:
        blog_article (str): The content of the blog article

    Returns:
        str: Comma-separated list of relevant categories, or
            "General, Information" when generation fails or the model returns
            nothing usable.
    """
    logger.info("Generating blog categories")
    fallback_categories = "General, Information"
    # Only the first 2000 characters are sent; slicing is a no-op on shorter text.
    snippet = blog_article[:2000]
    prompt = f"""As a content strategist, identify the 2-3 most appropriate high-level categories for this blog.
REQUIREMENTS:
1. Choose broad, established categories used in content organization
2. Select categories that best represent the main themes of the article
3. Consider the target audience and their interests
4. Focus on categories that would help with site navigation
5. Aim for a primary category and 1-2 supporting categories
EXAMPLES OF GOOD CATEGORIES:
- Marketing, Social Media, Strategy
- Finance, Personal Budgeting, Money Management
- Productivity, Remote Work, Business
CONTENT TO ANALYZE:
"{snippet}"
Reply with ONLY the categories as a comma-separated list (e.g., "Category1, Category2, Category3"). Provide 2-3 categories total.
"""
    try:
        categories = llm_text_gen(prompt)
        # Trim whitespace first so quotes next to a trailing newline are
        # removed, then drop any run of trailing commas/spaces.
        categories = categories.strip().strip('"\'').strip()
        categories = categories.rstrip(', ')
        if not categories:
            logger.warning("LLM returned no blog categories; using fallback")
            return fallback_categories
        logger.info(f"Generated categories: {categories}")
        return categories
    except Exception as err:
        logger.error(f"Failed to generate blog categories: {err}")
        return fallback_categories
def generate_blog_hashtags(blog_article):
    """
    Generate social media hashtags for promoting the blog article.

    Args:
        blog_article (str): The content of the blog article

    Returns:
        str: Space-separated list of hashtags, each starting with #, or
            "#content #blog" when generation fails or the model returns
            nothing usable.
    """
    logger.info("Generating social media hashtags")
    fallback_hashtags = "#content #blog"
    # Only the first 2000 characters are sent; slicing is a no-op on shorter text.
    snippet = blog_article[:2000]
    prompt = f"""As a social media strategist, create 5-7 effective hashtags for this blog content.
REQUIREMENTS:
1. Mix of popular and niche hashtags for better visibility
2. Include industry-specific and trending hashtags where relevant
3. Avoid overly generic hashtags (like #content or #blog)
4. Format each hashtag with # symbol and camelCase or separate words
5. Include at least one branded or campaign-style hashtag
EXAMPLES OF EFFECTIVE HASHTAG SETS:
- #EmailMarketing #ROITips #DigitalStrategy #MarketingTips #GrowthHacking #EmailROI
- #RemoteWork #ProductivityTips #FutureOfWork #WorkFromHome #RemoteProductivity #HRInsights
CONTENT TO ANALYZE:
"{snippet}"
Reply with ONLY the hashtags, each starting with # and separated by spaces. Provide 5-7 hashtags total.
"""
    try:
        hashtags = llm_text_gen(prompt)
        # Trim whitespace first so quotes next to a trailing newline are removed.
        hashtags = hashtags.strip().strip('"\'').strip()
        # Normalize every token, not just the first: checking only the start of
        # the string lets replies like "#A B C" or "#A, #B" through unchanged.
        bare_tags = (token.strip('#,') for token in hashtags.split())
        hashtags = ' '.join(f"#{tag}" for tag in bare_tags if tag)
        if not hashtags:
            logger.warning("LLM returned no hashtags; using fallback")
            return fallback_hashtags
        logger.info(f"Generated hashtags: {hashtags}")
        return hashtags
    except Exception as err:
        logger.error(f"Failed to generate blog hashtags: {err}")
        return fallback_hashtags
def _slugify_title(text):
    """Build a lowercase, hyphen-separated slug of at most 60 characters from *text*."""
    slug = text.lower()
    # Remove special characters
    slug = re.sub(r'[^a-z0-9\s-]', '', slug)
    # Replace spaces with hyphens
    slug = re.sub(r'\s+', '-', slug)
    # Remove redundant hyphens
    slug = re.sub(r'-+', '-', slug)
    # Limit length to 60 characters
    return slug[:60].strip('-')


def generate_blog_slug(blog_title):
    """
    Generate an SEO-friendly URL slug from the blog title.

    The LLM is asked for a slug first. If its reply is not made only of
    lowercase letters, digits and hyphens, or if the call fails, the slug is
    built programmatically from the title instead.

    Args:
        blog_title (str): The title of the blog article

    Returns:
        str: An SEO-friendly URL slug of at most 60 characters. Falls back to
            "blog-article" when nothing usable can be derived (for example a
            title without any ASCII letters or digits).
    """
    logger.info("Generating SEO-friendly URL slug")
    try:
        # Use a prompt to generate a customized slug
        prompt = f"""As an SEO specialist, create an SEO-friendly URL slug for this blog title: "{blog_title}"
REQUIREMENTS:
1. Keep it under 60 characters
2. Use only lowercase letters, numbers, and hyphens
3. Include primary keywords near the beginning
4. Remove all unnecessary words (a, the, and, or, but, etc.)
5. Ensure it's human-readable and descriptive
EXAMPLES:
- Title: "10 Effective Ways to Improve Your Email Marketing ROI This Quarter"
Slug: "improve-email-marketing-roi"
- Title: "Why Most Remote Workers Are More Productive According to New Research"
Slug: "remote-workers-productivity-research"
Reply with ONLY the slug and no other text or explanation.
"""
        slug = llm_text_gen(prompt)
        # Trim whitespace first so quotes next to a trailing newline are removed.
        slug = slug.strip().strip('"\'').strip()
        if re.match(r'^[a-z0-9-]+$', slug):
            # Valid characters, but the prompt's hyphen and length rules are
            # only requests to the model, so enforce them here as well.
            slug = re.sub(r'-+', '-', slug)[:60].strip('-')
        else:
            slug = ""
        if not slug:
            # The LLM didn't create a usable slug; build it programmatically.
            slug = _slugify_title(blog_title)
        logger.info(f"Generated slug: {slug}")
    except Exception as err:
        logger.error(f"Failed to generate blog slug: {err}")
        # Create a simple slug programmatically as fallback
        slug = _slugify_title(blog_title)
    # Never hand back an empty slug.
    return slug or "blog-article"
# Helper function to run the asyncio event loop within Streamlit
def run_async(coro):
    """
    Run a coroutine to completion on a fresh event loop and return its result.

    A new event loop is created and installed as the current loop for the
    duration of the call.

    Args:
        coro: The coroutine (or other awaitable accepted by
            ``loop.run_until_complete``) to execute.

    Returns:
        Whatever ``coro`` returns.

    Raises:
        Exception: Anything raised by ``coro`` propagates to the caller after
            the loop has been closed.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(coro)
    finally:
        # Close the loop even when the coroutine raises, and do not leave a
        # closed loop registered as this thread's current event loop.
        try:
            loop.close()
        finally:
            asyncio.set_event_loop(None)
def get_blog_metadata_longform(longform_content):
    """
    Save long-form content to disk as groundwork for cached metadata generation.

    The text overwrites the file "lognform_metadata_file" inside the directory
    named by the CONTENT_SAVE_DIR environment variable. When the variable is
    unset or empty, the current working directory is used. The directory is
    created if it does not exist. The caching/LLM step is not implemented
    yet, so a placeholder string is returned.

    Args:
        longform_content (str): The long-form text to store

    Returns:
        str: The placeholder "TBD: Not implemented"
    """
    # os.getenv returns None when the variable is missing, which would make
    # os.path.join raise TypeError; fall back to the working directory.
    save_dir = os.getenv("CONTENT_SAVE_DIR") or os.getcwd()
    os.makedirs(save_dir, exist_ok=True)
    # NOTE: the "lognform" spelling is kept as-is; it is the on-disk file name.
    filepath = os.path.join(save_dir, "lognform_metadata_file")
    # Write mode ("w") overwrites existing content. An explicit UTF-8 encoding
    # keeps non-ASCII text from failing on platforms with a narrower default.
    with open(filepath, "w", encoding="utf-8") as file:
        file.write(longform_content)
    print(f"String saved successfully to: {filepath}")
    # TODO: implement the cached long-form flow that was sketched here:
    #   1. upload the saved file with genai.upload_file and wait while its
    #      state is PROCESSING,
    #   2. create a caching.CachedContent (model 'models/gemini-1.5-flash-001')
    #      with a TTL over the uploaded file,
    #   3. build a GenerativeModel from the cache and ask it for a 10-line
    #      summary listing the main points, returning response.text.
    return "TBD: Not implemented"