Feature: AI SEO - Generate rich snippet from url

2024-07-12 19:05:20 +05:30
parent 7abc396633
commit e06c4ffae3
5 changed files with 153 additions and 8 deletions
--- a/README.md
+++ b/README.md
@@ -1,10 +1,10 @@
 # How to Alwrity - Getting Started

 Alwrity assists content creators and digital marketers in keyword web research, AI website & Social media content generation & AI Copywriting.
-Our toolkit integrates (OpenAI, Gemini, Anthropic) AI models for text generation, image creation(Stability.ai), STT(whisper, AssemblyAI) and Web or local data analysis, streamlining your content creation pipeline and ensuring high-quality output with minimal effort.
+Our toolkit integrates **(OpenAI, Gemini, Anthropic)** AI models for text generation, image creation **(Stability.ai), STT(whisper, AssemblyAI)** and Web or local data analysis, streamlining your content creation pipeline and ensuring high-quality output with minimal effort.

 Prompting is abstracted to get going sooner. Focus on your content quality, rather than AI tooling around it.
-Alwrity gives personalization, factual web researched & SEO optimized content and tools for automating content & digital marketing.
+Alwrity gives hyper content personalization, factual web researched & SEO optimized content and tools for automating content & digital marketing.

 AI will help achieve Content Hyper-Personalization.
 ![](https://github.com/AJaySi/AI-Writer/blob/main/lib/workspace/alwrity_ai_writer.png)
@@ -22,7 +22,7 @@ If you have 💻 Laptop + 🛜 Internet + 10 minutes, you will be generating blo
 ### [Getting started for Developers](https://github.com/AJaySi/AI-Writer/wiki/Alwrity--%E2%80%90-Get-started)
 ```
 1). git clone https://github.com/AJaySi/AI-Writer.git
-2). pip install -r requirements.txt
+2). pip install -U -r requirements.txt
 3). streamlit run alwrity.py

 4). Visit Alwrity UI in a Browser & Start generation AI personalized content.
--- a/alwrity.py
+++ b/alwrity.py
@@ -10,7 +10,7 @@ load_dotenv()

 #from lib.chatbot_custom.chatbot_local_docqa import alwrity_chat_docqa
 from lib.utils.alwrity_utils import (blog_from_keyword, ai_agents_team, 
-        essay_writer, ai_news_writer,
+        essay_writer, ai_news_writer, ai_seo_tools,
        ai_finance_ta_writer, ai_social_writer,
        do_web_research, competitor_analysis,
        )
@@ -333,8 +333,8 @@ def main():
        sidebar_configuration()

        # Define the tabs
-        tab1, tab2, tab3, tab4, tab5 = st.tabs(
-            ["AI Writers", "Content Planning", "Agents Content Teams", "Alwrity Brain", "Ask Alwrity"])
+        tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(
+            ["AI Writers", "Content Planning", "Agents Teams", "AI SEO tools", "Alwrity Brain", "Ask Alwrity"])
        with tab1:
            write_blog()

@@ -345,9 +345,12 @@ def main():
            ai_agents_team()

        with tab4:
-            alwrity_brain()
+            ai_seo_tools()

        with tab5:
+            alwrity_brain()
+
+        with tab6:
            st.info("Chatbot")
            st.markdown("Create a collection by uploading files (PDF, MD, CSV, etc), or crawl a data source (Websites, more sources coming soon.")
            st.markdown("One can ask/chat, summarize and do semantic search over the uploaded data")
--- a/lib/ai_seo_tools/seo_structured_data.py
+++ b/lib/ai_seo_tools/seo_structured_data.py
@@ -0,0 +1,128 @@
+import streamlit as st
+import json
+from datetime import date
+
+from ..ai_web_researcher.firecrawl_web_crawler import scrape_url
+from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
+
+
+# Supported schema.org types: "fields" lists the form inputs collected from the user, "schema_type" is the JSON-LD "@type" value.
+schema_types = {
+    "Article": {
+        "fields": ["Headline", "Author", "Date Published", "Keywords"],
+        "schema_type": "Article",
+    },
+    "Product": {
+        "fields": ["Name", "Description", "Price", "Brand", "Image URL"],
+        "schema_type": "Product",
+    },
+    "Recipe": {
+        "fields": ["Name", "Ingredients", "Cooking Time", "Serving Size", "Image URL"],
+        "schema_type": "Recipe",
+    },
+    "Event": {
+        "fields": ["Name", "Start Date", "End Date", "Location", "Description"],
+        "schema_type": "Event",
+    },
+    "LocalBusiness": {
+        "fields": ["Name", "Address", "Phone Number", "Opening Hours", "Image URL"],
+        "schema_type": "LocalBusiness",
+    },
+    # ... (add more schema types as needed)
+}
+
+
+def generate_json_data(content_type, details, url):
+    """Scrape `url`, merge the user-entered `details`, and ask the LLM for JSON-LD.
+
+    Returns the LLM response, or None when `content_type` is not in `schema_types`.
+    """
+    try:
+        scraped_text = scrape_url(url)
+    except Exception as err:
+        # Report the `url` argument; st.stop() then ends this Streamlit script run.
+        st.error(f"Failed to scrape web page from url-{url} - Error: {err}")
+        st.stop()
+
+    schema = schema_types.get(content_type)
+    if not schema:
+        st.error(f"Invalid content type: {content_type}")
+        return None
+    data = {
+        "@context": "https://schema.org",
+        "@type": schema["schema_type"],
+    }
+    for field in schema["fields"]:
+        value = details.get(field)
+        if isinstance(value, date):
+            value = value.isoformat()
+        # Use placeholder values if input is missing.
+        data[field] = value if value else "N/A"
+    if url:
+        data['url'] = url
+
+    return get_llm_structured_data(content_type, data, scraped_text)
+
+
+def get_llm_structured_data(content_type, data, scraped_text):
+    """Build the JSON-LD prompt from the scraped page and user data; return llm_text_gen's reply (None if it raises)."""
+    prompt = f"""Given the following information:
+
+        HTML Content: <<<HTML>>> {scraped_text} <<<END_HTML>>>
+        Content Type: <<<CONTENT_TYPE>>> {content_type} <<<END_CONTENT_TYPE>>>
+        Additional Relevant Data: <<<ADDITIONAL_DATA>>> {data} <<<END_ADDITIONAL_DATA>>>
+
+        Create a detailed structured data (JSON-LD) script for SEO purposes. 
+        The structured data should help search engines understand the content and features of the webpage, enhancing its visibility and potential for rich snippets in search results.
+        
+        Detailed Steps:
+        Parse the HTML content to extract relevant information like the title, main heading, and body content.
+        Use the contentType to determine the structured data type (e.g., Article, Product, Recipe).
+        Integrate the additional relevant data (e.g., author, datePublished, keywords) into the structured data.
+        Ensure all URLs, images, and other attributes are correctly formatted and included.
+        Validate the generated JSON-LD to ensure it meets schema.org standards and is free of errors.
+
+        Expected Output:
+        Generate a JSON-LD structured data snippet based on the provided inputs."""
+
+    try:
+        response = llm_text_gen(prompt)
+        return response
+    except Exception as err:
+        st.error(f"Exit: Failed to get response from LLM: {err}")
+
+
+def ai_structured_data():
+    """Render the Streamlit form for a schema type and URL, then show the generated JSON-LD and offer it for download."""
+    st.title("📝 Generate Structured Data for SEO 🚀")
+    st.markdown("**Make your content more discoverable with rich snippets.**")
+
+    content_type = st.selectbox("**Select Content Type**", list(schema_types.keys()))
+
+    details = {}
+    schema_fields = schema_types[content_type]["fields"]
+    num_fields = len(schema_fields)
+
+    url = st.text_input("**URL :**", placeholder="Enter the URL of your webpage")
+    for i in range(0, num_fields, 2):
+        cols = st.columns(2)
+        for j in range(2):
+            if i + j < num_fields:
+                field = schema_fields[i + j]
+                if "Date" in field:
+                    details[field] = cols[j].date_input(field)
+                else:
+                    details[field] = cols[j].text_input(field, placeholder=f"Enter {field.lower()}")
+
+    if st.button("Generate Structured Data"):
+        structured_data = generate_json_data(content_type, details, url)
+        if structured_data:
+            st.subheader("Generated Structured Data (JSON-LD):")
+            st.markdown(structured_data)
+
+            st.download_button(
+                label="Download JSON-LD",
+                data=structured_data,
+                file_name=f"{content_type}_structured_data.json",
+                mime="application/json",
+            )
--- a/lib/utils/alwrity_utils.py
+++ b/lib/utils/alwrity_utils.py
@@ -36,6 +36,7 @@ from lib.ai_writers.youtube_ai_writer import write_yt_title, write_yt_descriptio
 from lib.ai_writers.web_url_ai_writer import blog_from_url
 from lib.ai_writers.image_ai_writer import blog_from_image
 from lib.ai_writers.ai_essay_writer import ai_essay_generator
+from lib.ai_seo_tools.seo_structured_data import ai_structured_data
 from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
 from lib.content_planning_calender.content_planning_agents_alwrity_crew import ai_agents_planner

@@ -110,6 +111,19 @@ def process_input(input_text, uploaded_file):
    return None


+def ai_seo_tools():
+    """Show a selector of AI SEO tools and run the chosen one; only the structured-data option has a handler here."""
+    options = [
+        "Generate Structured Data - Rich Snippet",
+        "AI SEO Audit",
+        "Quit"
+    ]
+    choice = st.selectbox("**👇Select AI SEO Tool:**", options, index=0, format_func=lambda x: f"📝 {x}")
+
+    if choice == "Generate Structured Data - Rich Snippet":
+        ai_structured_data()
+
+
 def blog_from_keyword():
    """ Input blog keywords, research and write a factual blog."""
    st.title("Blog Content Writer")
--- a/lib/workspace/alwrity_config/main_config.json
+++ b/lib/workspace/alwrity_config/main_config.json
@@ -2,7 +2,7 @@
    "Blog Content Characteristics": {
        "Blog Length": "2000",
        "Blog Tone": "Casual",
-        "Blog Demographic": "Digital Marketing",
+        "Blog Demographic": "Professional",
        "Blog Type": "Informational",
        "Blog Language": "English",
        "Blog Output Format": "markdown"