Feature: AI SEO - Generate rich snippet from url

2024-07-12 19:05:20 +05:30
parent 7abc396633
commit e06c4ffae3
5 changed files with 153 additions and 8 deletions
--- a/lib/ai_seo_tools/seo_structured_data.py
+++ b/lib/ai_seo_tools/seo_structured_data.py
@@ -0,0 +1,128 @@
+import streamlit as st
+import json
+from datetime import date
+
+from ..ai_web_researcher.firecrawl_web_crawler import scrape_url
+from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
+
+
+# Define a dictionary for schema types
+schema_types = {
+    "Article": {
+        "fields": ["Headline", "Author", "Date Published", "Keywords"],
+        "schema_type": "Article",
+    },
+    "Product": {
+        "fields": ["Name", "Description", "Price", "Brand", "Image URL"],
+        "schema_type": "Product",
+    },
+    "Recipe": {
+        "fields": ["Name", "Ingredients", "Cooking Time", "Serving Size", "Image URL"],
+        "schema_type": "Recipe",
+    },
+    "Event": {
+        "fields": ["Name", "Start Date", "End Date", "Location", "Description"],
+        "schema_type": "Event",
+    },
+    "LocalBusiness": {
+        "fields": ["Name", "Address", "Phone Number", "Opening Hours", "Image URL"],
+        "schema_type": "LocalBusiness",
+    },
+    # ... (add more schema types as needed)
+}
+
+
+def generate_json_data(content_type, details, url):
+    """Generates structured data (JSON-LD) based on user input."""
+    try:
+        scraped_text = scrape_url(url)
+        #logger.info(scraped_text)
+    except Exception as err:
+        st.error(f"Failed to scrape web page from url-{weburl} - Error: {err}")
+        st.stop()
+
+    schema = schema_types.get(content_type)
+    if not schema:
+        st.error(f"Invalid content type: {content_type}")
+        return
+    data = {
+        "@context": "https://schema.org",
+        "@type": schema["schema_type"],
+    }
+    for field in schema["fields"]:
+        value = details.get(field)
+        if isinstance(value, date):
+            value = value.isoformat()
+        if value:
+            data[field] = value
+        else:
+            data[field] = "N/A"  # Use placeholder values if input is missing
+    if url:
+        data['url'] = url
+
+    llm_structured_data = get_llm_structured_data(content_type, data, scraped_text)
+    return llm_structured_data
+
+
+def get_llm_structured_data(content_type, data, scraped_text):
+    """ Function to get structured data from LLM """
+    prompt = f"""Given the following information:
+
+        HTML Content: <<<HTML>>> {scraped_text} <<<END_HTML>>>
+        Content Type: <<<CONTENT_TYPE>>> {content_type} <<<END_CONTENT_TYPE>>>
+        Additional Relevant Data: <<<ADDITIONAL_DATA>>> {data} <<<END_ADDITIONAL_DATA>>>
+
+        Create a detailed structured data (JSON-LD) script for SEO purposes. 
+        The structured data should help search engines understand the content and features of the webpage, enhancing its visibility and potential for rich snippets in search results.
+        
+        Detailed Steps:
+        Parse the HTML content to extract relevant information like the title, main heading, and body content.
+        Use the contentType to determine the structured data type (e.g., Article, Product, Recipe).
+        Integrate the additional relevant data (e.g., author, datePublished, keywords) into the structured data.
+        Ensure all URLs, images, and other attributes are correctly formatted and included.
+        Validate the generated JSON-LD to ensure it meets schema.org standards and is free of errors.
+
+        Expected Output:
+        Generate a JSON-LD structured data snippet based on the provided inputs."""
+
+    try:
+        response = llm_text_gen(prompt)
+        return response
+    except Exception as err:
+        st.error(f"Exit: Failed to get response from LLM: {err}")
+
+
+def ai_structured_data():
+    st.title("📝 Generate Structured Data for SEO 🚀")
+    st.markdown("**Make your content more discoverable with rich snippets.**")
+
+    content_type = st.selectbox("**Select Content Type**", list(schema_types.keys()))
+
+    details = {}
+    schema_fields = schema_types[content_type]["fields"]
+    num_fields = len(schema_fields)
+
+    url = st.text_input("**URL :**", placeholder="Enter the URL of your webpage")
+    for i in range(0, num_fields, 2):
+        cols = st.columns(2)
+        for j in range(2):
+            if i + j < num_fields:
+                field = schema_fields[i + j]
+                if "Date" in field:
+                    details[field] = cols[j].date_input(field)
+                else:
+                    details[field] = cols[j].text_input(field, placeholder=f"Enter {field.lower()}")
+
+    if st.button("Generate Structured Data"):
+
+        structured_data = generate_json_data(content_type, details, url)
+        if structured_data:
+            st.subheader("Generated Structured Data (JSON-LD):")
+            st.markdown(structured_data)
+
+            st.download_button(
+                label="Download JSON-LD",
+                data=structured_data,
+                file_name=f"{content_type}_structured_data.json",
+                mime="application/json",
+            )