Code Improvements: Error Handling: Improve error messages to be more informative. Log errors for debugging purposes. Code Readability: Add docstrings and comments to explain the purpose of functions and complex code blocks. Modularization: Split large functions into smaller, reusable functions. Group related functions together. Input Validation: Ensure user inputs are validated to prevent errors later in the code. User Experience Improvements: User Feedback: Provide immediate feedback on actions (e.g., loading spinners, success, and error messages). Use placeholders and help text to guide users on what inputs are expected. Interactive Elements: Use more interactive elements like sliders, date pickers, and multi-selects to enhance the user interface.
131 lines
4.8 KiB
Python
131 lines
4.8 KiB
Python
import streamlit as st
|
|
import json
|
|
from datetime import date
|
|
from dotenv import load_dotenv
|
|
|
|
from ..ai_web_researcher.firecrawl_web_crawler import scrape_url
|
|
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
|
|
|
# Load environment variables
|
|
load_dotenv()
|
|
|
|
# Registry of supported schema.org content types.
#
# Each entry pairs a schema.org type name with the labels of the input
# fields collected from the user for that type. The type name doubles as
# the registry key and as the "schema_type" value.
_FIELDS_BY_TYPE = (
    ("Article", ["Headline", "Author", "Date Published", "Keywords"]),
    ("Product", ["Name", "Description", "Price", "Brand", "Image URL"]),
    ("Recipe", ["Name", "Ingredients", "Cooking Time", "Serving Size", "Image URL"]),
    ("Event", ["Name", "Start Date", "End Date", "Location", "Description"]),
    ("LocalBusiness", ["Name", "Address", "Phone Number", "Opening Hours", "Image URL"]),
    # ... (add more schema types as needed)
)

# Maps content type -> {"fields": [...], "schema_type": <type name>}.
schema_types = {
    type_name: {"fields": field_labels, "schema_type": type_name}
    for type_name, field_labels in _FIELDS_BY_TYPE
}
|
|
|
|
def generate_json_data(content_type, details, url):
    """Generate JSON-LD structured data for a web page.

    Validates the content type, scrapes the page at ``url``, merges the
    user-supplied ``details`` into a base schema.org payload and hands
    everything to ``get_llm_structured_data``.

    Args:
        content_type: Key into ``schema_types`` (e.g. ``"Article"``).
        details: Mapping of field label -> user input. ``date`` values are
            converted to ISO-8601 strings; missing or empty values become
            the placeholder ``"N/A"``.
        url: Address of the page to scrape; also stored under ``"url"``
            in the payload when non-empty.

    Returns:
        The value returned by ``get_llm_structured_data``, or ``None``
        after reporting the problem with ``st.error`` when the content
        type is unknown or scraping raises.
    """
    # Validate the cheap, local input first so that an unknown content type
    # does not cost a network round-trip to the scraper.
    schema = schema_types.get(content_type)
    if not schema:
        st.error(f"Invalid content type: {content_type}")
        return None

    try:
        scraped_text = scrape_url(url)
    except Exception as err:
        st.error(f"Failed to scrape web page from URL: {url} - Error: {err}")
        return None

    if not scraped_text:
        # Without this guard an empty/None result would be interpolated into
        # the prompt verbatim (e.g. the literal text "None"). Generation still
        # proceeds from the user-provided details, as it did before.
        st.warning(
            f"No content could be scraped from URL: {url}. "
            "Generating structured data from the provided details only."
        )
        scraped_text = ""

    data = {
        "@context": "https://schema.org",
        "@type": schema["schema_type"],
    }
    for field in schema["fields"]:
        value = details.get(field)
        if isinstance(value, date):
            value = value.isoformat()
        # Use a placeholder value if the input is missing or empty.
        data[field] = value or "N/A"

    if url:
        data["url"] = url

    return get_llm_structured_data(content_type, data, scraped_text)
|
|
|
|
def get_llm_structured_data(content_type, data, scraped_text):
    """Ask the LLM to produce a JSON-LD snippet for a scraped page.

    Args:
        content_type: Name of the structured data type (e.g. ``"Article"``).
        data: Dict of additional user-provided data to merge into the
            structured data.
        scraped_text: Scraped page content embedded in the prompt.

    Returns:
        The value returned by ``llm_text_gen`` for the prompt, or ``None``
        after reporting the failure with ``st.error`` if the call raises.
    """
    # Serialise the extra data as JSON rather than a Python repr: the prompt
    # asks for JSON-LD, so double-quoted JSON is the less ambiguous input.
    # default=str keeps any non-JSON value (e.g. a date passed by another
    # caller) as text, which is what plain interpolation used to produce.
    data_json = json.dumps(data, indent=2, ensure_ascii=False, default=str)

    # NOTE: the prompt body is flush-left on purpose so that no source
    # indentation leaks into the text sent to the model.
    prompt = f"""Given the following information:

HTML Content: <<<HTML>>> {scraped_text} <<<END_HTML>>>
Content Type: <<<CONTENT_TYPE>>> {content_type} <<<END_CONTENT_TYPE>>>
Additional Relevant Data: <<<ADDITIONAL_DATA>>> {data_json} <<<END_ADDITIONAL_DATA>>>

Create a detailed structured data (JSON-LD) script for SEO purposes.
The structured data should help search engines understand the content and features of the webpage, enhancing its visibility and potential for rich snippets in search results.

Detailed Steps:
Parse the HTML content to extract relevant information like the title, main heading, and body content.
Use the contentType to determine the structured data type (e.g., Article, Product, Recipe).
Integrate the additional relevant data (e.g., author, datePublished, keywords) into the structured data.
Ensure all URLs, images, and other attributes are correctly formatted and included.
Validate the generated JSON-LD to ensure it meets schema.org standards and is free of errors.

Expected Output:
Generate a JSON-LD structured data snippet based on the provided inputs."""

    try:
        return llm_text_gen(prompt)
    except Exception as err:
        st.error(f"Failed to get response from LLM: {err}")
        return None
|
|
|
|
def ai_structured_data():
    """Render the Streamlit page for generating JSON-LD structured data.

    Shows a content-type selector, a URL input and one input widget per
    field of the selected schema, laid out two per row. When the user
    clicks the button, calls ``generate_json_data`` and, if it returns a
    result, displays it and offers it as a download.
    """
    st.title("📝 Generate Structured Data for SEO 🚀")
    st.markdown("**Make your content more discoverable with rich snippets.**")

    content_type = st.selectbox("**Select Content Type**", list(schema_types.keys()))
    schema_fields = schema_types[content_type]["fields"]

    url = st.text_input("**URL :**", placeholder="Enter the URL of your webpage")

    # Lay the schema fields out two per row; a trailing odd field simply
    # leaves the second column of the last row empty.
    details = {}
    for row_start in range(0, len(schema_fields), 2):
        row_fields = schema_fields[row_start:row_start + 2]
        for column, field in zip(st.columns(2), row_fields):
            if "Date" in field:
                details[field] = column.date_input(field)
            else:
                details[field] = column.text_input(field, placeholder=f"Enter {field.lower()}")

    if not st.button("Generate Structured Data"):
        return

    # Strip surrounding whitespace so a blank-looking URL is rejected here
    # instead of failing later inside the scraper.
    url = (url or "").strip()
    if not url:
        st.error("URL is required to generate structured data.")
        return

    # Scraping plus the LLM call can take a while; show progress meanwhile.
    with st.spinner("Generating structured data..."):
        structured_data = generate_json_data(content_type, details, url)

    if not structured_data:
        # No result to show here; generate_json_data returned nothing.
        return

    st.subheader("Generated Structured Data (JSON-LD):")
    st.markdown(structured_data)

    st.download_button(
        label="Download JSON-LD",
        data=structured_data,
        file_name=f"{content_type}_structured_data.json",
        mime="application/json",
    )
|