WIP - Streamlit UI, firecrawl - V0.5

This commit is contained in:
ajaysi
2024-06-12 16:01:46 +05:30
parent ccbaa0e4fa
commit f2aa79264e
12 changed files with 201 additions and 261 deletions

View File

@@ -60,10 +60,14 @@ def scrape_url(url):
Returns:
dict: The result of the URL scraping, or None if an error occurred.
"""
print(f"\n\nURL: {url} ---- {os.getenv('FIRECRAWL_API_KEY')}\n\n")
client = initialize_client()
params = {
'pageOptions': {
'onlyMainContent': True
}
}
try:
result = client.scrape_url(url)
result = client.scrape_url(url, params=params)
return result
except Exception as e:
logging.error(f"Error scraping URL: {e}")

View File

@@ -75,8 +75,6 @@ def write_blog_from_keywords(search_keywords, url=None):
logger.info(f"\n\n######### Blog content after Tavily AI research: ######### \n\n")
blog_markdown_str = write_blog_google_serp(search_keywords, tavily_search_result)
status.update(label="Finished Writing Blog From Tavily Results:{blog_markdown_str}")
else:
print("Not Writing with TAVILY..\n\n")
except Exception as err:
logger.error(f"Failed to do Tavily AI research: {err}")
@@ -96,10 +94,10 @@ def write_blog_from_keywords(search_keywords, url=None):
status.update(label=f"Saved the content in this file: {saved_blog_to_file}")
blog_frontmatter = dedent(f"""
\n---------------------------------------------------------------------
title: {blog_title}\n
categories: [{blog_categories}]\n
tags: [{blog_tags}]\n
Meta description: {blog_meta_desc.replace(":", "-")}\n
title: {blog_title.strip()}\n
categories: [{blog_categories.strip()}]\n
tags: [{blog_tags.strip()}]\n
Meta description: {blog_meta_desc.replace(":", "-").strip()}\n
---------------------------------------------------------------------\n
""")
logger.info(f"\n\n --------- Finished writing Blog for : {search_keywords} -------------- \n")

View File

@@ -1,6 +1,8 @@
import sys
import os
from textwrap import dedent
import json
from pathlib import Path
from datetime import datetime
import streamlit as st
@@ -14,14 +16,11 @@ logger.add(sys.stdout,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search,\
do_tavily_ai_search, do_metaphor_ai_research, do_google_pytrends_analysis
from ..ai_web_researcher.firecrawl_web_crawler import scrape_url
from .blog_from_google_serp import write_blog_google_serp, blog_with_research
from ..ai_web_researcher.you_web_reseacher import get_rag_results, search_ydc_index
from ..blog_metadata.get_blog_metadata import blog_metadata
from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
from ..gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
def blog_from_url(weburl):
@@ -38,62 +37,36 @@ def blog_from_url(weburl):
with st.status("Started Writing..", expanded=True) as status:
st.empty()
status.update(label=f"Researching and Writing Blog on: {weburl}")
scraped_text = scrape_url(weburl)
print(scraped_text)
exit(1)
# Call on the got-researcher, tavily apis for this. Do google search for organic competition.
try:
google_search_result, g_titles = do_google_serp_search(search_keywords)
status.update(label=f"🙎 Finished with Google web for Search: {search_keywords}")
example_blog_titles.append(g_titles)
status.update(label=f"🛀 Starting Tavily AI research: {search_keywords}")
tavily_search_result, t_titles, t_answer = do_tavily_ai_search(search_keywords)
status.update(label=f"🙆 Finished Google Search & Tavily AI Search on: {search_keywords}",
state="complete", expanded=False)
scraped_text = scrape_url(weburl)
logger.info(scraped_text)
except Exception as err:
st.error(f"Failed in web research: {err}")
st.error(f"Failed to scrape web page from url-{weburl} - Error: {err}")
logger.error(f"Failed in web research: {err}")
st.stop()
status.update(label="Successfully Scraped/Fetched url: {weburl}", expanded=False, state="complete")
with st.status("Started Writing blog from google search..", expanded=True) as status:
status.update(label="Researching and Writing Blog on keywords.")
# Call on the got-researcher, tavily apis for this. Do google search for organic competition.
try:
status.update(label=f"🛀 Writing blog from Google Search on: {search_keywords}")
blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result)
st.markdown(blog_markdown_str)
status.update(label="🙎 Draft 1: Your Content from Google search result.", state="complete", expanded=False)
except Exception as err:
st.error(f"Failed in Google web research: {err}")
logger.error(f"Failed in Google web research: {err}")
# logger.info/check the final blog content.
logger.info("######### Draft1: Finished Blog from Google web search: ###########")
with st.status("Started Writing blog from Tavily Web search..", expanded=True) as status:
with st.status(f"Started Writing blog from {weburl}..", expanded=True) as status:
# Do Tavily AI research to augument the above blog.
try:
#example_blog_titles.append(t_titles)
if blog_markdown_str and tavily_search_result:
logger.info(f"\n\n######### Blog content after Tavily AI research: ######### \n\n")
blog_markdown_str = write_blog_google_serp(search_keywords, tavily_search_result)
status.update(label="Finished Writing Blog From Tavily Results:{blog_markdown_str}")
else:
print("Not Writing with TAVILY..\n\n")
blog_markdown_str = write_blog_from_weburl(scraped_text)
status.update(label="Finished Writing Blog From: {weburl}")
except Exception as err:
logger.error(f"Failed to do Tavily AI research: {err}")
logger.error(f"Failed to write blog from: {weburl}")
st.error(f"Failed to write blog from: {weburl}")
st.stop()
status.update(label="🙎 Generating - Title, Meta Description, Tags, Categories for the content.")
try:
status.update(label="🙎 Generating - Title, Meta Description, Tags, Categories for the content.")
blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str)
except Exception as err:
st.error(f"Failed to get blog metadata: {err}")
try:
status.update(label="🙎 Generating Image for the new blog.")
generated_image_filepath = generate_image(f"{blog_title} + ' ' + {blog_meta_desc}")
except Exception as err:
st.error(f"Failed in Image generation: {err}")
st.warning(f"Failed in Image generation: {err}")
saved_blog_to_file = save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc,
blog_tags, blog_categories, generated_image_filepath)
@@ -106,8 +79,45 @@ def blog_from_url(weburl):
Meta description: {blog_meta_desc.replace(":", "-")}\n
---------------------------------------------------------------------\n
""")
logger.info(f"\n\n --------- Finished writing Blog for : {search_keywords} -------------- \n")
logger.info(f"\n\n --------- Finished writing Blog for : {weburl} -------------- \n")
st.markdown(f"{blog_frontmatter}")
st.image(generated_image_filepath)
st.markdown(f"{blog_markdown_str}")
status.update(label=f"Finished, Review & Use your Original Content Below: {saved_blog_to_file}")
def write_blog_from_weburl(scraped_website):
    """Write a blog post, including FAQs, from scraped website content.

    Loads the JSON file named by the ``ALWRITY_CONFIG`` environment variable,
    reads its 'Blog Content Characteristics' section (type, language, tone and
    length), builds a prompt around the scraped content and sends that prompt
    to ``llm_text_gen``.

    Args:
        scraped_website: Scraped content of the web page to write about.

    Returns:
        Whatever ``llm_text_gen`` returns for the prompt.

    Raises:
        Exception: Re-raised after logging when the config file cannot be
            read or the LLM call fails.
        KeyError: If the config lacks an expected blog-characteristics key.
    """
    try:
        config_path = Path(os.environ["ALWRITY_CONFIG"])
        with open(config_path, 'r', encoding='utf-8') as file:
            config = json.load(file)
    except Exception as err:
        logger.error(f"Error: Failed to read values from config: {err}")
        # Re-raise rather than exit(1): SystemExit is not an Exception
        # subclass, so a caller's `except Exception` handler would never get
        # the chance to report the failure.
        raise

    blog_characteristics = config['Blog Content Characteristics']
    blog_type = blog_characteristics['Blog Type']
    blog_language = blog_characteristics['Blog Language']
    blog_tone = blog_characteristics['Blog Tone']
    blog_length = blog_characteristics['Blog Length']

    # Guideline 4 refers to the website content: no Google search result is
    # ever passed to this function.
    prompt = (
        "\n"
        "As expert Creative Content writer, I will provide you with scraped website content.\n"
        f"I want you to write a detailed {blog_type} blog post including 5 FAQs.\n"
        "Below are the guidelines to follow:\n"
        f"1). You must respond in {blog_language} language.\n"
        f"2). Tone and Brand Alignment: Adjust your tone, voice, personality for {blog_tone} audience.\n"
        f"3). Make sure your response content length is of {blog_length} words.\n"
        "4). Base the FAQs on questions readers are likely to ask about the provided website content.\n"
        "I want the post to offer unique insights, relatable examples, and a fresh perspective on the topic.\n"
        "\n\n\n"
        "Website Content:\n"
        f"'''{scraped_website}'''\n"
    )
    logger.info("Generating blog and FAQs from scraped website content.")
    try:
        return llm_text_gen(prompt)
    except Exception as err:
        logger.error(f"Exit: Failed to get response from LLM: {err}")
        raise

View File

@@ -1,29 +0,0 @@
import sys
import os
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
def get_blog_categories(blog_article):
    """Suggest blog categories for the given blog content.

    Args:
        blog_article: Blog content to categorise.

    Returns:
        The response of ``llm_text_gen``; the prompt asks for two
        comma separated categories.

    Raises:
        Exception: Re-raised after logging when the LLM call fails, so the
            caller never receives an implicit ``None``.
    """
    prompt = (
        "As an expert SEO and content writer, I will provide you with blog content.\n"
        "Suggest only 2 blog categories which are most relevant to provided blog content,\n"
        "by identifying the main topic. Also consider the target audience and the\n"
        "blog's category taxonomy. Only reply with comma separated values.\n"
        f"The blog content is: '{blog_article}'\n"
    )
    logger.info("Generating blog categories for the given blog.")
    try:
        return llm_text_gen(prompt)
    except Exception as err:
        logger.error(f"get_blog_categories:Failed to get response from LLM: {err}")
        # Propagate like the sibling metadata generators do instead of
        # silently falling through and returning None.
        raise

View File

@@ -1,30 +0,0 @@
import sys
import os
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
def generate_blog_description(blog_content):
    """Ask the LLM for an SEO meta description of the given blog content.

    Args:
        blog_content: Blog text the meta description should summarise.

    Returns:
        The response of ``llm_text_gen`` for the meta description prompt.

    Raises:
        Exception: Re-raised after logging when the LLM call fails.
    """
    logger.info("Generating Blog Meta Description for the given blog.")
    description_prompt = (
        "As an expert SEO and blog writer, Compose a compelling meta description for the given blog content,\n"
        "adhering to SEO best practices. Keep it between 150-160 characters.\n"
        "Provide a glimpse of the content's value to entice readers.\n"
        "Respond with only one of your best effort and do not include your explanations.\n"
        f"Blog Content: '{blog_content}'"
    )
    try:
        meta_description = llm_text_gen(description_prompt)
    except Exception as err:
        logger.error(f"Failed to get response from LLM:{err}")
        raise err
    return meta_description

View File

@@ -1,6 +1,4 @@
import sys
import configparser
import json
import streamlit as st
from loguru import logger
@@ -17,43 +15,84 @@ def blog_metadata(blog_article):
""" Common function to get blog metadata """
logger.info(f"Generating Content MetaData\n")
blog_metadata_prompt = """
As an expert SEO and content writer, I will provide you with blog content.
1. Suggest only 2 blog categories which are most relevant to the provided blog content, by identifying the main topic.
Also consider the target audience and the blog's category taxonomy. Only reply with comma-separated values.
2. Compose a compelling meta description for the given blog content, adhering to SEO best practices.
Keep it between 150-160 characters. Provide a glimpse of the content's value to entice readers.
Respond with only one of your best efforts and do not include your explanations.
3. Write 1 blog title following SEO best practices. Please keep the title concise, not exceeding 60 words.
Respond with only 1 title and no explanations. Negative Keywords: Unveiling, unleash, power of. Don't use such words in your title.
4. Suggest only 2 relevant and specific blog tags for the given blog content. Only reply with comma-separated values.
The blog content is: '{blog_article}'
Please provide the result in the following JSON format:
{
"title": "Your generated blog title",
"meta_description": "Your generated meta description",
"tags": ["tag1", "tag2"],
"categories": ["category1", "category2"]
}
"""
try:
response = llm_text_gen(blog_metadata_prompt)
""" Cleans the response by removing ``` and 'json' strings """
result_json = response.replace("```", "").replace("json", "").strip()
# Convert the cleaned response to JSON
result_json = json.loads(result_json)
except Exception as err:
logger.error(f"Failed to get response from LLM: {err}")
st.error(f"Failed to get response from LLM: {err}")
# Extract the data from the JSON response
blog_title = result_json.get("title")
blog_meta_desc = result_json.get("meta_description")
blog_tags = result_json.get("tags")
blog_categories = result_json.get("categories")
blog_title = generate_blog_title(blog_article)
blog_meta_desc = generate_blog_description(blog_article)
blog_tags = get_blog_tags(blog_article)
blog_categories = get_blog_categories(blog_article)
return blog_title, blog_meta_desc, blog_tags, blog_categories
def generate_blog_title(blog_article):
    """Ask the LLM for a single SEO-oriented title for the given blog content.

    Args:
        blog_article: Blog content the title is generated for.

    Returns:
        The response of ``llm_text_gen`` for the title prompt.

    Raises:
        Exception: Re-raised after logging when the LLM call fails.
    """
    logger.info("Generating blog title.")
    title_prompt = (
        "As a SEO expert, I will provide you with a blog content.\n"
        "Your task is write a SEO optimized and call to action, blog title for given blog content.\n"
        "Follow SEO best practises to suggest the blog title.\n"
        "Please keep the titles concise, not exceeding 60 words.\n"
        "Respond with only the title and no explanations.\n"
        "Negative Keywords: Unvieling, unleash, power of. Dont use such words in your title.\n"
        "\nGenerate blog title for this given blog content:\n"
        f" '{blog_article}' "
    )
    try:
        title = llm_text_gen(title_prompt)
    except Exception as err:
        logger.error(f"Failed to get response from LLM: {err}")
        raise err
    return title
def generate_blog_description(blog_content):
    """Ask the LLM for an SEO meta description of the given blog content.

    Args:
        blog_content: Blog text the meta description should summarise.

    Returns:
        The response of ``llm_text_gen`` for the meta description prompt.

    Raises:
        Exception: Re-raised after logging when the LLM call fails.
    """
    logger.info("Generating Blog Meta Description for the given blog.")
    description_prompt = (
        "As an expert SEO and blog writer, Compose a compelling meta description for the given blog content,\n"
        "adhering to SEO best practices. Keep it between 150-160 characters.\n"
        "Provide a glimpse of the content's value to entice readers.\n"
        "Respond with only one of your best effort and do not include your explanations.\n"
        f"Blog Content: '{blog_content}'"
    )
    try:
        meta_description = llm_text_gen(description_prompt)
    except Exception as err:
        logger.error(f"Failed to get response from LLM:{err}")
        raise err
    return meta_description
def get_blog_categories(blog_article):
    """Suggest blog categories for the given blog content.

    Args:
        blog_article: Blog content to categorise.

    Returns:
        The response of ``llm_text_gen``; the prompt asks for two
        comma separated categories.

    Raises:
        Exception: Re-raised after logging when the LLM call fails, so the
            caller never receives an implicit ``None``.
    """
    prompt = (
        "As an expert SEO and content writer, I will provide you with blog content.\n"
        "Suggest only 2 blog categories which are most relevant to provided blog content,\n"
        "by identifying the main topic. Also consider the target audience and the\n"
        "blog's category taxonomy. Only reply with comma separated values.\n"
        f"The blog content is: '{blog_article}'\n"
    )
    logger.info("Generating blog categories for the given blog.")
    try:
        return llm_text_gen(prompt)
    except Exception as err:
        logger.error(f"get_blog_categories:Failed to get response from LLM: {err}")
        # Propagate like the sibling metadata generators do instead of
        # silently falling through and returning None.
        raise
def get_blog_tags(blog_article):
    """Ask the LLM to suggest tags for the given blog content.

    Args:
        blog_article: Blog content the tags should describe.

    Returns:
        The response of ``llm_text_gen``; the prompt asks for two
        comma separated tags.

    Raises:
        Exception: Re-raised after logging when the LLM call fails.
    """
    tags_prompt = (
        "As an expert SEO and blog writer, suggest only 2 relevant and specific blog tags\n"
        "for the given blog content. Only reply with comma separated values.\n"
        f"Blog content: {blog_article}."
    )
    logger.info("Generating Blog tags for the given blog post.")
    try:
        suggested_tags = llm_text_gen(tags_prompt)
    except Exception as err:
        logger.error(f"Failed to get response from LLM: {err}")
        raise err
    return suggested_tags

View File

@@ -1,53 +0,0 @@
import os
import sys
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
def generate_blog_title(blog_article, keywords=None, example_titles=None, num_titles=1):
    """Ask the LLM for SEO-oriented blog title(s).

    The prompt depends on which optional inputs are supplied:

    * ``keywords`` and ``example_titles``: titles meant to compete with the
      given example titles.
    * ``keywords`` only: titles based on the keywords.
    * no ``keywords``: titles based on ``blog_article``. This also covers the
      case where only ``example_titles`` is given, which previously sent an
      empty prompt to the LLM.

    Args:
        blog_article: Blog content used when no keywords are supplied.
        keywords: Optional blog keywords.
        example_titles: Optional example titles to compete against.
        num_titles: Number of titles requested in the prompt.

    Returns:
        The response of ``llm_text_gen`` for the selected prompt.

    Raises:
        Exception: Re-raised after logging when the LLM call fails.
    """
    logger.info("Generating blog title.")

    # Rules shared verbatim by every prompt variant.
    common_rules = (
        "Follow SEO best practises to suggest the blog title.\n"
        "Please keep the titles concise, not exceeding 60 words.\n"
        f"Respond with only {num_titles} title and no explanations.\n"
        "Negative Keywords: Unvieling, unleash, power of. Dont use such words in your title.\n"
    )

    if keywords and example_titles:
        prompt = (
            "As a SEO expert, I will provide you with my blog keywords and example titles.\n"
            f"Your task is to write {num_titles} blog title.\n"
            "Ensure that your blog titles will help in competing against given example titles.\n"
            + common_rules
            + f"Blog Keywords: '{keywords}'\n"
            + f"Example Titles: '{example_titles}'\n"
        )
    elif keywords:
        # The value passed here is the keywords, so label it as such rather
        # than as the blog article.
        prompt = (
            "As a SEO expert, I will provide you with my blog keywords.\n"
            f"Your task is to write {num_titles} blog title.\n"
            + common_rules
            + f"Blog Keywords: '{keywords}'\n"
        )
    else:
        prompt = (
            "As a SEO expert, I will provide you with a blog content.\n"
            "Your task is write a SEO optimized and call to action, blog title for given blog content.\n"
            + common_rules
            + f"Generate {num_titles} blog title for this given blog content:\n"
            + f" '{blog_article}' "
        )

    try:
        return llm_text_gen(prompt)
    except Exception as err:
        logger.error(f"Failed to get response from LLM: {err}")
        raise

View File

@@ -1,29 +0,0 @@
import sys
import os
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
def get_blog_tags(blog_article):
    """Ask the LLM to suggest tags for the given blog content.

    Args:
        blog_article: Blog content the tags should describe.

    Returns:
        The response of ``llm_text_gen``; the prompt asks for two
        comma separated tags.

    Raises:
        Exception: Re-raised after logging when the LLM call fails.
    """
    # The GPT_PROVIDER environment variable is deliberately not read here:
    # its value was never used and the lookup raised KeyError when unset.
    prompt = (
        "As an expert SEO and blog writer, suggest only 2 relevant and specific blog tags\n"
        "for the given blog content. Only reply with comma separated values.\n"
        f"Blog content: {blog_article}."
    )
    logger.info("Generating Blog tags for the given blog post.")
    try:
        return llm_text_gen(prompt)
    except Exception as err:
        logger.error(f"Failed to get response from LLM: {err}")
        raise

View File

@@ -21,19 +21,21 @@ from lib.ai_writers.twitter_ai_writer import tweet_writer
from lib.ai_writers.insta_ai_writer import insta_writer
from lib.ai_writers.youtube_ai_writer import write_yt_title, write_yt_description, write_yt_script
from lib.ai_writers.web_url_ai_writer import blog_from_url
from lib.gpt_providers.text_generation.ai_story_writer import ai_story_generator
from lib.gpt_providers.text_generation.ai_essay_writer import ai_essay_generator
from lib.ai_writers.ai_story_writer import ai_story_generator
from lib.ai_writers.ai_essay_writer import ai_essay_generator
from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
from lib.content_planning_calender.content_planning_agents_alwrity_crew import ai_agents_planner
def is_youtube_link(text):
    """Check whether ``text`` begins with a YouTube-style URL.

    Args:
        text: Candidate input string, or ``None`` when nothing was entered.

    Returns:
        The ``re.Match`` object when the start of ``text`` matches the
        YouTube URL pattern, otherwise ``None`` (including for ``None`` input).
    """
    # Guard first: re.match() raises TypeError on None.
    if text is None:
        return None
    youtube_regex = re.compile(r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})')
    return youtube_regex.match(text)
def is_web_link(text):
    """Check whether ``text`` begins with something shaped like a web URL.

    Args:
        text: Candidate input string, or ``None`` when nothing was entered.

    Returns:
        The ``re.Match`` object when the start of ``text`` matches the web
        URL pattern, otherwise ``None`` (including for ``None`` input).
    """
    # Guard first: re.match() raises TypeError on None.
    if text is None:
        return None
    web_regex = re.compile(r'(https?://)?(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\.[a-zA-Z0-9()]{1,6}\b([-a-zA-Z0-9()@:%_\+.~#?&//=]*)')
    return web_regex.match(text)
def process_input(input_text, uploaded_file):
if is_youtube_link(input_text):
@@ -44,8 +46,10 @@ def process_input(input_text, uploaded_file):
elif is_web_link(input_text):
return("web_url")
else:
elif input_text is not None:
return("keywords")
elif input_text is None:
input_text = None
if uploaded_file is not None:
file_details = {"filename": uploaded_file.name, "filetype": uploaded_file.type, "filesize": uploaded_file.size}
@@ -61,8 +65,10 @@ def process_input(input_text, uploaded_file):
st.image(uploaded_file)
elif uploaded_file.type.startswith("audio/"):
st.audio(uploaded_file)
return("audio_file")
elif uploaded_file.type.startswith("video/"):
st.video(uploaded_file)
def blog_from_keyword():
""" Input blog keywords, research and write a factual blog."""
@@ -82,11 +88,16 @@ def blog_from_keyword():
type=["txt", "pdf", "docx", "jpg", "jpeg", "png", "mp3", "wav", "mp4", "mkv", "avi"],
help='Attach files such as audio, video, images, or documents.')
input_type = process_input(user_input, uploaded_file)
content_type = st.radio("Select content type:", ["Normal-length content", "Long-form content", "Experimental - AI Agents team"])
if st.button("Write Blog"):
# Clear the previous results from the screen
st.empty()
if user_input == "": user_input = None
if uploaded_file is None and user_input is None:
st.error("🤬🤬 Either Enter/Type/Attach, can't read your mind.(yet..)")
st.stop()
input_type = process_input(user_input, uploaded_file)
# Check if the user input is keywords or blog title.
if 'keywords' in input_type:
if user_input and len(user_input.split()) >= 2:
@@ -95,10 +106,9 @@ def blog_from_keyword():
short_blog = write_blog_from_keywords(user_input)
st.markdown(short_blog)
except Exception as err:
st.error(f"🚫 Failed to write blog on {user_keywords}, Error: {err}")
st.error(f"🚫 Failed to write blog on {user_input}, Error: {err}")
elif content_type == "Long-form content":
try:
st.empty()
long_form_generator(user_input)
st.success(f"Successfully wrote long-form blog on: {user_input}")
except Exception as err:
@@ -112,7 +122,7 @@ def blog_from_keyword():
else:
st.error('🚫 Blog keywords should be at least two words long. Please try again.')
elif 'youtube_url' in input_type:
elif 'youtube_url' in input_type or 'audio_file' in input_type:
generate_audio_blog(user_input)
elif 'web_url' in input_type:
blog_from_url(user_input)
@@ -248,23 +258,28 @@ def write_story():
st.title("Alwrity AI Story Writer ✍️")
st.write("Select your story writing persona or book genre and let AI help you craft an amazing story. 🌟")
# Select persona
selected_persona_name = st.selectbox(
"Select Your Story Writing Persona or Book Genre:",
options=personas,
help="Choose a persona that resonates with the style you want the AI Story Writer to adopt."
)
# Create two columns
col1, col2 = st.columns(2)
# Display persona description
if selected_persona_name:
st.info(persona_descriptions[selected_persona_name])
with col1:
# Select persona
selected_persona_name = st.selectbox(
"Select Your Story Writing Persona or Book Genre:",
options=personas,
help="Choose a persona that resonates with the style you want the AI Story Writer to adopt."
)
# Combined input for characters and plot details
story_details_input = st.text_area(
"Enter characters and plot details for your story:",
placeholder="E.g., Characters: John, Alice, Dragon, Detective\nPlot: A detective is trying to solve a mystery in a small town...",
help="Provide a list of characters and a brief outline of the plot for your story."
)
# Display persona description
if selected_persona_name:
st.info(persona_descriptions[selected_persona_name])
with col2:
# Combined input for characters and plot details
story_details_input = st.text_area(
"Enter characters and plot details for your story:",
placeholder="E.g., Characters: John, Alice, Dragon, Detective\nPlot: A detective is trying to solve a mystery in a small town...",
help="Provide a list of characters and a brief outline of the plot for your story."
)
# Generate story button
if st.button("Generate Story"):
@@ -275,7 +290,6 @@ def write_story():
st.error("Please select a persona and enter the story details to generate a story.")
def essay_writer():
st.title("AI Essay Writer 📝")
st.write("Select your essay type, education level, and desired length, then let AI generate an essay for you. ✨")

View File

@@ -166,4 +166,20 @@ div.row-widget.stRadio > div[role="radiogroup"] > label[data-baseweb="radio"] {
margin-bottom: 10px;
color: #333;
}
/* Theme the <audio> player through -webkit- prefixed media-control
   pseudo-elements. */

/* Control panel and its enclosure: purple background. */
audio::-webkit-media-controls-panel,
audio::-webkit-media-controls-enclosure {
background-color:#532b5a;}
/* Remaining-time and current-time read-outs: white text, no shadow. */
audio::-webkit-media-controls-time-remaining-display,
audio::-webkit-media-controls-current-time-display {
color: white;
text-shadow: none;
}
/* Timeline: same purple as the panel, rounded ends, 10px side margins. */
audio::-webkit-media-controls-timeline {
background-color: #532b5a;
border-radius: 25px;
margin-left: 10px;
margin-right: 10px;
}