WIP - Streamlit UI, firecrawl - V0.5
This commit is contained in:
@@ -24,7 +24,9 @@ def check_api_keys():
|
|||||||
api_keys = {
|
api_keys = {
|
||||||
"METAPHOR_API_KEY": "Metaphor AI Key (Get it here: https://dashboard.exa.ai/login)",
|
"METAPHOR_API_KEY": "Metaphor AI Key (Get it here: https://dashboard.exa.ai/login)",
|
||||||
"TAVILY_API_KEY": "Tavily AI Key (Get it here: https://tavily.com/#api)",
|
"TAVILY_API_KEY": "Tavily AI Key (Get it here: https://tavily.com/#api)",
|
||||||
"SERPER_API_KEY": "Serper API Key (Get it here: https://serper.dev/signup)"
|
"SERPER_API_KEY": "Serper API Key (Get it here: https://serper.dev/signup)",
|
||||||
|
"STABILITY_API_KEY": "Stability API Key (Get it here: https://platform.stability.ai/)",
|
||||||
|
"FIRECRAWL_API_KEY": "Firecrawl API key (Get it here: https://www.firecrawl.dev/account)"
|
||||||
}
|
}
|
||||||
missing_keys = []
|
missing_keys = []
|
||||||
|
|
||||||
|
|||||||
94
lib/ai_web_researcher/firecrawl_web_crawler.py
Normal file
94
lib/ai_web_researcher/firecrawl_web_crawler.py
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from firecrawl import FirecrawlApp
|
||||||
|
import logging
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
# Load environment variables from .env file
|
||||||
|
load_dotenv(Path('../../.env'))
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||||
|
|
||||||
|
|
||||||
|
def initialize_client():
    """
    Initialize and return a Firecrawl client.

    The API key is read from the FIRECRAWL_API_KEY environment variable;
    this function takes no arguments.

    Returns:
        FirecrawlApp: A client instance configured with the API key.

    Raises:
        ValueError: If FIRECRAWL_API_KEY is not set or is empty.
    """
    api_key = os.getenv("FIRECRAWL_API_KEY")
    if not api_key:
        # Fail early with an actionable message instead of handing an
        # empty key to the SDK.
        raise ValueError(
            "Firecrawl API key not found. Please set the FIRECRAWL_API_KEY environment variable."
        )
    return FirecrawlApp(api_key=api_key)
|
||||||
|
|
||||||
|
|
||||||
|
def scrape_website(website_url, depth=1, max_pages=10):
    """
    Crawl a website starting from the given URL.

    The Firecrawl API key is read from the FIRECRAWL_API_KEY environment
    variable via initialize_client().

    Args:
        website_url (str): The URL of the website to crawl.
        depth (int, optional): The depth of crawling. Default is 1.
        max_pages (int, optional): The maximum number of pages to crawl. Default is 10.

    Returns:
        The result returned by the Firecrawl client, or None if an error
        occurred (the error is logged).
    """
    try:
        # Build the client inside the try so a missing/invalid key is logged
        # and reported as None, matching the documented contract.
        client = initialize_client()
        # The start URL is the first positional argument; crawl limits are
        # passed separately as params rather than bundled into the URL.
        # NOTE(review): option names follow Firecrawl's v0 `crawlerOptions`
        # (maxDepth / limit) - confirm against the installed firecrawl-py version.
        result = client.crawl_url(
            website_url,
            params={
                'crawlerOptions': {
                    'maxDepth': depth,
                    'limit': max_pages,
                }
            },
        )
        return result
    except Exception as e:
        logging.error(f"Error scraping website: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def scrape_url(url):
    """
    Scrape a specific URL.

    The Firecrawl API key is read from the FIRECRAWL_API_KEY environment
    variable via initialize_client().

    Args:
        url (str): The URL to scrape.

    Returns:
        The result returned by the Firecrawl client, or None if an error
        occurred (the error is logged).
    """
    # Log only the URL: the API key is a secret and must never be written
    # to stdout or logs.
    logging.info(f"Scraping URL: {url}")
    try:
        # Client creation is inside the try so a missing key is logged and
        # reported as None instead of escaping to the caller.
        client = initialize_client()
        result = client.scrape_url(url)
        return result
    except Exception as e:
        logging.error(f"Error scraping URL: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_data(url, schema):
    """
    Extract structured data from a URL using the provided schema.

    The Firecrawl API key is read from the FIRECRAWL_API_KEY environment
    variable via initialize_client().

    Args:
        url (str): The URL to extract data from.
        schema (dict): The schema to use for data extraction.

    Returns:
        The result returned by the Firecrawl client, or None if an error
        occurred (the error is logged).
    """
    try:
        # Client creation is inside the try so a missing key is logged and
        # reported as None instead of escaping to the caller.
        client = initialize_client()
        # NOTE(review): confirm that the installed firecrawl-py client exposes
        # `extract` and accepts a single dict argument in this shape.
        result = client.extract({
            'url': url,
            'schema': schema
        })
        return result
    except Exception as e:
        logging.error(f"Error extracting data: {e}")
        return None
|
||||||
@@ -1,101 +0,0 @@
|
|||||||
import sys
|
|
||||||
import os
|
|
||||||
from textwrap import dedent
|
|
||||||
from pathlib import Path
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
load_dotenv(Path('../../.env'))
|
|
||||||
from loguru import logger
|
|
||||||
logger.remove()
|
|
||||||
logger.add(sys.stdout,
|
|
||||||
colorize=True,
|
|
||||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
|
||||||
)
|
|
||||||
|
|
||||||
from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search,\
|
|
||||||
do_tavily_ai_search, do_metaphor_ai_research, do_google_pytrends_analysis
|
|
||||||
from .blog_from_google_serp import write_blog_google_serp, improve_blog_intro, blog_with_keywords, blog_with_research
|
|
||||||
from ..ai_web_researcher.you_web_reseacher import get_rag_results, search_ydc_index
|
|
||||||
from ..blog_metadata.get_blog_metadata import blog_metadata
|
|
||||||
from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
|
|
||||||
|
|
||||||
|
|
||||||
def write_blog_from_keywords(search_keywords, url=None):
|
|
||||||
"""
|
|
||||||
This function will take a blog Topic to first generate sections for it
|
|
||||||
and then generate content for each section.
|
|
||||||
"""
|
|
||||||
# Use to store the blog in a string, to save in a *.md file.
|
|
||||||
blog_markdown_str = ""
|
|
||||||
example_blog_titles = []
|
|
||||||
|
|
||||||
logger.info(f"Researching and Writing Blog on keywords: {search_keywords}")
|
|
||||||
# Call on the got-researcher, tavily apis for this. Do google search for organic competition.
|
|
||||||
try:
|
|
||||||
google_search_result, g_titles = do_google_serp_search(search_keywords)
|
|
||||||
example_blog_titles.append(g_titles)
|
|
||||||
blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result)
|
|
||||||
tavily_search_result, t_titles, t_answer = do_tavily_ai_search(search_keywords)
|
|
||||||
# Hate the robotic introductions.
|
|
||||||
blog_markdown_str = improve_blog_intro(blog_markdown_str, t_answer)
|
|
||||||
except Exception as err:
|
|
||||||
logger.error(f"Failed in Google web research: {err}")
|
|
||||||
# logger.info/check the final blog content.
|
|
||||||
logger.info("\n######### Draft1: Finished Blog from Google web search: ###########\n\n")
|
|
||||||
|
|
||||||
# Do Tavily AI research to augument the above blog.
|
|
||||||
try:
|
|
||||||
#example_blog_titles.append(t_titles)
|
|
||||||
blog_markdown_str = blog_with_research(blog_markdown_str, tavily_search_result)
|
|
||||||
logger.info(f"######### Blog content after Tavily AI research: ######### \n\n{blog_markdown_str}\n\n")
|
|
||||||
except Exception as err:
|
|
||||||
logger.error(f"Failed to do Tavily AI research: {err}")
|
|
||||||
logger.info("######### Draft2: Blog content after Tavily AI research: #########\n\n")
|
|
||||||
|
|
||||||
# try:
|
|
||||||
# # Do Metaphor/Exa AI search.
|
|
||||||
# metaphor_search_result, m_titles = do_metaphor_ai_research(search_keywords)
|
|
||||||
# example_blog_titles.append(m_titles)
|
|
||||||
# blog_markdown_str = blog_with_research(blog_markdown_str, metaphor_search_result)
|
|
||||||
# except Exception as err:
|
|
||||||
# logger.error(f"Failed to do Metaphor AI search: {err}")
|
|
||||||
# logger.info("######### Draft3: Blog content after Tavily AI research: ######### \n\n")
|
|
||||||
|
|
||||||
# Do Google trends analysis and combine with latest blog.
|
|
||||||
# try:
|
|
||||||
# pytrends_search_result = do_google_pytrends_analysis(search_keywords)
|
|
||||||
# logger.info(f"Google Trends keywords to use in the blog: {pytrends_search_result}\n")
|
|
||||||
# blog_markdown_str = blog_with_keywords(blog_markdown_str, pytrends_search_result)
|
|
||||||
# except Exception as err:
|
|
||||||
# logger.error(f"Failed to do Google Trends Analysis:{err}")
|
|
||||||
# logger.info(f"########### Blog Content After Google Trends Analysis:######### \n {blog_markdown_str}\n\n")
|
|
||||||
#
|
|
||||||
|
|
||||||
blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str,
|
|
||||||
search_keywords, example_blog_titles)
|
|
||||||
|
|
||||||
# fixme: Remove the hardcoding, need add another option OR in config ?
|
|
||||||
image_dir = os.path.join(os.getcwd(), "blog_images")
|
|
||||||
generated_image_name = f"generated_image_{datetime.now():%Y-%m-%d-%H-%M-%S}.png"
|
|
||||||
generated_image_filepath = os.path.join(image_dir, generated_image_name)
|
|
||||||
# Generate an image based on meta description
|
|
||||||
#logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
|
|
||||||
#main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
|
|
||||||
if url:
|
|
||||||
try:
|
|
||||||
generated_image_filepath = screenshot_api(url, generated_image_filepath)
|
|
||||||
except Exception as err:
|
|
||||||
logger.error(f"Failed in taking compnay page screenshot: {err}")
|
|
||||||
# TBD: Save the blog content as a .md file. Markdown or HTML ?
|
|
||||||
save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath)
|
|
||||||
|
|
||||||
blog_frontmatter = dedent(f"""\n\n\n\
|
|
||||||
---
|
|
||||||
title: {blog_title}
|
|
||||||
categories: [{blog_categories}]
|
|
||||||
tags: [{blog_tags}]
|
|
||||||
Meta description: {blog_meta_desc.replace(":", "-")}
|
|
||||||
---\n\n""")
|
|
||||||
logger.info(f"{blog_frontmatter}{blog_markdown_str}")
|
|
||||||
logger.info(f"\n\n ################ Finished writing Blog for : {search_keywords} #################### \n")
|
|
||||||
113
lib/ai_writers/web_url_ai_writer.py
Normal file
113
lib/ai_writers/web_url_ai_writer.py
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
import sys
|
||||||
|
import os
|
||||||
|
from textwrap import dedent
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
import streamlit as st
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
load_dotenv(Path('../../.env'))
|
||||||
|
from loguru import logger
|
||||||
|
logger.remove()
|
||||||
|
logger.add(sys.stdout,
|
||||||
|
colorize=True,
|
||||||
|
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||||
|
)
|
||||||
|
|
||||||
|
from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search,\
|
||||||
|
do_tavily_ai_search, do_metaphor_ai_research, do_google_pytrends_analysis
|
||||||
|
from ..ai_web_researcher.firecrawl_web_crawler import scrape_url
|
||||||
|
from .blog_from_google_serp import write_blog_google_serp, blog_with_research
|
||||||
|
from ..ai_web_researcher.you_web_reseacher import get_rag_results, search_ydc_index
|
||||||
|
from ..blog_metadata.get_blog_metadata import blog_metadata
|
||||||
|
from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
|
||||||
|
from ..gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
|
||||||
|
|
||||||
|
|
||||||
|
def blog_from_url(weburl):
|
||||||
|
"""
|
||||||
|
This function will take a blog Topic to first generate sections for it
|
||||||
|
and then generate content for each section.
|
||||||
|
"""
|
||||||
|
# Use to store the blog in a string, to save in a *.md file.
|
||||||
|
blog_markdown_str = None
|
||||||
|
tavily_search_result = None
|
||||||
|
example_blog_titles = []
|
||||||
|
|
||||||
|
logger.info(f"Researching and Writing Blog on: {weburl}")
|
||||||
|
with st.status("Started Writing..", expanded=True) as status:
|
||||||
|
st.empty()
|
||||||
|
status.update(label=f"Researching and Writing Blog on: {weburl}")
|
||||||
|
scraped_text = scrape_url(weburl)
|
||||||
|
print(scraped_text)
|
||||||
|
exit(1)
|
||||||
|
# Call on the got-researcher, tavily apis for this. Do google search for organic competition.
|
||||||
|
try:
|
||||||
|
google_search_result, g_titles = do_google_serp_search(search_keywords)
|
||||||
|
status.update(label=f"🙎 Finished with Google web for Search: {search_keywords}")
|
||||||
|
example_blog_titles.append(g_titles)
|
||||||
|
|
||||||
|
status.update(label=f"🛀 Starting Tavily AI research: {search_keywords}")
|
||||||
|
tavily_search_result, t_titles, t_answer = do_tavily_ai_search(search_keywords)
|
||||||
|
status.update(label=f"🙆 Finished Google Search & Tavily AI Search on: {search_keywords}",
|
||||||
|
state="complete", expanded=False)
|
||||||
|
|
||||||
|
except Exception as err:
|
||||||
|
st.error(f"Failed in web research: {err}")
|
||||||
|
logger.error(f"Failed in web research: {err}")
|
||||||
|
|
||||||
|
with st.status("Started Writing blog from google search..", expanded=True) as status:
|
||||||
|
status.update(label="Researching and Writing Blog on keywords.")
|
||||||
|
# Call on the got-researcher, tavily apis for this. Do google search for organic competition.
|
||||||
|
try:
|
||||||
|
status.update(label=f"🛀 Writing blog from Google Search on: {search_keywords}")
|
||||||
|
blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result)
|
||||||
|
st.markdown(blog_markdown_str)
|
||||||
|
status.update(label="🙎 Draft 1: Your Content from Google search result.", state="complete", expanded=False)
|
||||||
|
except Exception as err:
|
||||||
|
st.error(f"Failed in Google web research: {err}")
|
||||||
|
logger.error(f"Failed in Google web research: {err}")
|
||||||
|
|
||||||
|
# logger.info/check the final blog content.
|
||||||
|
logger.info("######### Draft1: Finished Blog from Google web search: ###########")
|
||||||
|
|
||||||
|
with st.status("Started Writing blog from Tavily Web search..", expanded=True) as status:
|
||||||
|
# Do Tavily AI research to augument the above blog.
|
||||||
|
try:
|
||||||
|
#example_blog_titles.append(t_titles)
|
||||||
|
if blog_markdown_str and tavily_search_result:
|
||||||
|
logger.info(f"\n\n######### Blog content after Tavily AI research: ######### \n\n")
|
||||||
|
blog_markdown_str = write_blog_google_serp(search_keywords, tavily_search_result)
|
||||||
|
status.update(label="Finished Writing Blog From Tavily Results:{blog_markdown_str}")
|
||||||
|
else:
|
||||||
|
print("Not Writing with TAVILY..\n\n")
|
||||||
|
except Exception as err:
|
||||||
|
logger.error(f"Failed to do Tavily AI research: {err}")
|
||||||
|
|
||||||
|
status.update(label="🙎 Generating - Title, Meta Description, Tags, Categories for the content.")
|
||||||
|
try:
|
||||||
|
blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str)
|
||||||
|
except Exception as err:
|
||||||
|
st.error(f"Failed to get blog metadata: {err}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
generated_image_filepath = generate_image(f"{blog_title} + ' ' + {blog_meta_desc}")
|
||||||
|
except Exception as err:
|
||||||
|
st.error(f"Failed in Image generation: {err}")
|
||||||
|
|
||||||
|
saved_blog_to_file = save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc,
|
||||||
|
blog_tags, blog_categories, generated_image_filepath)
|
||||||
|
status.update(label=f"Saved the content in this file: {saved_blog_to_file}")
|
||||||
|
blog_frontmatter = dedent(f"""
|
||||||
|
\n---------------------------------------------------------------------
|
||||||
|
title: {blog_title}\n
|
||||||
|
categories: [{blog_categories}]\n
|
||||||
|
tags: [{blog_tags}]\n
|
||||||
|
Meta description: {blog_meta_desc.replace(":", "-")}\n
|
||||||
|
---------------------------------------------------------------------\n
|
||||||
|
""")
|
||||||
|
logger.info(f"\n\n --------- Finished writing Blog for : {search_keywords} -------------- \n")
|
||||||
|
st.markdown(f"{blog_frontmatter}")
|
||||||
|
st.image(generated_image_filepath)
|
||||||
|
st.markdown(f"{blog_markdown_str}")
|
||||||
|
status.update(label=f"Finished, Review & Use your Original Content Below: {saved_blog_to_file}")
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
import sys
|
import sys
|
||||||
import configparser
|
import configparser
|
||||||
import json
|
import json
|
||||||
|
import streamlit as st
|
||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
logger.remove()
|
logger.remove()
|
||||||
@@ -12,7 +13,7 @@ logger.add(sys.stdout,
|
|||||||
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
|
||||||
|
|
||||||
|
|
||||||
def blog_metadata(blog_content):
|
def blog_metadata(blog_article):
|
||||||
""" Common function to get blog metadata """
|
""" Common function to get blog metadata """
|
||||||
logger.info(f"Generating Content MetaData\n")
|
logger.info(f"Generating Content MetaData\n")
|
||||||
|
|
||||||
@@ -47,7 +48,7 @@ def blog_metadata(blog_content):
|
|||||||
result_json = json.loads(result_json)
|
result_json = json.loads(result_json)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
logger.error(f"Failed to get response from LLM: {err}")
|
logger.error(f"Failed to get response from LLM: {err}")
|
||||||
raise err
|
st.error(f"Failed to get response from LLM: {err}")
|
||||||
|
|
||||||
# Extract the data from the JSON response
|
# Extract the data from the JSON response
|
||||||
blog_title = result_json.get("title")
|
blog_title = result_json.get("title")
|
||||||
|
|||||||
@@ -0,0 +1,61 @@
|
|||||||
|
import os
|
||||||
|
import google.generativeai as genai
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
def load_environment():
    """Populate the process environment by calling load_dotenv() with its defaults."""
    load_dotenv()
|
||||||
|
|
||||||
|
def configure_google_api():
    """Configure the google.generativeai module with the GEMINI_API_KEY value.

    Raises:
        ValueError: If GEMINI_API_KEY is unset or empty.
    """
    gemini_key = os.getenv("GEMINI_API_KEY")
    if gemini_key:
        genai.configure(api_key=gemini_key)
        return
    raise ValueError("Google API key not found. Please set the GEMINI_API_KEY environment variable.")
|
||||||
|
|
||||||
|
def transcribe_audio(audio_file_path):
    """Transcribe an audio file using Google's Generative AI API.

    Loads environment variables, configures the API, uploads the audio file
    and asks the model for a transcription.

    Args:
        audio_file_path (str): The path to the audio file to be transcribed.

    Returns:
        str or None: The text of the model response, or None if any step
        failed. Errors are printed rather than raised.
    """
    try:
        # Load environment variables and configure API
        load_environment()
        configure_google_api()

        # Create generative model instance
        model = genai.GenerativeModel("models/gemini-1.5-pro-latest")

        try:
            # Upload the audio file
            audio_file = genai.upload_file(path=audio_file_path)
        except Exception as err:
            print(err)
            # Without an uploaded file there is nothing to transcribe; stop
            # here instead of sending a request containing None.
            return None

        # Generate the transcription
        response = model.generate_content(
            [
                "Listen carefully to the given following audio file. Transcribe the following given audio.",
                audio_file
            ]
        )

        # Check if the response contains text
        if not hasattr(response, 'text'):
            raise ValueError("The API response does not contain text.")

        return response.text

    except ValueError as ve:
        print(f"ValueError: {ve}")
    except FileNotFoundError:
        print(f"FileNotFoundError: The audio file at {audio_file_path} does not exist.")
    except Exception as e:
        print(f"An error occurred: {e}")

    # Every handled failure path ends up here.
    return None
|
||||||
@@ -1,9 +1,12 @@
|
|||||||
from pytube import YouTube
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
from pytube import YouTube
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from openai import OpenAI
|
from openai import OpenAI
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
import streamlit as st
|
||||||
|
|
||||||
from tenacity import (
|
from tenacity import (
|
||||||
retry,
|
retry,
|
||||||
@@ -11,15 +14,40 @@ from tenacity import (
|
|||||||
wait_random_exponential,
|
wait_random_exponential,
|
||||||
) # for exponential backoff
|
) # for exponential backoff
|
||||||
|
|
||||||
|
from .gemini_audio_text import transcribe_audio
|
||||||
|
|
||||||
|
|
||||||
def progress_function(stream, chunk, bytes_remaining):
|
def progress_function(stream, chunk, bytes_remaining):
|
||||||
# Calculate the percentage completion
|
# Calculate the percentage completion
|
||||||
current = ((stream.filesize - bytes_remaining) / stream.filesize)
|
current = ((stream.filesize - bytes_remaining) / stream.filesize)
|
||||||
progress_bar.update(current - progress_bar.n) # Update the progress bar
|
progress_bar.update(current - progress_bar.n) # Update the progress bar
|
||||||
|
|
||||||
|
def rename_file_with_underscores(file_path):
    """Rename a file on disk so its name contains only safe characters.

    Every character in the file name that is not a word character, hyphen,
    underscore or dot is replaced with an underscore. The directory part of
    the path is left untouched.

    Args:
        file_path (str): Path of the existing file to rename.

    Returns:
        str: Path of the file after renaming.
    """
    parent_dir, base_name = os.path.split(file_path)
    sanitized_name = re.sub(r'[^\w\-_\.]', '_', base_name)
    target_path = os.path.join(parent_dir, sanitized_name)

    os.rename(file_path, target_path)
    return target_path
|
||||||
|
|
||||||
|
|
||||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
|
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
|
||||||
def speech_to_text(video_url, output_path='.'):
|
def speech_to_text(video_url):
|
||||||
"""
|
"""
|
||||||
Transcribes speech to text from a YouTube video URL using OpenAI's Whisper model.
|
Transcribes speech to text from a YouTube video URL using OpenAI's Whisper model.
|
||||||
|
|
||||||
@@ -33,75 +61,94 @@ def speech_to_text(video_url, output_path='.'):
|
|||||||
Raises:
|
Raises:
|
||||||
SystemExit: If a critical error occurs that prevents successful execution.
|
SystemExit: If a critical error occurs that prevents successful execution.
|
||||||
"""
|
"""
|
||||||
try:
|
output_path = os.getenv("CONTENT_SAVE_DIR")
|
||||||
audio_file = None
|
with st.status("Started Writing..", expanded=False) as status:
|
||||||
if video_url.startswith("https://www.youtube.com/") or video_url.startswith("http://www.youtube.com/"):
|
try:
|
||||||
logger.info(f"Accessing YouTube URL: {video_url}")
|
audio_file = None
|
||||||
yt = YouTube(video_url, on_progress_callback=progress_function)
|
if video_url.startswith("https://www.youtube.com/") or video_url.startswith("http://www.youtube.com/"):
|
||||||
|
logger.info(f"Accessing YouTube URL: {video_url}")
|
||||||
logger.info("Fetching the highest quality audio stream")
|
status.update(label=f"Accessing YouTube URL: {video_url}")
|
||||||
audio_stream = yt.streams.filter(only_audio=True).first()
|
yt = YouTube(video_url, on_progress_callback=progress_function)
|
||||||
|
|
||||||
if audio_stream is None:
|
logger.info("Fetching the highest quality audio stream")
|
||||||
logger.warning("No audio stream found for this video.")
|
status.update(label="Fetching the highest quality audio stream")
|
||||||
return None
|
audio_stream = yt.streams.filter(only_audio=True).first()
|
||||||
|
|
||||||
logger.info(f"Downloading audio for: {yt.title}")
|
if audio_stream is None:
|
||||||
global progress_bar
|
logger.warning("No audio stream found for this video.")
|
||||||
progress_bar = tqdm(total=1.0, unit='iB', unit_scale=True, desc=yt.title)
|
st.warning("No audio stream found for this video.")
|
||||||
|
return None
|
||||||
|
|
||||||
|
logger.info(f"Downloading audio for: {yt.title}")
|
||||||
|
status.update(label=f"Downloading audio for: {yt.title}")
|
||||||
|
global progress_bar
|
||||||
|
progress_bar = tqdm(total=1.0, unit='iB', unit_scale=True, desc=yt.title)
|
||||||
|
try:
|
||||||
|
audio_filename = re.sub(r'[^\w\-_\.]', '_', yt.title) + '.mp4'
|
||||||
|
audio_file = audio_stream.download(
|
||||||
|
output_path=os.getenv("CONTENT_SAVE_DIR"),
|
||||||
|
filename=audio_filename)
|
||||||
|
#audio_file = rename_file_with_underscores(audio_file)
|
||||||
|
except Exception as err:
|
||||||
|
logger.error(f"Failed to download audio file: {audio_file}")
|
||||||
|
|
||||||
|
progress_bar.close()
|
||||||
|
logger.info(f"Audio downloaded: {yt.title} to {audio_file}")
|
||||||
|
status.update(label=f"Audio downloaded: {yt.title} to {output_path}")
|
||||||
|
# Audio filepath from local directory.
|
||||||
|
elif os.path.exists(audio_input):
|
||||||
|
audio_file = video_url
|
||||||
|
|
||||||
|
# Checking file size
|
||||||
|
max_file_size = 24 * 1024 * 1024 # 24MB
|
||||||
|
file_size = os.path.getsize(audio_file)
|
||||||
|
# Convert file size to MB for logging
|
||||||
|
file_size_MB = file_size / (1024 * 1024) # Convert bytes to MB
|
||||||
|
|
||||||
|
logger.info(f"Downloaded Audio Size is: {file_size_MB:.2f} MB")
|
||||||
|
status.update(label=f"Downloaded Audio Size is: {file_size_MB:.2f} MB")
|
||||||
|
|
||||||
|
if file_size > max_file_size:
|
||||||
|
logger.error("File size exceeds 24MB limit.")
|
||||||
|
# FIXME: We can chunk hour long videos, the code is not tested.
|
||||||
|
#long_video(audio_file)
|
||||||
|
sys.exit("File size limit exceeded.")
|
||||||
|
st.error("Audio File size limit exceeded.")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
audio_file = audio_stream.download(output_path)
|
status.update(label=f"Initializing OpenAI client for transcription: {audio_file}")
|
||||||
except Exception as err:
|
logger.info(f"Initializing OpenAI client for transcription: {audio_file}")
|
||||||
logger.error(f"Failed to download audio file: {audio_file}")
|
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
||||||
|
|
||||||
progress_bar.close()
|
logger.info("Transcribing using OpenAI's Whisper model.")
|
||||||
logger.info(f"Audio downloaded: {yt.title} to {output_path}")
|
transcript = client.audio.transcriptions.create(
|
||||||
# Audio filepath from local directory.
|
model="whisper-1",
|
||||||
elif os.path.exists(audio_input):
|
file=open(audio_file, "rb"),
|
||||||
audio_file = video_url
|
response_format="text"
|
||||||
|
)
|
||||||
# Checking file size
|
logger.info(f"\nYouTube video transcription:\n{yt.title}\n{transcript}\n")
|
||||||
max_file_size = 24 * 1024 * 1024 # 24MB
|
status.update(label=f"\nYouTube video transcription:\n{yt.title}\n{transcript}\n")
|
||||||
file_size = os.path.getsize(audio_file)
|
return transcript, yt.title
|
||||||
# Convert file size to MB for logging
|
|
||||||
file_size_MB = file_size / (1024 * 1024) # Convert bytes to MB
|
except Exception as e:
|
||||||
logger.info(f"Downloaded Audio Size is: {file_size_MB:.2f} MB")
|
logger.error(f"Failed in Whisper transcription: {e}")
|
||||||
if file_size > max_file_size:
|
st.warning(f"Failed in Openai Whisper transcription: {e}")
|
||||||
logger.error("File size exceeds 24MB limit.")
|
transcript = transcribe_audio(audio_file)
|
||||||
# FIXME: We can chunk hour long videos, the code is not tested.
|
print(f"\n\n\n--- Tracribe: {transcript} ----\n\n\n")
|
||||||
#long_video(audio_file)
|
return transcript, yt.title
|
||||||
sys.exit("File size limit exceeded.")
|
|
||||||
|
|
||||||
try:
|
|
||||||
logger.info("Initializing OpenAI client for transcription.")
|
|
||||||
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
|
||||||
|
|
||||||
logger.info("Transcribing using OpenAI's Whisper model.")
|
|
||||||
transcript = client.audio.transcriptions.create(
|
|
||||||
model="whisper-1",
|
|
||||||
file=open(audio_file, "rb", encoding="utf-8"),
|
|
||||||
response_format="text"
|
|
||||||
)
|
|
||||||
logger.info(f"\nYouTube video transcription:\n{yt.title}\n{transcript}\n")
|
|
||||||
return transcript, yt.title
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed in Whisper transcription: {e}")
|
st.error(f"An error occurred during YouTube video processing: {e}")
|
||||||
sys.exit("Transcription failure.")
|
|
||||||
|
finally:
|
||||||
except Exception as e:
|
try:
|
||||||
logger.error(f"An error occurred during YouTube video processing: {e}")
|
if os.path.exists(audio_file):
|
||||||
sys.exit("Video processing failure.")
|
os.remove(audio_file)
|
||||||
|
logger.info("Temporary audio file removed.")
|
||||||
finally:
|
except PermissionError:
|
||||||
try:
|
st.error(f"Permission error: Cannot remove '{audio_file}'. Please make sure of necessary permissions.")
|
||||||
if os.path.exists(audio_file):
|
except Exception as e:
|
||||||
os.remove(audio_file)
|
st.error(f"An error occurred removing audio file: {e}")
|
||||||
logger.info("Temporary audio file removed.")
|
|
||||||
except PermissionError:
|
|
||||||
logger.error(f"Permission error: Cannot remove '{audio_file}'. Please make sure of necessary permissions.")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"An error occurred removing audio file: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
def long_video(temp_file_name):
|
def long_video(temp_file_name):
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import os
|
|||||||
import requests
|
import requests
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
import streamlit as st
|
||||||
|
|
||||||
from .save_image import save_generated_image
|
from .save_image import save_generated_image
|
||||||
|
|
||||||
@@ -18,7 +19,7 @@ def generate_stable_diffusion_image(prompt):
|
|||||||
api_key = os.getenv("STABILITY_API_KEY")
|
api_key = os.getenv("STABILITY_API_KEY")
|
||||||
|
|
||||||
if api_key is None:
|
if api_key is None:
|
||||||
raise Exception("Missing Stability API key.")
|
st.warning("Missing Stability API key.")
|
||||||
|
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
f"{api_host}/v1/generation/{engine_id}/text-to-image",
|
f"{api_host}/v1/generation/{engine_id}/text-to-image",
|
||||||
|
|||||||
@@ -11,6 +11,7 @@
|
|||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import datetime
|
import datetime
|
||||||
|
import streamlit as st
|
||||||
|
|
||||||
import openai # OpenAI Python library to make API calls
|
import openai # OpenAI Python library to make API calls
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
@@ -56,6 +57,7 @@ def generate_image(user_prompt):
|
|||||||
image_stored_at = generate_stable_diffusion_image(img_prompt)
|
image_stored_at = generate_stable_diffusion_image(img_prompt)
|
||||||
except Exception as err:
|
except Exception as err:
|
||||||
logger.error(f"Failed to generate Image: {err}")
|
logger.error(f"Failed to generate Image: {err}")
|
||||||
|
st.warning(f"Failed to generate Image: {err}")
|
||||||
return image_stored_at
|
return image_stored_at
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import configparser
|
import configparser
|
||||||
@@ -19,6 +20,7 @@ from lib.ai_writers.linkedin_ai_writer import linked_post_writer
|
|||||||
from lib.ai_writers.twitter_ai_writer import tweet_writer
|
from lib.ai_writers.twitter_ai_writer import tweet_writer
|
||||||
from lib.ai_writers.insta_ai_writer import insta_writer
|
from lib.ai_writers.insta_ai_writer import insta_writer
|
||||||
from lib.ai_writers.youtube_ai_writer import write_yt_title, write_yt_description, write_yt_script
|
from lib.ai_writers.youtube_ai_writer import write_yt_title, write_yt_description, write_yt_script
|
||||||
|
from lib.ai_writers.web_url_ai_writer import blog_from_url
|
||||||
from lib.gpt_providers.text_generation.ai_story_writer import ai_story_generator
|
from lib.gpt_providers.text_generation.ai_story_writer import ai_story_generator
|
||||||
from lib.gpt_providers.text_generation.ai_essay_writer import ai_essay_generator
|
from lib.gpt_providers.text_generation.ai_essay_writer import ai_essay_generator
|
||||||
from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
|
from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
|
||||||
@@ -35,14 +37,15 @@ def is_web_link(text):
|
|||||||
|
|
||||||
def process_input(input_text, uploaded_file):
|
def process_input(input_text, uploaded_file):
|
||||||
if is_youtube_link(input_text):
|
if is_youtube_link(input_text):
|
||||||
st.success("Detected YouTube link")
|
if input_text.startswith("https://www.youtube.com/") or input_text.startswith("http://www.youtube.com/"):
|
||||||
st.video(input_text)
|
return("youtube_url")
|
||||||
|
else:
|
||||||
|
st.error("Invalid YouTube URL. Please enter a valid URL.")
|
||||||
|
|
||||||
elif is_web_link(input_text):
|
elif is_web_link(input_text):
|
||||||
st.success("Detected Web link")
|
return("web_url")
|
||||||
st.write(f"[Visit link]({input_text})")
|
|
||||||
else:
|
else:
|
||||||
st.success("Detected Keywords")
|
return("keywords")
|
||||||
st.write(input_text)
|
|
||||||
|
|
||||||
if uploaded_file is not None:
|
if uploaded_file is not None:
|
||||||
file_details = {"filename": uploaded_file.name, "filetype": uploaded_file.type, "filesize": uploaded_file.size}
|
file_details = {"filename": uploaded_file.name, "filetype": uploaded_file.type, "filesize": uploaded_file.size}
|
||||||
@@ -66,7 +69,7 @@ def blog_from_keyword():
|
|||||||
st.title("Blog Content Writer")
|
st.title("Blog Content Writer")
|
||||||
col1, col2 = st.columns([2, 1.5])
|
col1, col2 = st.columns([2, 1.5])
|
||||||
with col1:
|
with col1:
|
||||||
blog_keywords = st.text_area('**Enter Keywords/Title/YouTube Link/Web URLs**',
|
user_input = st.text_area('**Enter Keywords/Title/YouTube Link/Web URLs**',
|
||||||
help='Provide keywords, titles, YouTube links, or web URLs to generate content.',
|
help='Provide keywords, titles, YouTube links, or web URLs to generate content.',
|
||||||
placeholder="""Write Blog From:
|
placeholder="""Write Blog From:
|
||||||
- Keywords/Blog Title: Provide keywords to web research & write blog.
|
- Keywords/Blog Title: Provide keywords to web research & write blog.
|
||||||
@@ -79,33 +82,40 @@ def blog_from_keyword():
|
|||||||
type=["txt", "pdf", "docx", "jpg", "jpeg", "png", "mp3", "wav", "mp4", "mkv", "avi"],
|
type=["txt", "pdf", "docx", "jpg", "jpeg", "png", "mp3", "wav", "mp4", "mkv", "avi"],
|
||||||
help='Attach files such as audio, video, images, or documents.')
|
help='Attach files such as audio, video, images, or documents.')
|
||||||
|
|
||||||
if blog_keywords and len(blog_keywords.split()) < 2:
|
input_type = process_input(user_input, uploaded_file)
|
||||||
st.error('🚫 Blog keywords should be at least two words long. Please try again.')
|
|
||||||
|
|
||||||
content_type = st.radio("Select content type:", ["Normal-length content", "Long-form content", "Experimental - AI Agents team"])
|
content_type = st.radio("Select content type:", ["Normal-length content", "Long-form content", "Experimental - AI Agents team"])
|
||||||
if st.button("Write Blog"):
|
if st.button("Write Blog"):
|
||||||
# Clear the previous results from the screen
|
# Clear the previous results from the screen
|
||||||
st.empty()
|
st.empty()
|
||||||
if blog_keywords and len(blog_keywords.split()) >= 2:
|
# Check if the user input is keywords or blog title.
|
||||||
if content_type == "Normal-length content":
|
if 'keywords' in input_type:
|
||||||
try:
|
if user_input and len(user_input.split()) >= 2:
|
||||||
short_blog = write_blog_from_keywords(blog_keywords)
|
if content_type == "Normal-length content":
|
||||||
st.markdown(short_blog)
|
try:
|
||||||
except Exception as err:
|
short_blog = write_blog_from_keywords(user_input)
|
||||||
st.error(f"🚫 Failed to write blog on {blog_keywords}, Error: {err}")
|
st.markdown(short_blog)
|
||||||
elif content_type == "Long-form content":
|
except Exception as err:
|
||||||
try:
|
st.error(f"🚫 Failed to write blog on {user_keywords}, Error: {err}")
|
||||||
st.empty()
|
elif content_type == "Long-form content":
|
||||||
long_form_generator(blog_keywords)
|
try:
|
||||||
st.success(f"Successfully wrote long-form blog on: {blog_keywords}")
|
st.empty()
|
||||||
except Exception as err:
|
long_form_generator(user_input)
|
||||||
st.error(f"🚫 Failed to write blog on {blog_keywords}, Error: {err}")
|
st.success(f"Successfully wrote long-form blog on: {user_input}")
|
||||||
elif content_type == "Experimental - AI Agents team":
|
except Exception as err:
|
||||||
try:
|
st.error(f"🚫 Failed to write blog on {user_input}, Error: {err}")
|
||||||
ai_agents_writers(blog_keywords)
|
elif content_type == "Experimental - AI Agents team":
|
||||||
st.success(f"Successfully wrote content with AI agents on: {blog_keywords}")
|
try:
|
||||||
except Exception as err:
|
ai_agents_writers(user_input)
|
||||||
st.error(f"🚫 Failed to Write content with AI agents: {err}")
|
st.success(f"Successfully wrote content with AI agents on: {user_input}")
|
||||||
|
except Exception as err:
|
||||||
|
st.error(f"🚫 Failed to Write content with AI agents: {err}")
|
||||||
|
else:
|
||||||
|
st.error('🚫 Blog keywords should be at least two words long. Please try again.')
|
||||||
|
|
||||||
|
elif 'youtube_url' in input_type:
|
||||||
|
generate_audio_blog(user_input)
|
||||||
|
elif 'web_url' in input_type:
|
||||||
|
blog_from_url(user_input)
|
||||||
|
|
||||||
|
|
||||||
def ai_agents_team():
|
def ai_agents_team():
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
[main]
|
[main]
|
||||||
role = Expert Writing Critic & content Editor.
|
role = Expert Writing Critic & content Editor at Alwrity.com
|
||||||
|
|
||||||
goal = Review the draft content and identfy potential issues.
|
goal = Review & edit draft content provided by Content writer.
|
||||||
|
|
||||||
# Personalize the agent, give persona, personality, tone etc.
|
# Personalize the agent, give persona, personality, tone etc.
|
||||||
[backstory]
|
[backstory]
|
||||||
text = You are an expert reviewer with 10 years of experience in reviewing digital content. You make sure that articles are interesting and that the information provided is correct. Simplicity will resonate with your readers. Pay attention to grammar and punctuation. Avoid AI sounding words and pass AI detection tools. Engage with active voice. It’s as if you’re in conversation with the reader. Example: Use "You will see benefits" instead of "One will see benefits." Use headings, bullets, and formatting to break the monotony of the text. These elements add rhythm and can make a document more inviting. A concise conclusion that resonates with the beginning can bring your piece full circle, satisfying your readers.
|
text = You are an expert reviewer with 10 years of experience in reviewing digital content. You make sure that articles are interesting and that the information provided is correct. Simplicity will resonate with your readers. Pay attention to grammar and punctuation. Avoid AI sounding words and pass AI detection tools. Engage with active voice. It’s as if you’re in conversation with the reader. Example: Use "You will see benefits" instead of "One will see benefits." Use headings, bullets, and formatting to break the monotony of the text. These elements add rhythm and can make a document more inviting. A concise conclusion that resonates with the beginning can bring your piece full circle, satisfying your readers.
|
||||||
|
|
||||||
[task]
|
[task]
|
||||||
task_description = Sharpen the focus of the draft content by identifying overly wordy sections and crafting concise alternatives. Words with many syllables are barriers to simplicity. Choose simpler words, avoid sounding like AI. Pay special attention to readiblity, formatting & styling of the content. Make sure the draft content SEO optimised for given keywords. Make sure the final content is 2000 words long.
|
task_description = Sharpen the focus of the draft content by identifying overly wordy sections and crafting concise alternatives. Words with many syllables are barriers to simplicity. Choose simpler words, avoid sounding like AI. Pay special attention to readability, formatting & styling of the content. Make sure the draft content is SEO optimised for given keywords. Use insights & context from other team members.
|
||||||
|
|
||||||
task_expected_output = Final content with your review comments edited in the content draft.
|
task_expected_output = Provide a final content version that can be published.
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
[main]
|
[main]
|
||||||
role = Content writer at Alwrity.com
|
role = Content writer at Alwrity.com
|
||||||
|
|
||||||
goal = Write original, engaging & SEO optimized content. Rank high on Google for given keywords. Ensure organic growth of Alwrity.com
|
goal = Write an original, highly detailed, engaging & SEO optimized blog post on given outline.
|
||||||
|
|
||||||
|
|
||||||
[backstory]
|
[backstory]
|
||||||
text = You are a renowned Content Writer, known for your insightful and engaging articles. You transform complex concepts into compelling narratives. Limit them to 20 words or so, using language familiar to the majority. Example: Instead of "Utilize this methodology," say "Use this method." Employ a clear and concise writing style. Engage your audience with a compelling, fun, and informative tone, that effectively conveys the technical aspects of the topic in simple terms.
|
text = You are a renowned Content Writer, known for your insightful and engaging articles. You transform complex concepts into compelling narratives. Limit them to 20 words or so, using language familiar to the majority. Example: Instead of "Utilize this methodology," say "Use this method." Employ a clear and concise writing style. Engage your audience with a compelling, fun, and informative tone, that effectively conveys the technical aspects of the topic in simple terms.
|
||||||
|
|
||||||
[task]
|
[task]
|
||||||
task_description = Using the insights provided, write detailed blog content.
|
task_description = Use the outline provided by Senior Content Outliner and write detailed blog content.
|
||||||
|
|
||||||
task_expected_output = An original highly detailed blog content convering most sections of the provided outline.
|
task_expected_output = An original & highly detailed blog content covering most sections of the provided outline.
|
||||||
|
|||||||
@@ -29,3 +29,4 @@ lxml_html_clean
|
|||||||
streamlit
|
streamlit
|
||||||
yfinance
|
yfinance
|
||||||
pandas_ta
|
pandas_ta
|
||||||
|
firecrawl-py
|
||||||
|
|||||||
Reference in New Issue
Block a user