diff --git a/README.md b/README.md index 82dc9937..d61a78d8 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,12 @@ If you have 💻 Laptop + 🛜 Internet + 10 minutes, you will be generating blo ### [Getting started for Developers](https://github.com/AJaySi/AI-Writer/wiki/Alwrity--%E2%80%90-Get-started) +### Updating to the latest code (existing users) +``` +1). git pull +2). streamlit run alwrity.py +``` + **Still stuck, [Open issue here](https://github.com/AJaySi/AI-Writer/issues) & Someone will bail you out. --- @@ -146,7 +152,7 @@ This gives context for generating content. Tavily AI, Google search, serp and Vi --- -Notes from underground: +#### Notes from underground: 1). Focus is on writing/generating highly unique, SEO optimized blog content. 2). Models: Openai, gemini, ollama are interesting. Minstral API is also worth exploring. Cohere API is purpose made. @@ -156,3 +162,10 @@ Pydantically speakng, Due to experimental nature of prompting, its getting expen 4).Getting AI agents to 'brainstrom' blog ideas seems more pressing. CrewAI seems more straightforward than autogen. 5). Too Many APIs floating around: The implementation is using tools that dont depend on API keys and rather scrape them. Duh, scraping wont scale, that is GPT vision based scraping will come in handy. +6). firecrawl is interesting; gpt-researcher provides local docs QA. +7). Had to provide a Streamlit UI, as Alwrity's audience aren't comfortable with the command line. +8). Local folder RAG and chat with your content/website are on the cards. +9). AI models are better; not sure how long the 'Free' APIs will stay "Free". +10). The code is always a mess; lots of changes are happening. +11). Focus is to stop making any more AI content tools, but rather revisit & improve user experience & content quality. +12). To Err is Human & AI.... 
diff --git a/alwrity.py b/alwrity.py index b6263424..58007031 100644 --- a/alwrity.py +++ b/alwrity.py @@ -1,259 +1,518 @@ -print("Welcome, Alwrity at your service..") import os -from pathlib import Path -import configparser +import json from datetime import datetime - -import typer -from prompt_toolkit import prompt -from prompt_toolkit.shortcuts import radiolist_dialog - from dotenv import load_dotenv -import requests -from rich import print -from rich.text import Text -load_dotenv(Path('.env')) -print("Loading, required libraries..") -app = typer.Typer() +import streamlit as st -from lib.utils.alwrity_utils import blog_from_audio, blog_from_keyword, do_web_research, do_web_research, ai_news_writer -from lib.utils.alwrity_utils import write_story, essay_writer, blog_tools, ai_finance_ta_writer, ai_content_team -from lib.utils.alwrity_utils import content_planning_agents, competitor_analysis, image_to_text_writer, image_generator +# Load .env file +load_dotenv() +from lib.chatbot_custom.chatbot_local_docqa import alwrity_chat_docqa +from lib.utils.alwrity_streamlit_utils import ( + blog_from_keyword, ai_agents_team, + blog_from_audio, write_story, + essay_writer, ai_news_writer, + ai_finance_ta_writer, ai_social_writer, + do_web_research, competitor_analysis, + ) -def prompt_for_time_range(): - os.system("clear" if os.name == "posix" else "cls") - print("\n๐Ÿ™‹ If you're researching keywords that are recent, use accordingly. 
Default is Anytime.\n") - choices = [("anytime", "Anytime"), ("past year", "Past Year"), ("past month", "Past Month"), - ("past week", "Past Week"), ("past day", "Past Day")] - selected_time_range = radiolist_dialog(title="Select Search result time range:", values=choices).run() - return selected_time_range[0] if selected_time_range else None - - -def write_blog_options(): - choices = [ - ("Keywords", "Write from few keywords"), - ("Audio To Blog", "Write from audio files"), - ("AI Story Writer", "Story Writer"), - ("AI Essay Writer", "Essay writer"), - ("AI News Articles", "Write News reports"), - ("AI Finance Writer", "Write Financial TA report"), - ("Social", "AI Social writer(instagram, tweets, linkedin, facebook post)"), - ("Copywriter", "AI Copywriter"), - ("Quit", "Quit") - ] - selected_blog_type = radiolist_dialog(title="Choose Content creation Type:", values=choices).run() - return selected_blog_type if selected_blog_type else None - - -@app.command() -def start_interactive_mode(): - os.system("clear" if os.name == "posix" else "cls") - text = "_______________________________________________________________________\n" - text += "\nโš ๏ธ Alert! 
๐Ÿ’ฅโ“๐Ÿ’ฅ\n" - text += "Interactive tool, needs your attention/inputs, get off your mobile..'\n" - text += "_______________________________________________________________________\n" - print(text) - - choices = [ - ("AI Writers", "Write with AI"), - ("Content Planning", "Plan Content with AI"), - ("Content Teams", "AI Content Agent Teams"), - ("Quit", "Quit") - ] - mode = radiolist_dialog(title="Choose an option:", values=choices).run() - if mode: - if mode == 'AI Writers': - write_blog() - elif mode == 'Content Planning': - content_planning_tools() - elif mode == 'Content Teams': - print("AI Content teams") - ai_content_team() - elif mode == 'Social Media': - print(""" #whatsapp #instagram #youtube #twitter/X #Linked-in posts """) - raise typer.Exit() - elif mode == 'Quit': - typer.echo("Exiting, Getting Lost!") - raise typer.Exit() - - -def content_planning_tools(): - """ """ - os.system("clear" if os.name == "posix" else "cls") - text = "_______________________________________________________________________\n" - text += "\nโš ๏ธ Alert! ๐Ÿ’ฅโ“๐Ÿ’ฅ\n" - text += "ไฝœๅฎถ็š„้šœ็ข - Writer's block - Bloqueo de escritor - Schreibblockade\n" - text += "Use Google Keyword planner, google ads instead. Better tools than below.\n" - text += "Note: Who Cares, just give some titles, keywords to get started.. 
To Err is Human & AI..\n" - text += "_______________________________________________________________________\n" - print(text) - - choices = [ - ("Do keyword Research", "Keywords web research - ๐Ÿค“ Will read & earn my bread.."), - ("Competitor Analysis", "Competitor Analysis - ๐Ÿง What's my neighbour doing.."), - ("Content Calender", "๐Ÿฅน๐Ÿฅน Just give me content calender ๐Ÿฅน๐Ÿฅน") - ] - mode = radiolist_dialog(title="Choose an option:", values=choices).run() - - if mode == 'Do keyword Research': - if check_search_apis(): - do_web_research() - elif mode == 'Competitor Analysis': - competitor_analysis() - elif mode == 'Content Calender': - content_planning_agents() - - -def check_search_apis(): +# Custom CSS for styling +st.markdown( """ - Check if necessary environment variables are present. - Display messages with links on how to get them if not present. - """ - - # Use rich.print for styling and hyperlinking - print("Alwrity uses Basic, Semantic, Neural web search using above APIs for contextual blog generation.\n") - - api_keys = { - "METAPHOR_API_KEY": "Metaphor AI Key (Get it here: [link=https://dashboard.exa.ai/login]Metaphor API[/link])", - "TAVILY_API_KEY": "Tavily AI Key (Get it here: [link=https://tavily.com/#api]Tavily API[/link])", - "SERPER_API_KEY": "Serper API Key (Get it here: [link=https://serper.dev/signup]SerperDev API[/link])", + + """, + unsafe_allow_html=True +) + + +# Function to check if API keys are present and prompt user to input if not +def check_api_keys(): + api_keys = { + "METAPHOR_API_KEY": "Metaphor AI Key (Get it here: https://dashboard.exa.ai/login)", + "TAVILY_API_KEY": "Tavily AI Key (Get it here: https://tavily.com/#api)", + "SERPER_API_KEY": "Serper API Key (Get it here: https://serper.dev/signup)" + } missing_keys = [] - with typer.progressbar(api_keys.items(), label="Checking API keys", length=len(api_keys)) as progress: - for key, description in progress: - if os.getenv(key) is None: - # Use rich.print for styling and 
hyperlinking - print(f"[bold red]โœ– ๐Ÿšซ {key} is missing:[/bold red] [blue underline]Get {key} API Key[/blue underline]") - missing_keys.append((key, description)) + for key, description in api_keys.items(): + if os.getenv(key) is None: + missing_keys.append((key, description)) if missing_keys: - print("\nMost are Free APIs and really worth your while signing up for them.") - print("๐Ÿ’ฉ๐Ÿ’ฉ๐Ÿ’ฉ: GO GET THEM, on above urls. [bold red]") - #print("Note: They offer free/limited api calls, so we use most of them to have a lot of free api calls.") + st.warning(f"API keys are missing. Please provide them below:{missing_keys}") for key, description in missing_keys: - get_api_key(key, description) - else: - return True - - -def get_api_key(api_key: str, api_description: str): - """ - Ask the user to input the missing API key and add it to the .env file. - - Args: - api_key (str): The name of the API key variable. - api_description (str): The description of the API key. - """ - print("\n\n") - print(f"[bold green] ๐Ÿ™‹ Attention Here: ๐Ÿ™‹ -- {api_description}") - user_input = typer.prompt(f"๐Ÿ’ฉ -**Please Enter(copy/paste) {api_key} API Key** - Here๐Ÿ™‹:") - with open(".env", "a") as env_file: - env_file.write(f"{api_key}={user_input}\n") - print(f"โœ… API Key added to .env file.") - - -def write_blog(): - blog_type = write_blog_options() - if blog_type: - if blog_type == 'Keywords': - blog_from_keyword() - elif blog_type == 'AI Story Writer': - write_story() - elif blog_type == 'AI Essay Writer': - essay_writer() - elif blog_type == 'Audio To Blog': - blog_from_audio() - elif blog_type == 'AI News Articles': - ai_news_writer() - elif blog_type == 'AI Finance Writer': - ai_finance_ta_writer() - elif blog_type == 'Quit': - typer.echo("Exiting, Getting Lost..") - raise typer.Exit() + api_key = st.text_input(f"Enter {key}:", placeholder=description, help=description) + if api_key: + with open(".env", "a") as env_file: + env_file.write(f"{key}={api_key}\n") + 
os.environ[key] = api_key + st.success(f"{key} added successfully! Enter to Continue..") + return False + return True +# Function to check LLM provider and API key def check_llm_environs(): - """ Function to check which LLM api is given. """ - # Load .env file - load_dotenv(Path('.env')) gpt_provider = os.getenv("GPT_PROVIDER") - - # Disable unsupported GPT providers supported_providers = ['google', 'openai', 'mistralai'] + if gpt_provider is None or gpt_provider.lower() not in map(str.lower, supported_providers): - # Prompt user to select a provider - gpt_provider = radiolist_dialog( - title="Select your GPT Provider(llm) from 'google', 'openai', 'mistralai'", - values=[("google", "Google Gemini Pro"), ("openai", "OpenAI- ChatGPT"), ("mistralai", "MistralAI/WIP")]).run() - # Update .env file + gpt_provider = st.selectbox( + "Select your LLM Provider", + options=["google", "openai", "mistralai"], + help="Select from 'google', 'openai', 'mistralai'" + ) os.environ["GPT_PROVIDER"] = gpt_provider with open(".env", "a") as env_file: env_file.write(f"GPT_PROVIDER={gpt_provider}\n") - print(f"โœ… API Key added to .env file.") + st.success(f"GPT Provider set to {gpt_provider}") + api_key_var = "" if gpt_provider.lower() == "google": api_key_var = "GEMINI_API_KEY" - missing_api_msg = f"To get your {api_key_var}, please visit: https://aistudio.google.com/app/apikey" - elif gpt_provider.lower() == "openai": + missing_api_msg = "To get your Gemini API key, please visit: https://aistudio.google.com/app/apikey" + elif gpt_provider.lower() == "openai": api_key_var = "OPENAI_API_KEY" missing_api_msg = "To get your OpenAI API key, please visit: https://openai.com/blog/openai-api" elif gpt_provider.lower() == "mistralai": api_key_var = "MISTRAL_API_KEY" missing_api_msg = "To get your MistralAI API key, please visit: https://mistralai.com/api" - if api_key_var not in os.environ: - get_api_key(api_key_var, missing_api_msg) + if os.getenv(api_key_var) is None: + api_key = 
st.text_input(f"Enter {api_key_var}:", placeholder=missing_api_msg, help=missing_api_msg) + if api_key: + with open(".env", "a") as env_file: + env_file.write(f"{api_key_var}={api_key}\n") + os.environ[api_key_var] = api_key + st.success(f"{api_key_var} added successfully! Enter to continue..") + return False + return True -def check_internet(): - try: - response = requests.get("http://www.google.com", timeout=5) - if not response.status_code == 200: - print("๐Ÿ’ฅ๐Ÿคฏ WTFish, Internet is NOT available. Enjoy the wilderness..") - exit(1) +# Function to save configuration to a file +def save_config(config): + with open(os.getenv("ALWRITY_CONFIG"), "w") as config_file: + json.dump(config, config_file, indent=4) + + +# Sidebar configuration +def sidebar_configuration(): + st.sidebar.title("๐Ÿ› ๏ธ Personalization ๐Ÿ—๏ธ") + + with st.sidebar.expander("**๐Ÿ‘ท Content Personalization**"): + blog_length = st.text_input("**Content Length**", value="2000", + help="Length of blogs Or word count. 
Note: It won't be exact and depends on GPT providers and Max token count.") + blog_tone = st.selectbox("**Content Tone**", + options=["Casual", "Professional", "How-to", "Beginner", "Research", "Programming", "Social Media"], + help="Choose the tone of the blog.") + blog_demographic = st.selectbox("Target Audience", + options=["Working professional", "Content creators & Digital marketing", "Gen-Z", "Tech-savvy", "Students", "Kids"], + help="Choose the target audience.") + blog_type = st.selectbox("Content Type", + options=["Informational", "Commercial", "Company", "News", "Finance", "Competitor", "Programming", "Scholar"], + help="Choose the type of the blog.") + blog_language = st.selectbox("Content Language", + options=["English", "Spanish", "German", "Chinese", "Arabic", "Nepali", "Hindi", "Hindustani"], + help="Choose the language of the blog.") + blog_output_format = st.selectbox("Content Output Format", + options=["markdown", "HTML", "plaintext"], + help="Choose the output format of the blog.") + + with st.sidebar.expander("**๐Ÿฉป Images Personalization**"): + image_generation_model = st.selectbox("Image Generation Model", + options=["stable-diffusion", "dalle2", "dalle3"], + help="Choose the image generation model.") + number_of_blog_images = st.number_input("Number of Blog Images", value=1, help="Number of blog images to include.") + + with st.sidebar.expander("**๐Ÿค– LLM Personalization**"): + gpt_provider = st.selectbox("GPT Provider", + options=["google", "openai", "minstral"], + help="Choose the GPT provider.") + model = st.text_input("Model", value="gemini-1.5-flash-latest", help="Mention which model of the above provider to use.") + temperature = st.slider( + "Temperature", + min_value=0.1, + max_value=1.0, + value=0.7, + step=0.1, + format="%.1f", + help="""Temperature controls the 'creativity' or randomness of the text generated by GPT. 
+ Greater determinism with higher values indicating more randomness.""" + ) + + top_p = st.slider( + "Top-p", + min_value=0.0, + max_value=1.0, + value=0.9, + step=0.1, + format="%.1f", + help="Top-p sampling controls the level of diversity in the generated text." + ) + + # Selectbox for max tokens + max_tokens_options = [500, 1000, 2000, 4000, 16000, 32000, 64000] + max_tokens = st.selectbox( + "Max Tokens", + options=max_tokens_options, + index=max_tokens_options.index(4000), + help="Max tokens determine the maximum length of the output sequence generated by a model." + ) + n = st.number_input("N", + value=1, + min_value=1, + max_value=10, + help="Defines the number of words or characters grouped together in a sequence when analyzing text.") + frequency_penalty = st.slider( + "Frequency Penalty", + min_value=0.0, + max_value=2.0, + value=1.0, + step=0.1, + format="%.1f", + help="Influences word selection during text generation, promoting diversity with higher values." + ) + + presence_penalty = st.slider( + "Presence Penalty", + min_value=0.0, + max_value=2.0, + value=1.0, + step=0.1, + format="%.1f", + help="Encourages the use of diverse words by discouraging repetition." 
+ ) + + with st.sidebar.expander("**๐Ÿ•ต๏ธ Search Engine Personalization**"): + geographic_location = st.selectbox("Geographic Location", + options=["us", "in", "fr", "cn"], + help="Choose the geo location for the web search.") + search_language = st.selectbox("Search Language", + options=["en", "zn-cn", "de", "hi"], + help="Choose the language for search results.") + number_of_results = st.number_input("Number of Results", + value=10, + max_value=20, + min_value=1, + help="Number of Google search results to fetch.") + time_range = st.selectbox("Time Range", + options=["anytime", "past day", "past week", "past month", "past year"], + help="Choose the time range for search results.") + include_domains = st.text_input("Include Domains", value="", + help="A list of domains to specifically include in the search results. Default is None, which includes all domains.") + similar_url = st.text_input("Similar URL", value="", help="A single URL that instructs search engines to give results similar to the given URL.") + + # Storing collected inputs in a dictionary + config = { + "Blog Content Characteristics": { + "Blog Length": blog_length, + "Blog Tone": blog_tone, + "Blog Demographic": blog_demographic, + "Blog Type": blog_type, + "Blog Language": blog_language, + "Blog Output Format": blog_output_format + }, + "Blog Images Details": { + "Image Generation Model": image_generation_model, + "Number of Blog Images": number_of_blog_images + }, + "LLM Options": { + "GPT Provider": gpt_provider, + "Model": model, + "Temperature": temperature, + "Top-p": top_p, + "Max Tokens": max_tokens, + "N": n, + "Frequency Penalty": frequency_penalty, + "Presence Penalty": presence_penalty + }, + "Search Engine Parameters": { + "Geographic Location": geographic_location, + "Search Language": search_language, + "Number of Results": number_of_results, + "Time Range": time_range, + "Include Domains": include_domains, + "Similar URL": similar_url + } + } + + # Writing the configuration to a 
file whenever a change is made + save_config(config) + + + +# Function to read prompts from the file +def read_prompts(file_path="prompt_llm.txt"): + if os.path.exists(file_path): + with open(file_path, "r") as file: + prompts = file.readlines() + return [prompt.strip() for prompt in prompts] + return [] + +# Function to write prompts to the file +def write_prompts(prompts, file_path="prompt_llm.txt"): + with open(file_path, "w") as file: + for prompt in prompts: + file.write(f"{prompt}\n") + +def main(): + st.markdown("
Welcome to Alwrity!
", unsafe_allow_html=True) + # Export the paths and file names. Dont want alwrity to be chatty and prompt for inputs. + os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "lib", "workspace", "web_research_report", + f"web_research_report_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}") + os.environ["IMG_SAVE_DIR"] = os.path.join(os.getcwd(), "lib", "workspace", "generated_content") + os.environ["CONTENT_SAVE_DIR"] = os.path.join(os.getcwd(), "lib", "workspace", "generated_content") + os.environ["PROMPTS_DIR"] = os.path.join(os.getcwd(), "lib", "workspace", "prompts") + os.environ["ALWRITY_CONFIG"] = os.path.join(os.getcwd(), "lib", "workspace", "alwrity_config", "main_config.json") + + # Check API keys and LLM environment settings + api_keys_valid = check_api_keys() + llm_environs_valid = check_llm_environs() + + if api_keys_valid and llm_environs_valid: + # Clear previous messages and display the sidebar configuration + sidebar_configuration() + else: + st.error("Error loading Environment variables.") + st.stop() + + # Define the tabs + tab1, tab2, tab3, tab4, tab5 = st.tabs( + ["AI Writers", "Content Planning", "Agents Content Teams", "Alwrity Brain", "Ask Alwrity"]) + with tab1: + write_blog() + + with tab2: + content_planning_tools() + + with tab3: + ai_agents_team() + + with tab4: + alwrity_brain() + + with tab5: + st.info("Chatbot") + st.markdown("Create a collection by uploading files (PDF, MD, CSV, etc), or crawl a data source (Websites, more sources coming soon.") + st.markdown("One can ask/chat, summarize and do semantic search over the uploaded data") + #alwrity_chat_docqa() + + # Sidebar for prompt modification + st.sidebar.title("๐Ÿ“ Modify Prompts") + prompts = read_prompts() + + if prompts: + edited_prompts = [] + for i, prompt in enumerate(prompts): + edited_prompt = st.sidebar.text_area(f"Prompt {i+1}", prompt) + edited_prompts.append(edited_prompt) + + if st.sidebar.button("Save Prompts"): + write_prompts(edited_prompts) + 
st.sidebar.success("Prompts saved successfully!") + else: + st.sidebar.warning("No prompts found in the file.") + + +# Functions for the main options +def write_blog(): + options = [ + "Write from few keywords", + "Write from audio files", + "Story Writer", + "Essay writer", + "Write News reports", + "Write Financial TA report", + "AI Social writer (instagram, tweets, linkedin, facebook post)", + "AI Copywriter", + "Quit" + ] + choice = st.selectbox("**Select a content creation type:**", options, index=0, format_func=lambda x: f"๐Ÿ“ {x}") + + if choice == "Write from few keywords": + blog_from_keyword() + elif choice == "Write from audio files": + blog_from_audio() + elif choice == "Story Writer": + write_story() + elif choice == "Essay writer": + essay_writer() + elif choice == "Write News reports": + ai_news_writer() + elif choice == "Write Financial TA report": + ai_finance_ta_writer() + elif choice == "AI Social writer (instagram, tweets, linkedin, facebook post)": + ai_social_writer() + elif choice == "Quit": + st.write("Exiting, Getting Lost. But.... I have nowhere to go ๐Ÿฅน๐Ÿฅน") + + +def content_planning_tools(): + st.markdown("
Content Planning
", unsafe_allow_html=True) + st.markdown("""**Alwrity content Ideation & Planning** : Provide few keywords to do comprehensive web research. + Provide few keywords to get Google, Neural, pytrends analysis. Know keywords, blog titles to target. + Generate months long content calender around given keywords.""") + options = [ + "Keywords Researcher", + "Competitor Analysis" + ] + choice = st.selectbox("Select a content planning tool:", options, index=0, format_func=lambda x: f"๐Ÿ” {x}") + + if choice == "Keywords Researcher": + do_web_research() + elif choice == "Competitor Analysis": + competitor_analysis() + #elif choice == "Get Content Calender": + # planning_agents() + + +def alwrity_brain(): + st.title("๐Ÿง  Alwrity Brain, Better than yours!") + st.write("Choose a folder to write content on. Alwrity will do RAG on these documents. The documents can of any type, pdf, pptx, docs, txt, cs etc. Video files and Audio files are also permitted.") + + folder_path = st.text_input("**Enter folder path:**") + if st.button("**Process Folder**"): + if folder_path: + try: + process_folder_for_rag(folder_path) + st.success("Folder processed successfully!") + except Exception as e: + st.error(f"Error processing folder: {e}") else: - return - except requests.ConnectionError: - print("๐Ÿ’ฅ๐Ÿคฏ WTFish: Internet is NOT available. Enjoy the wilderness..") - exit(1) - except requests.Timeout: - print("Request timed out. 
Internet might be slow.") - exit(1) - except Exception as e: - print("Internet: An error occurred:", e) - exit(1) + st.warning("Please enter a valid folder path.") -def create_env_file(): - env_file = Path('.env') - if not env_file.is_file(): - try: - with open('.env', 'w') as f: - f.write('# Alwrity will add your environment variables here\n') - except Exception as e: - print(f"๐Ÿ’ฅ๐ŸคฏError occurred while creating .env file: {e}") - if __name__ == "__main__": - - print("Checking Internet..") - check_internet() - print("Creating .env file") - create_env_file() - print("Checking Search APIs..") - check_search_apis() - print("Checking LLM APIs..") - check_llm_environs() + main() - # Export the paths and file names. Dont want alwrity to be chatty and prompt for inputs. - os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "lib", "workspace", - f"web_research_report_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}") - os.environ["IMG_SAVE_DIR"] = os.path.join(os.getcwd(), "lib", "workspace") - os.environ["CONTENT_SAVE_DIR"] = os.path.join(os.getcwd(), "lib", "workspace") - os.environ["PROMPTS_DIR"] = os.path.join(os.getcwd(), "lib", "workspace", "prompts") - - load_dotenv(Path('.env')) - app() diff --git a/alwrity_streamlit.py b/alwrity_streamlit.py deleted file mode 100644 index 5b93c429..00000000 --- a/alwrity_streamlit.py +++ /dev/null @@ -1,389 +0,0 @@ -import os -from datetime import datetime -from dotenv import load_dotenv -import streamlit as st - -# Load .env file -load_dotenv() - -from lib.chatbot_custom.chatbot_local_docqa import alwrity_chat_docqa -from lib.utils.alwrity_streamlit_utils import ( - blog_from_keyword, ai_agents_team, - blog_from_audio, write_story, - essay_writer, ai_news_writer, - ai_finance_ta_writer, ai_social_writer, - do_web_research, competitor_analysis, - ) - -# Custom CSS for styling -st.markdown( - """ - - """, - unsafe_allow_html=True -) - -# Function to check if API keys are present and prompt user to input if not -def 
check_api_keys(): - api_keys = { - "METAPHOR_API_KEY": "Metaphor AI Key (Get it here: https://dashboard.exa.ai/login)", - "TAVILY_API_KEY": "Tavily AI Key (Get it here: https://tavily.com/#api)", - "SERPER_API_KEY": "Serper API Key (Get it here: https://serper.dev/signup)" - } - missing_keys = [] - - for key, description in api_keys.items(): - if os.getenv(key) is None: - missing_keys.append((key, description)) - - if missing_keys: - st.warning(f"API keys are missing. Please provide them below:{missing_keys}") - for key, description in missing_keys: - api_key = st.text_input(f"Enter {key}:", placeholder=description, help=description) - if api_key: - with open(".env", "a") as env_file: - env_file.write(f"{key}={api_key}\n") - os.environ[key] = api_key - st.success(f"{key} added successfully! Enter to Continue..") - return False - return True - - -# Function to check LLM provider and API key -def check_llm_environs(): - gpt_provider = os.getenv("GPT_PROVIDER") - supported_providers = ['google', 'openai', 'mistralai'] - - if gpt_provider is None or gpt_provider.lower() not in map(str.lower, supported_providers): - gpt_provider = st.selectbox( - "Select your LLM Provider", - options=["google", "openai", "mistralai"], - help="Select from 'google', 'openai', 'mistralai'" - ) - os.environ["GPT_PROVIDER"] = gpt_provider - with open(".env", "a") as env_file: - env_file.write(f"GPT_PROVIDER={gpt_provider}\n") - st.success(f"GPT Provider set to {gpt_provider}") - - api_key_var = "" - if gpt_provider.lower() == "google": - api_key_var = "GEMINI_API_KEY" - missing_api_msg = "To get your Gemini API key, please visit: https://aistudio.google.com/app/apikey" - elif gpt_provider.lower() == "openai": - api_key_var = "OPENAI_API_KEY" - missing_api_msg = "To get your OpenAI API key, please visit: https://openai.com/blog/openai-api" - elif gpt_provider.lower() == "mistralai": - api_key_var = "MISTRAL_API_KEY" - missing_api_msg = "To get your MistralAI API key, please visit: 
https://mistralai.com/api" - - if os.getenv(api_key_var) is None: - api_key = st.text_input(f"Enter {api_key_var}:", placeholder=missing_api_msg, help=missing_api_msg) - if api_key: - with open(".env", "a") as env_file: - env_file.write(f"{api_key_var}={api_key}\n") - os.environ[api_key_var] = api_key - st.success(f"{api_key_var} added successfully! Enter to continue..") - return False - return True - - -# Sidebar configuration -def sidebar_configuration(): - st.sidebar.title("๐Ÿ› ๏ธ Alwrity Configuration ๐Ÿ—๏ธ") - - with st.sidebar.expander("๐Ÿ‘ท Blog Content Characteristics"): - st.text_input("**Blog Length**", value="2000", - help="Length of blogs Or word count. Note: It won't be exact and depends on GPT providers and Max token count.") - st.text_input("**Blog Tone**", value="Casual", - help="Professional, how-to, beginner, research, programming, casual, etc.") - st.text_input("Blog Demographic", value="Content creators & Digital marketing", - help="Target Audience, Gen-Z, Tech-savvy, Working professional, students, kids, etc.") - st.text_input("Blog Type", value="Informational", - help="Informational, commercial, company, news, finance, competitor, programming, scholar, etc.") - st.text_input("Blog Language", value="English", - help="Spanish, German, Chinese, Arabic, Nepali, Hindi, Hindustani, etc.") - st.text_input("Blog Output Format", value="markdown", - help="Specify the output format of the blog as: HTML, markdown, plaintext. 
Defaults to markdown.") - - with st.sidebar.expander("๐Ÿฉป Blog Images Details"): - st.text_input("Image Generation Model", value="stable-diffusion", help="Options are dalle2, dalle3, stable-diffusion.") - st.number_input("Number of Blog Images", value=1, help="Number of blog images to include.") - - with st.sidebar.expander("๐Ÿค– LLM Options"): - st.text_input("GPT Provider", value="google", help="Choose one of the following: Openai, Google, Minstral.") - st.text_input("Model", value="gemini-1.5-flash-latest", help="Mention which model of the above provider to use.") - st.number_input("Temperature", value=0.7, - help="""Temperature controls the 'creativity' or randomness of the text generated by GPT. - Greater determinism with higher values indicating more randomness.""") - st.number_input("Top-p", value=0.9, help="Top-p sampling controls the level of diversity in the generated text.") - st.number_input("Max Tokens", value=4096, help="Max tokens determine the maximum length of the output sequence generated by a model.") - st.number_input("N", value=1, help="Defines the number of words or characters grouped together in a sequence when analyzing text.") - st.number_input("Frequency Penalty", value=1, - help="Influences word selection during text generation, promoting diversity with higher values.") - st.number_input("Presence Penalty", value=1, help="Encourages the use of diverse words by discouraging repetition.") - - with st.sidebar.expander("๐Ÿ•ต๏ธ Search Engine Parameters"): - st.text_input("Geographic Location", value="us", - help="Geo location restricts the web search to a given country. Examples are us for United States, in for India, fr for France, cn for China, etc.") - st.text_input("Search Language", value="en", - help="Define the language you want search results in. 
Example: en for English, zn-cn for Chinese, de for German, hi for Hindi, etc.") - st.number_input("Number of Results", value=10, help="Number of Google search results to fetch.") - st.text_input("Time Range", value="anytime", - help="Acceptable values: past day, past week, past month, past year. Limits the search results for a given time duration from today.") - st.text_input("Include Domains", value="", - help="A list of domains to specifically include in the search results. Default is None, which includes all domains.") - st.text_input("Similar URL", value="", help="A single URL that instructs search engines to give results similar to the given URL.") - -# Function to read prompts from the file -def read_prompts(file_path="prompt_llm.txt"): - if os.path.exists(file_path): - with open(file_path, "r") as file: - prompts = file.readlines() - return [prompt.strip() for prompt in prompts] - return [] - -# Function to write prompts to the file -def write_prompts(prompts, file_path="prompt_llm.txt"): - with open(file_path, "w") as file: - for prompt in prompts: - file.write(f"{prompt}\n") - -def main(): - st.markdown("
Welcome to Alwrity!
", unsafe_allow_html=True) - # Export the paths and file names. Dont want alwrity to be chatty and prompt for inputs. - os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "lib", "workspace", "web_research_report", - f"web_research_report_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}") - os.environ["IMG_SAVE_DIR"] = os.path.join(os.getcwd(), "lib", "workspace", "generated_content") - os.environ["CONTENT_SAVE_DIR"] = os.path.join(os.getcwd(), "lib", "workspace", "generated_content") - os.environ["PROMPTS_DIR"] = os.path.join(os.getcwd(), "lib", "workspace", "prompts") - - # Check API keys and LLM environment settings - api_keys_valid = check_api_keys() - llm_environs_valid = check_llm_environs() - - if api_keys_valid and llm_environs_valid: - # Clear previous messages and display the sidebar configuration - sidebar_configuration() - else: - st.error("Error loading Environment variables.") - st.stop() - - # Define the tabs - tab1, tab2, tab3, tab4, tab5 = st.tabs( - ["AI Writers", "Content Planning", "Agents Content Teams", "Alwrity Brain", "Ask Alwrity"]) - with tab1: - write_blog() - - with tab2: - content_planning_tools() - - with tab3: - ai_agents_team() - - with tab4: - alwrity_brain() - - with tab5: - st.info("Chatbot") - st.markdown("Create a collection by uploading files (PDF, MD, CSV, etc), or crawl a data source (Websites, more sources coming soon.") - st.markdown("One can ask/chat, summarize and do semantic search over the uploaded data") - #alwrity_chat_docqa() - - # Sidebar for prompt modification - st.sidebar.title("๐Ÿ“ Modify Prompts") - prompts = read_prompts() - - if prompts: - edited_prompts = [] - for i, prompt in enumerate(prompts): - edited_prompt = st.sidebar.text_area(f"Prompt {i+1}", prompt) - edited_prompts.append(edited_prompt) - - if st.sidebar.button("Save Prompts"): - write_prompts(edited_prompts) - st.sidebar.success("Prompts saved successfully!") - else: - st.sidebar.warning("No prompts found in the file.") - - -# Functions for 
the main options -def write_blog(): - options = [ - "Write from few keywords", - "Write from audio files", - "Story Writer", - "Essay writer", - "Write News reports", - "Write Financial TA report", - "AI Social writer (instagram, tweets, linkedin, facebook post)", - "AI Copywriter", - "Quit" - ] - choice = st.selectbox("**Select a content creation type:**", options, index=0, format_func=lambda x: f"๐Ÿ“ {x}") - - if choice == "Write from few keywords": - blog_from_keyword() - elif choice == "Write from audio files": - blog_from_audio() - elif choice == "Story Writer": - write_story() - elif choice == "Essay writer": - essay_writer() - elif choice == "Write News reports": - ai_news_writer() - elif choice == "Write Financial TA report": - ai_finance_ta_writer() - elif choice == "AI Social writer (instagram, tweets, linkedin, facebook post)": - ai_social_writer() - elif choice == "Quit": - st.write("Exiting, Getting Lost. But.... I have nowhere to go ๐Ÿฅน๐Ÿฅน") - - -def content_planning_tools(): - st.markdown("
Content Planning
", unsafe_allow_html=True) - st.markdown("""**Alwrity content Ideation & Planning** : Provide few keywords to do comprehensive web research. - Provide few keywords to get Google, Neural, pytrends analysis. Know keywords, blog titles to target. - Generate months long content calender around given keywords.""") - options = [ - "Keywords Researcher", - "Competitor Analysis" - ] - choice = st.selectbox("Select a content planning tool:", options, index=0, format_func=lambda x: f"๐Ÿ” {x}") - - if choice == "Keywords Researcher": - do_web_research() - elif choice == "Competitor Analysis": - competitor_analysis() - #elif choice == "Get Content Calender": - # planning_agents() - - -def alwrity_brain(): - st.title("๐Ÿง  Alwrity Brain, Better than yours!") - st.write("Choose a folder to write content on. Alwrity will do RAG on these documents. The documents can of any type, pdf, pptx, docs, txt, cs etc. Video files and Audio files are also permitted.") - - folder_path = st.text_input("**Enter folder path:**") - if st.button("**Process Folder**"): - if folder_path: - try: - process_folder_for_rag(folder_path) - st.success("Folder processed successfully!") - except Exception as e: - st.error(f"Error processing folder: {e}") - else: - st.warning("Please enter a valid folder path.") - - - -if __name__ == "__main__": - main() - diff --git a/lib/ai_web_researcher/common_utils.py b/lib/ai_web_researcher/common_utils.py index a1da54ec..a2bef0e7 100644 --- a/lib/ai_web_researcher/common_utils.py +++ b/lib/ai_web_researcher/common_utils.py @@ -2,7 +2,8 @@ import os import sys import re -import configparser +import json +from pathlib import Path import streamlit as st from datetime import datetime, timedelta from pathlib import Path @@ -16,33 +17,33 @@ logger.add(sys.stdout, def cfg_search_param(flag): """ - Read values from the main_config file and return them as variables and a dictionary. + Read values from the main_config.json file and return them as variables and a dictionary. 
Args: - file_path (str): The path to the main_config file. + flag (str): A flag to determine which configuration values to return. Returns: - dict: A dictionary containing the values read from the config file. - str: The geographic location value. - str: The search language value. - int: The number of search results to fetch. + various: The values read from the config file based on the flag. """ try: - file_path = Path(__file__).resolve().parents[2] / "main_config" + file_path = Path(os.environ.get("ALWRITY_CONFIG", "")) + if not file_path.is_file(): + raise FileNotFoundError(f"Configuration file not found: {file_path}") logger.info(f"Reading search config params from {file_path}") - config = configparser.ConfigParser() - config.read(file_path, encoding="utf-8") - web_research_section = config["web_research"] + + with open(file_path, 'r', encoding='utf-8') as file: + config = json.load(file) + web_research_section = config["Search Engine Parameters"] if 'serperdev' in flag: # Get values as variables - geo_location = web_research_section.get("geo_location") - search_language = web_research_section.get("search_language") - num_results = web_research_section.getint("num_results") + geo_location = web_research_section.get("Geographic Location") + search_language = web_research_section.get("Search Language") + num_results = web_research_section.get("Number of Results") return geo_location, search_language, num_results elif 'tavily' in flag: - include_urls = web_research_section.get("include_domains") + include_urls = web_research_section.get("Include Domains") pattern = re.compile(r"^(https?://[^\s,]+)(,\s*https?://[^\s,]+)*$") if pattern.match(include_urls): include_urls = [url.strip() for url in include_urls.split(',')] @@ -51,7 +52,7 @@ def cfg_search_param(flag): return include_urls elif 'exa' in flag: - include_urls = web_research_section.get("include_domains") + include_urls = web_research_section.get("Include Domains") pattern = 
re.compile(r"^(https?://\w+)(,\s*https?://\w+)*$") if pattern.match(include_urls) is not None: include_urls = include_urls.split(',') @@ -60,9 +61,9 @@ def cfg_search_param(flag): else: include_urls = None - num_results = web_research_section.getint("num_results") - similar_url = web_research_section.get("similar_url") - time_range = web_research_section.get("time_range") + num_results = web_research_section.get("Number of Results") + similar_url = web_research_section.get("Similar URL") + time_range = web_research_section.get("Time Range") if time_range == "past day": start_published_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d') elif time_range == "past week": @@ -86,7 +87,6 @@ def cfg_search_param(flag): logger.error(f"Error: Invalid value in config file: {e}") return {}, None, None, None - def save_in_file(table_content): """ Helper function to save search analysis in a file. """ file_path = os.environ.get('SEARCH_SAVE_FILE') diff --git a/lib/ai_writers/blog_from_google_serp.py b/lib/ai_writers/blog_from_google_serp.py index f588b291..019a84a3 100644 --- a/lib/ai_writers/blog_from_google_serp.py +++ b/lib/ai_writers/blog_from_google_serp.py @@ -1,7 +1,7 @@ import os import sys import json -import configparser +from pathlib import Path from loguru import logger logger.remove() @@ -13,26 +13,27 @@ logger.add(sys.stdout, from ..gpt_providers.text_generation.main_text_generation import llm_text_gen -# FIXME: Provide num_blogs, num_faqs as inputs. 
def write_blog_google_serp(search_keyword, search_results): - """Combine the given online research and gpt blog content""" + """Combine the given online research and GPT blog content""" try: - config_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', 'main_config')) - config = configparser.ConfigParser() - config.read(config_path, encoding='utf-8') + config_path = Path(os.environ["ALWRITY_CONFIG"]) + with open(config_path, 'r', encoding='utf-8') as file: + config = json.load(file) except Exception as err: - print(f"Error: Failed to read values from config: {err}") + logger.error(f"Error: Failed to read values from config: {err}") exit(1) + blog_characteristics = config['Blog Content Characteristics'] + prompt = f""" As expert Creative Content writer, - I want you to write {config.get('blog_characteristics', 'blog_type')} blog post, + I want you to write {blog_characteristics['Blog Type']} blog post, that explores {search_keyword} and also include 5 FAQs. Below are the guidelines to follow: - 1). You must respond in {config.get('blog_characteristics', 'blog_language')} language. - 2). Tone and Brand Alignment: Adjust your tone, voice, personality for {config.get('blog_characteristics', 'blog_tone')} audience. - 3). Make sure your response content length is of {config.get('blog_characteristics', 'blog_length')} words. + 1). You must respond in {blog_characteristics['Blog Language']} language. + 2). Tone and Brand Alignment: Adjust your tone, voice, personality for {blog_characteristics['Blog Tone']} audience. + 3). Make sure your response content length is of {blog_characteristics['Blog Length']} words. 4). Include FAQs from 'People also Ask' section of provided context 'google search result'. I want the post to offer unique insights, relatable examples, and a fresh perspective on the topic. 
@@ -40,7 +41,9 @@ def write_blog_google_serp(search_keyword, search_results): \n\n \"\"\"{search_results}\"\"\" """ + logger.info("Generating blog and FAQs from Google web search results.") + try: response = llm_text_gen(prompt) return response @@ -48,7 +51,6 @@ def write_blog_google_serp(search_keyword, search_results): logger.error(f"Exit: Failed to get response from LLM: {err}") exit(1) - def improve_blog_intro(blog_content, blog_intro): """Combine the given online research and gpt blog content""" prompt = f""" diff --git a/lib/ai_writers/keywords_to_blog_streamlit.py b/lib/ai_writers/keywords_to_blog_streamlit.py index facc8d93..5e009722 100644 --- a/lib/ai_writers/keywords_to_blog_streamlit.py +++ b/lib/ai_writers/keywords_to_blog_streamlit.py @@ -44,9 +44,11 @@ def write_blog_from_keywords(search_keywords, url=None): status.update(label=f"๐Ÿ›€ Starting Tavily AI research: {search_keywords}") tavily_search_result, t_titles, t_answer = do_tavily_ai_search(search_keywords) - status.update(label=f"๐Ÿ™† Finished Google Search & Tavily AI Search on: {search_keywords}", expanded=False) + status.update(label=f"๐Ÿ™† Finished Google Search & Tavily AI Search on: {search_keywords}", + state="complete", expanded=False) except Exception as err: + st.error(f"Failed in web research: {err}") logger.error(f"Failed in web research: {err}") with st.status("Started Writing blog from google search..", expanded=True) as status: @@ -56,12 +58,9 @@ def write_blog_from_keywords(search_keywords, url=None): status.update(label=f"๐Ÿ›€ Writing blog from Google Search on: {search_keywords}") blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result) st.markdown(blog_markdown_str) - - # Hate the robotic introductions. 
- #blog_markdown_str = improve_blog_intro(blog_markdown_str, t_answer) - #st.markdown(blog_markdown_str) - status.update(label="๐Ÿ™Ž Draft 1: Your Content from Google search result.", expanded=False) + status.update(label="๐Ÿ™Ž Draft 1: Your Content from Google search result.", state="complete", expanded=False) except Exception as err: + st.error(f"Failed in Google web research: {err}") logger.error(f"Failed in Google web research: {err}") # logger.info/check the final blog content. diff --git a/lib/ai_writers/long_form_ai_writer.py b/lib/ai_writers/long_form_ai_writer.py index 56e36afd..581fe956 100644 --- a/lib/ai_writers/long_form_ai_writer.py +++ b/lib/ai_writers/long_form_ai_writer.py @@ -60,22 +60,26 @@ def long_form_generator(content_keywords): """ with st.status("Start Writing Long Form Article, Hold my Beer..", expanded=True) as status: # Read the main_config to define tone, character, personality of the content to be generated. - try: + try: + status.update(label=f"Starting to write content on {content_keywords}.") logger.info(f"Starting to write content on {content_keywords}.") # Define persona and writing guidelines - content_tone, target_audience, content_type, content_language, output_format = read_return_config_section('blog_characteristics') + content_tone, target_audience, content_type, content_language, output_format, content_length = read_return_config_section('blog_characteristics') except Exception as err: logger.error(f"Failed to Read config params from main_config: {err}") - return + st.error(f"Failed to Read config params from main_config: {err}") + return False try: filepath = os.path.join(os.environ["PROMPTS_DIR"], "long_form_ai_writer.prompts") + status.update(label=f"Reading Prompts from {filepath}.") # Check if file exists if not os.path.exists(filepath): raise FileNotFoundError(f"File {filepath} does not exist") with open(filepath, 'r') as file: prompts = yaml.safe_load(file) except Exception as err: + st.error(f"Exit: Failed to read 
prompts from {filepath}: {err}") logger.error(f"Exit: Failed to read prompts from {filepath}: {err}") exit(1) @@ -147,11 +151,12 @@ def long_form_generator(content_keywords): content_title=content_title, web_research_result=web_research_result)).text logger.info(f"The content Outline is: {content_outline}\n\n") - status.update(label="Generated the content outline.") + status.update(label=f"Completed with Content Outline.") except Exception as err: logger.error(f"Failed to generate content outline: {err}") try: + status.update(label=f"Do web research with Tavily to provide context for content creation.") logger.info("Do web research with Tavily to provide context for content creation.") # Do Metaphor/Exa AI search. table_data = [] @@ -163,6 +168,7 @@ def long_form_generator(content_keywords): web_research_result = table_data except Exception as err: logger.error(f"Failed to do Tavily AI search: {err}") + st.error(f"Failed to do Tavily AI search: {err}") return try: @@ -172,6 +178,7 @@ def long_form_generator(content_keywords): web_research_result=web_research_result, writing_guidelines=writing_guidelines)).text except Exception as err: + st.error(f"Failed to Generate Starting draft: {err}") logger.error(f"Failed to Generate Starting draft: {err}") return @@ -194,10 +201,11 @@ def long_form_generator(content_keywords): logger.error(f"Failed as: {err} and {continuation}") logger.info(f"Writing in progress... Current draft length: {len(draft)} characters") + status.update(label=f"Writing in progress... Current draft length: {len(draft)} characters") search_terms = f""" I will provide you with blog outline, your task is to read the outline & return 3 google search keywords. Your response will be used to do web research for writing on the given outline. - Do not explain your response, provide 3 google search sentences encompassing the given content outline. + Do not explain your response, provide 8 google search sentences encompassing the given content outline. 
Provide the search term results as comma separated values.\n\n Content Outline:\n '{content_outline}' @@ -227,26 +235,36 @@ def long_form_generator(content_keywords): # At this point, the context is little stale. We should more web research on # related queries as per the content outline, to augment the LLM context. except Exception as err: + st.error(f"Failed to continually write the Essay: {err}") logger.error(f"Failed to continually write the Essay: {err}") return # Remove 'IAMDONE' and print the final story final = draft.replace('IAMDONE', '').strip() - - blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(final, - content_keywords, m_titles) - - generated_image_filepath = None - # TBD: Save the blog content as a .md file. Markdown or HTML ? - save_blog_to_file(final, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath) - - blog_frontmatter = f""" - --- - title: {blog_title} - categories: [{blog_categories}] - tags: [{blog_tags}] - Meta description: {blog_meta_desc.replace(":", "-")} - ---""" - logger.info(f"\n{blog_frontmatter}{final}\n\n") - st.write(f"\n{blog_frontmatter}{final}\n\n") + status.update(label="Success: Finished writing Long form content.") + + # FIXME: The current implementation is suited for normal length content. + # In long content sending the whole content for each content metadata is expensive. +# blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(final, +# content_keywords, m_titles) +# status.update(label="Success: Finished with Title, Meta Description, Tags, categories") +# generated_image_filepath = None +# # TBD: Save the blog content as a .md file. Markdown or HTML ? 
+# save_blog_to_file(final, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath) +# +# blog_frontmatter = dedent(f""" +# \n--------------------------------------------------------------------- +# title: {blog_title.strip()}\n +# categories: [{blog_categories.strip()}]\n +# tags: [{blog_tags.strip()}]\n +# Meta description: {blog_meta_desc.replace(":", "-").strip()}\n +# ---------------------------------------------------------------------\n +# """) +# +# logger.info(f"\n{blog_frontmatter}{final}\n\n") +# st.markdown(f"\n{blog_frontmatter}{final}\n\n") + logger.info(f"\n{final}\n\n") + logger.info(f"\n\n ################ Finished writing Blog for : {content_keywords} #################### \n") + with st.expander("**Click to View the final content draft:**"): + st.markdown(f"\n{final}\n\n") diff --git a/lib/gpt_providers/text_generation/openai_text_gen.py b/lib/gpt_providers/text_generation/openai_text_gen.py index 7ee033e5..fe5b4f58 100644 --- a/lib/gpt_providers/text_generation/openai_text_gen.py +++ b/lib/gpt_providers/text_generation/openai_text_gen.py @@ -1,12 +1,10 @@ import os import time #IWish -import logging import openai -import configparser # Configure standard logging +import logging logging.basicConfig(level=logging.INFO, format='[%(asctime)s-%(levelname)s-%(module)s-%(lineno)d]- %(message)s') - logger = logging.getLogger(__name__) from tenacity import ( retry, diff --git a/lib/utils/read_main_config_params.py b/lib/utils/read_main_config_params.py index 241b6c8d..3d9eb89b 100644 --- a/lib/utils/read_main_config_params.py +++ b/lib/utils/read_main_config_params.py @@ -1,73 +1,67 @@ -# -# Common utils for lib -# import os -import sys -import configparser +import json from pathlib import Path -from loguru import logger -logger.remove() -logger.add(sys.stdout, - colorize=True, - format="{level}|{file}:{line}:{function}| {message}" - ) - def read_return_config_section(config_section): """ read_return_config_section - Read 
Language Model (LLM) parameters from the configuration file. + Read configuration parameters from the JSON configuration file. Args: - config_path (str): The path to the configuration file. + config_section (str): The section of the configuration file to read. Returns: - tuple: A tuple containing the LLM parameters (gpt_provider, model, temperature, max_tokens, top_p, n, frequency_penalty). + tuple: A tuple containing the specified configuration parameters. Raises: FileNotFoundError: If the configuration file is not found. - configparser.Error: If there is an error parsing the configuration file. + json.JSONDecodeError: If there is an error parsing the JSON configuration file. """ try: - config_path = Path(__file__).resolve().parents[2] / "main_config" - config = configparser.ConfigParser() - config.read(config_path, encoding="utf-8") + config_path = Path(os.environ["ALWRITY_CONFIG"]) - if 'llm_config' in config_section: - gpt_provider = config.get('llm_options', 'gpt_provider') - model = config.get('llm_options', 'model') - temperature = config.getfloat('llm_options', 'temperature') - max_tokens = config.getint('llm_options', 'max_tokens') - top_p = config.getfloat('llm_options', 'top_p') - n = config.getint('llm_options', 'n') - frequency_penalty = config.getfloat('llm_options', 'frequency_penalty') - - return gpt_provider, model, temperature, max_tokens, top_p, n, frequency_penalty - elif 'blog_characteristics' in config_section: - # Access and return the specified config values - blog_tone = config.get('blog_characteristics', 'blog_tone') - blog_demographic = config.get('blog_characteristics', 'blog_demographic') - blog_type = config.get('blog_characteristics', 'blog_type') - blog_language = config.get('blog_characteristics', 'blog_language') - blog_output_format = config.get('blog_characteristics', 'blog_output_format') + with open(config_path, 'r', encoding="utf-8") as file: + config = json.load(file) + + if config_section == 'llm_config': + gpt_provider = 
config['LLM Options']['GPT Provider'] + model = config['LLM Options']['Model'] + temperature = config['LLM Options']['Temperature'] + max_tokens = config['LLM Options']['Max Tokens'] + top_p = config['LLM Options']['Top-p'] + n = config['LLM Options']['N'] + frequency_penalty = config['LLM Options']['Frequency Penalty'] + presence_penalty = config['LLM Options']['Presence Penalty'] + + return gpt_provider, model, temperature, max_tokens, top_p, n, frequency_penalty - return blog_tone, blog_demographic, blog_type, blog_language, blog_output_format + elif config_section == 'blog_characteristics': + blog_tone = config['Blog Content Characteristics']['Blog Tone'] + blog_demographic = config['Blog Content Characteristics']['Blog Demographic'] + blog_type = config['Blog Content Characteristics']['Blog Type'] + blog_language = config['Blog Content Characteristics']['Blog Language'] + blog_output_format = config['Blog Content Characteristics']['Blog Output Format'] + blog_length = config['Blog Content Characteristics']['Blog Length'] - elif 'web_research' in config_section: - # Access the config file and return the specified values - geo_location = config.get('web_research', 'geo_location') - search_language = config.get('web_research', 'search_language') - num_results = config.getint('web_research', 'num_results') - time_range = config.get('web_research', 'time_range') - include_domains = config.get('web_research', 'include_domains') - similar_url = config.get('web_research', 'similar_url') + return blog_tone, blog_demographic, blog_type, blog_language, blog_output_format, blog_length + + elif config_section == 'web_research': + geo_location = config['Search Engine Parameters']['Geographic Location'] + search_language = config['Search Engine Parameters']['Search Language'] + num_results = config['Search Engine Parameters']['Number of Results'] + time_range = config['Search Engine Parameters']['Time Range'] + include_domains = config['Search Engine Parameters']['Include 
Domains'] + similar_url = config['Search Engine Parameters']['Similar URL'] return geo_location, search_language, num_results, time_range, include_domains, similar_url except FileNotFoundError: logger.error(f"Configuration file not found: {config_path}") raise - except configparser.Error as err: - logger.error(f"Error reading LLM parameters from config file: {err}") + except json.JSONDecodeError as err: + logger.error(f"Error reading parameters from config file: {err}") + raise + except KeyError as err: + logger.error(f"Missing key in the configuration file: {err}") raise except Exception as err: logger.error(f"An unexpected error occurred: {err}") diff --git a/lib/workspace/alwrity_config/main_config.json b/lib/workspace/alwrity_config/main_config.json new file mode 100644 index 00000000..0d6847c5 --- /dev/null +++ b/lib/workspace/alwrity_config/main_config.json @@ -0,0 +1,32 @@ +{ + "Blog Content Characteristics": { + "Blog Length": "2000", + "Blog Tone": "Beginner", + "Blog Demographic": "Tech-savvy", + "Blog Type": "Informational", + "Blog Language": "English", + "Blog Output Format": "markdown" + }, + "Blog Images Details": { + "Image Generation Model": "stable-diffusion", + "Number of Blog Images": 1 + }, + "LLM Options": { + "GPT Provider": "google", + "Model": "gemini-1.5-flash-latest", + "Temperature": 0.7, + "Top-p": 0.9, + "Max Tokens": 4000, + "N": 1, + "Frequency Penalty": 1.0, + "Presence Penalty": 1.0 + }, + "Search Engine Parameters": { + "Geographic Location": "us", + "Search Language": "en", + "Number of Results": 10, + "Time Range": "anytime", + "Include Domains": "", + "Similar URL": "" + } +} \ No newline at end of file diff --git a/main_config b/main_config deleted file mode 100644 index d0d86b30..00000000 --- a/main_config +++ /dev/null @@ -1,165 +0,0 @@ -################################################### -# -# Define Blog Content charateristics: -# This is the main config file which drives the code. 
-# This config will restrict code modifications and hence ease of usuability. -# -################################################### - -[blog_characteristics] - -# Length of blogs Or word count. Note: It wont be exact and depends on GPT providers and Max token count. -blog_length = 2000 - -# company/brand-name - -# professional, how-to, begginer, research, programming, casual, etc -blog_tone = "Casual" - -# Target Audience, Gen-Z, Tech-savvy, Working professional, students, kids etc -blog_demographic = "Content creators & Digital marketing" - -# informational, commercial, company, news, finance, competitor, programming, scholar etc -blog_type = "Informational" - -# Spanish, German, Chinese, Arabic, Nepali, Hindi, Hindustani etc -blog_language = "English" - -# Specify the output format of the blog as: HTML, markdown, plaintext. Defaults to markdown. -blog_output_format = "markdown" - - - -############################################################ -# -# Blog Images details. -# Note: The images are created from the blog content. Blog title is used, -# the title is modified for image generation prompt. -# -############################################################ - -[img_details] -# Options are dalle2, dalle3, stable-diffusion. -image_gen_model = "stable-diffusion" - -# Number of blog images to include. -num_images = 1 - - - -########################################################### -# -# Define LLM and its charateristics for fine control on output -# Note: -########################################################### - -[llm_options] - -# Choose one of following: Openai, Google, Minstral -gpt_provider = google - -# Mention which model of the above provider to use. -model = gemini-1.5-flash-latest - -# Temperature is a parameter that controls the โ€œcreativityโ€ or randomness of the text generated by GPT. -# greater determinism and higher values indicating more randomness. 
-# while a lower temperature (e.g., 0.2) makes the output more deterministic and focused (thus, getting flagged as AI content). -temperature = 0.7 - -# Top-p sampling is particularly useful in scenarios where you want to control the level of diversity in the generated text. -# By adjusting the threshold p, you can influence the diversity of the generated sequences. -# A lower top_p will lead to more diverse but potentially less coherent outputs, -# while a higher top_p will produce more conservative outputs with higher probability tokens. -top_p = 0.9 - -# "Max tokens" is a parameter that determines the maximum length of the output sequence generated by a model, -# usually measured in the number of tokens (words or subwords). -# It helps control the length of generated text and manage computational resources during text generation tasks. -max_tokens = 4096 - -# "n" represents the number of words or characters grouped together in a sequence when analyzing text. -# For example, if "n" is 2, we're looking at pairs of words (bigrams), -# if "n" is 3, we're looking at groups of three words (trigrams), and so on. -# It helps us understand patterns and relationships between words in a piece of text. -n = 1 - -# The frequency penalty parameter, ranging from -1 to 1, influences word selection during text generation. -# Higher values favor less common words, promoting diversity, while lower values favor common words, leading to more predictable text. -frequency_penalty = 1 - -# Presence Penalty encourages the use of diverse words by discouraging repetition. -# It encourages the model to avoid using the same words repeatedly and prompts it to generate varied text by suggesting, -# "Try using different words instead of repeating the same ones." -# from -2 (more flexible while generating text) to 2 (strong discouragement in repetition). -presence_penalty = 1 - - -###################################################### -# -# Search Engine Paramters. 
-# Alwrity does comprehensive web research for given content topic. -# Choose search engine parameters below, this finetunes search results -# and makes the generated content more accurate. -# -###################################################### - -# Visit https://serper.dev/playground and provide values from there. -# https://api.serper.dev/locations -[web_research] - -# Geographic location(gl): This values restricts the web search to given country. -# Examples are us for United States, in for India, fr for france, cn for china etc -geo_location = us - -# Locale:hl:language : Define the language you want to search results in. -# Example: en for english, zn-cn for chinese, de for german, hi for hindi etc -search_language: en - -# num_results: Default 10 - Number of google search results to fetch. -num_results = 10 - -# time_range: Acceptable values, past day, past week, past month, past year -# This limits the search results for given time duration, from today. -time_range = anytime - -# include_domains (Give Full URLs, separate by comma): A list of domains to specifically include in the search results. -# Default is None, which includes all domains. Example: https://wikipedia.com,https://stackoverflow.com,google schalor,reddit etc -include_domains = - -# similar_url : A single URL, this will instruct search engines to give results similar to the given URL. -similar_url = - - -########################################################### -# -# Creating Your Virtual content writing Team. -# -# One can choose from the following roles and make a virtual team. -# -# Chief Editor - Oversees the research process and manages the team. -# Researcher (gpt-researcher) - A specialized autonomous agent that conducts in depth research on a given topic. -# Editor - Responsible for planning the research outline and structure. -# Reviewer - Validates the correctness of the research results given a set of criteria. 
-# Revisor - Revises the research results based on the feedback from the reviewer. -# Writer - Responsible for compiling and writing the final report. -# Publisher - Responsible for publishing the final report in various formats. -# -########################################################### - -# Choose multi-agent framework, for now its default to Crewai. More options for langgraph, agentgpt, autogpt etc. - -# Step1: Choose the team members from this list: chief_editor, researcher, editor, reviewer, writer, publisher -# Only the mentioned team members/agents will be included in your dream team. -your_content_team_members = chief_editor, researcher, editor, reviewer, writer - -# Step2: Edit team members for your need in workspace/my_content_team folder. -# Personlization of AI team members for your needs/requirements. Imagine a real team and think along those lines. -# To get you started, refer these files in workspace folder, these are template Agents, one can modify for their company's content needs. -# Tip: Start small, define roles, goals and give backstory which makes Agents work for you. -# Run the example and keep a quality control. Change Persona of the agent, tone of voice, personality etc. -# Check if researcher is giving right results and check out of each Agent. Iterate and refine each agents, until they write to your -# requirements. This will take time but you will end up with a AI Agents content writing team for your own needs(Free, Free, Free). -# where_your_team_at = Provide the directory location where Team members are defined. -# Check with template files in workspace/my_content_team folder. -# where_your_team_at = -