diff --git a/README.md b/README.md index f7fecf1b..e9f50d46 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ To start using this tool, simply follow one of the options below: - Open PowerShell or Windows Terminal: Press `Windows Key + X`, then select "Windows Terminal". - Paste or type and press enter:⏎.⏎.
-`winget install --id Git.Git -e --source winget` +winget install --id Git.Git -e --source winget - Wait for download bars to finish *Note for Linux Users:* If you're on Linux and can't install these, get lost πŸ§™β™‚οΈ @@ -43,11 +43,11 @@ To clone the repository to your local machine, perform the following steps: 2. **Navigate to the Desired Directory:** Use the `cd` command to move to the directory where you want to clone the repository. 3. **Clone the Repository:** Run the following command in PowerShell to clone the repository: -`git clone https://github.com/AJaySi/AI-Blog-Writer.git` +git clone https://github.com/AJaySi/AI-Blog-Writer.git This command will download all the files from the repository to your local machine. 4. **Verify the Clone:** After the cloning process is complete, navigate into the newly created directory using: -`cd AI-Blog-Writer` +cd AI-Blog-Writer ``` Once you've cloned the repository, you can proceed with the next steps for installation and setup. diff --git a/alwrity.py b/alwrity.py index dbf3dc79..057622a4 100644 --- a/alwrity.py +++ b/alwrity.py @@ -1,5 +1,6 @@ import os from pathlib import Path +import configparser import typer from prompt_toolkit.shortcuts import checkboxlist_dialog, message_dialog, input_dialog @@ -268,7 +269,7 @@ def do_web_research(): while True: print("________________________________________________________________") search_keywords = input_dialog( - title='Enter Search Keywords below:', + title='Enter Search Keywords below: More Options in main_config.', text='πŸ‘‹ Enter keywords for web research (Or keywords from your blog):', ).run() if search_keywords and len(search_keywords.split()) >= 2: @@ -278,34 +279,15 @@ def do_web_research(): title='Warning', text='🚫 Search keywords should be at least three words long. Please try again.' 
).run() - selected_time_range = prompt_for_time_range() - - # Display input dialog for similar search URL (optional) - similar_url = input_dialog( - title="Enter a similar search URL", - text="πŸ‘‹ Enter a similar search URL (Optional: Enter to skip):\nπŸ™‹Usecases: Competitor Analysis Tool. πŸ“‘Discover similar companies, startups and technologies.", - default="", - ).run() - - # Display input dialog for included URLs (optional) - include_urls = input_dialog( - title="Enter URLs to include in the web search:", - text="πŸ‘‹ Enter comma-separated URLs to include in web research (press Enter to skip):\nπŸ™‹ If you wish to [bold]confine search[/bold] to certain domains like wikipedia etc.", - default="", - ).run() - try: print(f"πŸš€πŸŽ¬πŸš€ [bold green]Starting web research on given keywords: {search_keywords}..") - #print(f"Web Research: Time Range - {time_range}, Search Keywords - {search_keywords}, Include URLs - {include_urls}") - web_research_result = gpt_web_researcher(search_keywords, - time_range=selected_time_range, - include_domains=include_urls, - similar_url=similar_url) + web_research_result = gpt_web_researcher(search_keywords) except Exception as err: print(f"\nπŸ’₯🀯 [bold red]ERROR 🀯 : Failed to do web research: {err}\n") + def check_llm_environs(): """ Function to check which LLM api is given. 
""" # Check if GPT_PROVIDER is defined in .env file diff --git a/lib/ai_web_researcher/common_utils.py b/lib/ai_web_researcher/common_utils.py new file mode 100644 index 00000000..bdaf3ad6 --- /dev/null +++ b/lib/ai_web_researcher/common_utils.py @@ -0,0 +1,101 @@ +# Common utils for web_researcher +import os +import sys +import re +import configparser +from datetime import datetime, timedelta +from pathlib import Path +from loguru import logger +logger.remove() +logger.add(sys.stdout, + colorize=True, + format="{level}|{file}:{line}:{function}| {message}" + ) + + +def cfg_search_param(flag): + """ + Read values from the main_config file and return them as variables and a dictionary. + + Args: + file_path (str): The path to the main_config file. + + Returns: + dict: A dictionary containing the values read from the config file. + str: The geographic location value. + str: The search language value. + int: The number of search results to fetch. + """ + try: + file_path = Path(__file__).resolve().parents[2] / "main_config" + logger.info(f"Reading search config params from {file_path}") + config = configparser.ConfigParser() + config.read(file_path) + web_research_section = config["web_research"] + + if 'serperdev' in flag: + # Get values as variables + geo_location = web_research_section.get("geo_location") + search_language = web_research_section.get("search_language") + num_results = web_research_section.getint("num_results") + return geo_location, search_language, num_results + + elif 'tavily' in flag: + include_urls = web_research_section.get("include_domains") + pattern = re.compile(r"^(https?://\w+)(,\s*https?://\w+)*$") + if pattern.match(include_urls) is not None: + include_urls = include_urls.split(',') + elif re.match(r"^http?://\w+$", include_urls) is not None: + include_urls = include_urls.split(" ") + else: + include_urls = None + return include_urls + + elif 'exa' in flag: + include_urls = web_research_section.get("include_domains") + pattern = 
re.compile(r"^(https?://\w+)(,\s*https?://\w+)*$") + if pattern.match(include_urls) is not None: + include_urls = include_urls.split(',') + elif re.match(r"^http?://\w+$", include_urls) is not None: + include_urls = include_urls.split(" ") + else: + include_urls = None + + num_results = web_research_section.getint("num_results") + similar_url = web_research_section.get("similar_url") + time_range = web_research_section.get("time_range") + if time_range == "past day": + start_published_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d') + elif time_range == "past week": + start_published_date = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d") + elif time_range == "past month": + start_published_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d') + elif time_range == "past year": + start_published_date = (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d') + elif time_range == "anytime" or not time_range: + start_published_date = None + time_range = start_published_date + return include_urls, time_range, num_results, similar_url + + except FileNotFoundError: + logger.error(f"Error: Config file '{file_path}' not found.") + return {}, None, None, None + except KeyError as e: + logger.error(f"Error: Missing section or option in config file: {e}") + return {}, None, None, None + except ValueError as e: + logger.error(f"Error: Invalid value in config file: {e}") + return {}, None, None, None + + +def save_in_file(table_content): + """ Helper function to save search analysis in a file. 
""" + file_path = os.environ.get('SEARCH_SAVE_FILE') + try: + # Save the content to the file + with open(file_path, "a+") as file: + file.write(table_content) + file.write("\n" * 3) # Add three newlines at the end + logger.info(f"Search content saved to {file_path}") + except Exception as e: + logger.error(f"Error occurred while writing to the file: {e}") diff --git a/lib/ai_web_researcher/google_serp_search.py b/lib/ai_web_researcher/google_serp_search.py index ba539d33..fecf278b 100644 --- a/lib/ai_web_researcher/google_serp_search.py +++ b/lib/ai_web_researcher/google_serp_search.py @@ -20,20 +20,18 @@ Modifications: - Customize the search parameters, such as location and language, in the functions as needed. - Adjust logging configurations, table formatting, and other aspects based on preferences. -To-Do (TBD): -- Consider adding further enhancements or customization based on specific use cases. - -Note: This script depends on external libraries such as SerpApi, Loguru, Rich, and Tabulate. Install them using 'pip install serpapi loguru rich tabulate' if not already installed. 
""" import os from pathlib import Path import sys - +import configparser +from pathlib import Path import pandas as pd import json import requests from clint.textui import progress + #from serpapi import GoogleSearch from loguru import logger from tabulate import tabulate @@ -49,6 +47,8 @@ logger.add( format="{level}|{file}:{line}:{function}| {message}" ) +from .common_utils import save_in_file, cfg_search_param + from tenacity import retry, stop_after_attempt, wait_random_exponential @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) @@ -87,17 +87,7 @@ def google_search(query): # except Exception as err: # logger.error("FIXME: Failed to do Google search with BROWSERLESS API.") # logger.debug("FIXME: Trying with dataforSEO API.") -# -# # Retry with dataforSEO API -# try: -# logger.info("Perform SERP with Data for SEO.") -# #search_result = perform_dataforseo_google_search(query) -# #return process_search_results(search_result, flag) -# except Exception as err: -# logger.error("FIXME: Failed to do Google search with dataforSEO API.") -# logger.debug("All retries failed. Giving up.") -# raise - + def perform_serpapi_google_search(query, location="in"): @@ -159,12 +149,17 @@ def perform_serperdev_google_search(query): # Serper API endpoint URL url = "https://google.serper.dev/search" + try: + geo_loc, lang, num_results = cfg_search_param('serperdev') + except Exception as err: + logger.error(f"Failed to read config {err}") + # FIXME: Expose options to end user. Request payload payload = json.dumps({ "q": query, - "gl": "in", - "hl": "en", - "num": 10, + "gl": geo_loc, + "hl": lang, + "num": num_results, "autocorrect": True, "page": 1, "type": "search", @@ -294,16 +289,3 @@ def process_search_results(search_results): except Exception as save_results_err: logger.error(f"Failed to save search results: {save_results_err}") return search_results - - -def save_in_file(table_content): - """ Helper function to save search analysis in a file. 
""" - file_path = os.environ.get('SEARCH_SAVE_FILE') - try: - # Save the content to the file - with open(file_path, "a+") as file: - file.write(table_content) - file.write("\n" * 3) # Add three newlines at the end - logger.info(f"Search content saved to {file_path}") - except Exception as e: - logger.error(f"Error occurred while writing to the file: {e}") diff --git a/lib/ai_web_researcher/gpt_online_researcher.py b/lib/ai_web_researcher/gpt_online_researcher.py index 57e7e71d..f8f7bdf1 100644 --- a/lib/ai_web_researcher/gpt_online_researcher.py +++ b/lib/ai_web_researcher/gpt_online_researcher.py @@ -1,6 +1,22 @@ ################################################################ -# # +# ## Features +# +# - **Web Research**: Alwrity enables users to conduct web research efficiently. +# By providing keywords or topics of interest, users can initiate searches across multiple platforms simultaneously. +# +# - **Google SERP Search**: The tool integrates with Google Search Engine Results Pages (SERP) +# to retrieve relevant information based on user queries. It offers insights into organic search results, +# People Also Ask, and related searches. +# +# - **Tavily AI Integration**: Alwrity leverages Tavily AI's capabilities to enhance web research. +# It utilizes advanced algorithms to search for information and extract relevant data from various sources. +# +# - **Metaphor AI Semantic Search**: Alwrity employs Metaphor AI's semantic search technology to find related articles and content. +# By analyzing context and meaning, it delivers precise and accurate results. +# +# - **Google Trends Analysis**: The tool provides Google Trends analysis for user-defined keywords. +# It helps users understand the popularity and trends associated with specific topics over time. 
# ############################################################## @@ -26,8 +42,9 @@ logger.add(sys.stdout, ) -def gpt_web_researcher(search_keywords, time_range=None, include_domains=list(), similar_url=None): - """ """ +def gpt_web_researcher(search_keywords): + """ Keyword based web researcher, basic, neural and Semantic search.""" + print(f"Web Research:Time Range - {time_range},Search Keywords - {search_keywords},Include URLs - {include_domains}") # TBD: Keeping the results directory as fixed, for now. os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "workspace", "web_research_reports", search_keywords.replace(" ", "_") + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S")) @@ -35,8 +52,8 @@ def gpt_web_researcher(search_keywords, time_range=None, include_domains=list(), include_domains = list() google_search_result = do_google_serp_search(search_keywords) - tavily_search_result = do_tavily_ai_search(search_keywords, include_domains) - metaphor_search_result = do_metaphor_ai_research(search_keywords, include_domains, time_range, similar_url) + tavily_search_result = do_tavily_ai_search(search_keywords) + metaphor_search_result = do_metaphor_ai_research(search_keywords) gtrends_search_result = do_google_pytrends_analysis(search_keywords) # get_rag_results(search_query) print(f"\n\nReview the analysis in this file at: {os.environ.get('SEARCH_SAVE_FILE')}\n") @@ -54,30 +71,23 @@ def do_google_serp_search(search_keywords): # Not failing, as tavily would do same and then GPT-V to search. -def do_tavily_ai_search(search_keywords, include_domains=None): +def do_tavily_ai_search(search_keywords): """ Common function to do Tavily AI web research.""" try: # FIXME: Include the follow-up questions as blog FAQs. 
logger.info(f"Doing Tavily AI search for: {search_keywords}") - t_results = get_tavilyai_results(search_keywords, include_domains) + t_results = get_tavilyai_results(search_keywords) t_titles = tavily_extract_information(t_results, 'titles') return(t_results, t_titles) except Exception as err: logger.error(f"Failed to do Tavily AI Search: {err}") -def do_metaphor_ai_research(search_keywords, - include_domains=None, - time_range=None, - similar_url=None): +def do_metaphor_ai_research(search_keywords): """ """ try: logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}") - response_articles = metaphor_search_articles( - search_keywords, - include_domains=include_domains, - time_range=time_range, - similar_url=similar_url) + response_articles = metaphor_search_articles(search_keywords) m_titles = metaphor_extract_titles_or_text(response_articles, return_titles=True) return(response_articles, m_titles) except Exception as err: diff --git a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py index a9baa9aa..f1f323c9 100644 --- a/lib/ai_web_researcher/metaphor_basic_neural_web_search.py +++ b/lib/ai_web_researcher/metaphor_basic_neural_web_search.py @@ -26,6 +26,7 @@ from exa_py import Exa from tenacity import (retry, stop_after_attempt, wait_random_exponential,)# for exponential backoff from .gpt_summarize_web_content import summarize_web_content from .gpt_competitor_analysis import summarize_competitor_content +from .common_utils import save_in_file, cfg_search_param @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) @@ -99,12 +100,7 @@ def metaphor_find_similar(similar_url): -def metaphor_search_articles(query, - num_results=5, - use_autoprompt=True, - include_domains=[], - time_range=None, - similar_url=None): +def metaphor_search_articles(query): """ Search for articles using the Metaphor API. 
@@ -120,16 +116,7 @@ def metaphor_search_articles(query, """ metaphor = get_metaphor_client() try: - if time_range == "past day": - start_published_date = (datetime.now() - timedelta(days=1)).strftime('%Y-%m-%d') - elif time_range == "past week": - start_published_date = (datetime.now() - timedelta(days=7)).strftime("%Y-%m-%d") - elif time_range == "past month": - start_published_date = (datetime.now() - timedelta(days=30)).strftime('%Y-%m-%d') - elif time_range == "past year": - start_published_date = (datetime.now() - timedelta(days=365)).strftime('%Y-%m-%d') - else: - start_published_date = None + include_domains, start_published_date, num_results, similar_url = cfg_search_param('exa') logger.info(f"Metaphor web search with Date: {start_published_date} and Query: {query}") try: @@ -145,6 +132,7 @@ def metaphor_search_articles(query, # From each webpage, get a summary of the web page. contents_response = search_response.results + # FIXME: Need to summarize for smaller input context window. # for content in tqdm(contents_response, desc="Reading Web URL content:", unit="content"): # summarized_content = summarize_web_content(content.text, "gemini") # content.text = summarized_content @@ -200,19 +188,6 @@ def print_search_result(contents_response): logger.error(f"Failed to save search results: {save_results_err}") -def save_in_file(table_content): - """ Helper function to save search analysis in a file. """ - file_path = os.environ.get('SEARCH_SAVE_FILE') - try: - # Save the content to the file - with open(file_path, "a+") as file: - file.write(table_content) - file.write("\n" * 3) # Add three newlines at the end - logger.info(f"Search content saved to {file_path}") - except Exception as e: - logger.error(f"Error occurred while writing to the file: {e}") - - def metaphor_scholar_search(query, include_domains=None, time_range="anytime"): """ Search for papers using the Metaphor API. 
diff --git a/lib/ai_web_researcher/tavily_ai_search.py b/lib/ai_web_researcher/tavily_ai_search.py index 7753dd6e..e5829df0 100644 --- a/lib/ai_web_researcher/tavily_ai_search.py +++ b/lib/ai_web_researcher/tavily_ai_search.py @@ -22,7 +22,6 @@ Modifications: To-Do (TBD): - Consider adding further enhancements or customization based on specific use cases. -Note: This script depends on external libraries such as Tavily, Rich, Tabulate, Loguru, and Tenacity. Install them using 'pip install tavily rich tabulate loguru tenacity' if not already installed. """ @@ -44,11 +43,14 @@ logger.add(sys.stdout, colorize=True, format="{level}|{file}:{line}:{function}| {message}" ) + +from .common_utils import save_in_file, cfg_search_param + from tenacity import retry, stop_after_attempt, wait_random_exponential @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) -def get_tavilyai_results(keywords, include_urls, search_depth="advanced"): +def get_tavilyai_results(keywords): """ Get Tavily AI search results based on specified keywords and options. @@ -74,11 +76,23 @@ def get_tavilyai_results(keywords, include_urls, search_depth="advanced"): except Exception as err: logger.error(f"Failed to create Tavily client. Check TAVILY_API_KEY: {err}") + # Read search config params from the file. 
+ try: + include_urls = cfg_search_param('tavily') + except Exception as err: + logger.error(f"Failed to read search params from main_config: {err}") + try: if include_urls: - tavily_search_result = client.search(keywords, search_depth, include_answer=True, include_domains=include_urls) + tavily_search_result = client.search(keywords, + search_depth="advanced", + include_answer=True, + include_domains=include_urls) else: - tavily_search_result = client.search(keywords, search_depth, include_answer=True) + tavily_search_result = client.search(keywords, + search_depth = "advanced", + include_answer=True) + print_result_table(tavily_search_result) return(tavily_search_result) except Exception as err: diff --git a/lib/ai_writers/keywords_to_blog.py b/lib/ai_writers/keywords_to_blog.py index db84ac6a..8d16a31f 100644 --- a/lib/ai_writers/keywords_to_blog.py +++ b/lib/ai_writers/keywords_to_blog.py @@ -37,26 +37,26 @@ def write_blog_from_keywords(search_keywords, url=None): blog_markdown_str = "" example_blog_titles = [] - logger.info(f"Researching and Writing Blog on keywords: {search_keywords}") - # Call on the got-researcher, tavily apis for this. Do google search for organic competition. - try: - google_search_result, g_titles = do_google_serp_search(search_keywords) - example_blog_titles.append(g_titles) - blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result) - except Exception as err: - logger.error(f"Failed in Google web research: {err}") - # logger.info/check the final blog content. - logger.info("\n######### Draft1: Finished Blog from Google web search: ###########\n\n") +# logger.info(f"Researching and Writing Blog on keywords: {search_keywords}") +# # Call on the got-researcher, tavily apis for this. Do google search for organic competition. 
+# try: +# google_search_result, g_titles = do_google_serp_search(search_keywords) +# example_blog_titles.append(g_titles) +# blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result) +# except Exception as err: +# logger.error(f"Failed in Google web research: {err}") +# # logger.info/check the final blog content. +# logger.info("\n######### Draft1: Finished Blog from Google web search: ###########\n\n") - # Do Tavily AI research to augument the above blog. - try: - tavily_search_result, t_titles = do_tavily_ai_search(search_keywords) - example_blog_titles.append(t_titles) - blog_markdown_str = blog_with_research(blog_markdown_str, tavily_search_result) - logger.info(f"######### Blog content after Tavily AI research: ######### \n\n{blog_markdown_str}\n\n") - except Exception as err: - logger.error(f"Failed to do Tavily AI research: {err}") - logger.info("######### Draft2: Blog content after Tavily AI research: #########\n\n") +# # Do Tavily AI research to augument the above blog. +# try: +# tavily_search_result, t_titles = do_tavily_ai_search(search_keywords) +# example_blog_titles.append(t_titles) +# blog_markdown_str = blog_with_research(blog_markdown_str, tavily_search_result) +# logger.info(f"######### Blog content after Tavily AI research: ######### \n\n{blog_markdown_str}\n\n") +# except Exception as err: +# logger.error(f"Failed to do Tavily AI research: {err}") +# logger.info("######### Draft2: Blog content after Tavily AI research: #########\n\n") try: # Do Metaphor/Exa AI search. 
diff --git a/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py b/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py index 626c6f67..ee55be08 100644 --- a/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py +++ b/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py @@ -96,10 +96,10 @@ def speech_to_text(video_url, output_path='.'): finally: try: if os.path.exists(audio_file): - os.remove(audio_file) - logger.info("Temporary audio file removed.") + os.remove(audio_file) + logger.info("Temporary audio file removed.") except PermissionError: - logger.error(f"Permission error: Cannot remove '{audio_file}'. Please make sure you have the necessary permissions.") + logger.error(f"Permission error: Cannot remove '{audio_file}'. Please make sure of necessary permissions.") except Exception as e: logger.error(f"An error occurred removing audio file: {e}") diff --git a/main_config b/main_config index 5233b620..fb06eea1 100644 --- a/main_config +++ b/main_config @@ -33,6 +33,7 @@ blog_output_folder = "" blog_image_output_folder = "" + ############################################################ # # Blog Images details. @@ -41,6 +42,7 @@ blog_image_output_folder = "" # ############################################################ +[img_details] # Options are dalle2, dalle3, stable-diffusion. image_gen_model = "stable-diffusion" @@ -48,6 +50,7 @@ image_gen_model = "stable-diffusion" num_images = 1 + ########################################################### # # Define LLM and its charateristics for fine control on output @@ -93,3 +96,39 @@ frequency_penalty = 1 # "Try using different words instead of repeating the same ones." # from -2 (more flexible while generating text) to 2 (strong discouragement in repetition). presence_penalty = 1 + + +###################################################### +# +# Search Engine Paramters. +# Alwrity does comprehensive web research for given content topic. 
+# Choose search engine parameters below; this fine-tunes search results +# and makes the generated content more accurate. +# +###################################################### + +# Visit https://serper.dev/playground and provide values from there. +# https://api.serper.dev/locations +[web_research] + +# Geographic location (gl): This value restricts the web search to the given country. +# Examples: us for United States, in for India, fr for France, cn for China, etc. +geo_location = us + +# Locale (hl), language: Define the language you want search results in. +# Example: en for English, zh-cn for Chinese, de for German, hi for Hindi, etc. +search_language: en + +# num_results: Default 10 - Number of Google search results to fetch. +num_results = 10 + +# time_range: Acceptable values: anytime, past day, past week, past month, past year +# This limits the search results to the given time span, counted back from today. +time_range = anytime + +# include_domains (give full URLs, separated by commas): A list of domains to specifically include in the search results. +# Default is None, which includes all domains. Example: Wikipedia.com, stackoverflow.com, Google Scholar, reddit, etc. +include_domains = + +# similar_url: A single URL; this instructs search engines to give results similar to the given URL. +similar_url =