From 3a772b36e5943421d20cb1ffb883c1931fdb04f4 Mon Sep 17 00:00:00 2001 From: ajaysi Date: Wed, 24 Apr 2024 21:13:30 +0530 Subject: [PATCH] AI news report writer, web researched --- alwrity.py | 26 ++--- lib/ai_web_researcher/google_serp_search.py | 52 ++++++++-- lib/ai_writers/ai_news_article_writer.py | 101 ++++++++++++++++++++ lib/check_blog_seo/seo_analysis.py | 2 +- lib/utils/alwrity_utils.py | 55 ++++++++++- requirements.txt | 1 - 6 files changed, 213 insertions(+), 24 deletions(-) create mode 100644 lib/ai_writers/ai_news_article_writer.py diff --git a/alwrity.py b/alwrity.py index f6967e6a..12867455 100644 --- a/alwrity.py +++ b/alwrity.py @@ -21,7 +21,7 @@ load_dotenv(Path('.env')) app = typer.Typer() -from lib.utils.alwrity_utils import blog_from_audio, blog_from_keyword, do_web_research, do_web_research +from lib.utils.alwrity_utils import blog_from_audio, blog_from_keyword, do_web_research, do_web_research, ai_news_writer from lib.utils.alwrity_utils import write_story, essay_writer, blog_tools, competitor_analysis, image_to_text_writer, image_generator @@ -36,11 +36,11 @@ def prompt_for_time_range(): def write_blog_options(): choices = [ - ("Keywords", "Keywords"), - ("Audio To Blog", "Audio To Blog"), - ("Programming", "Programming"), - ("Scholar", "Scholar"), - ("News/TBD", "News/TBD"), + ("Keywords", "Keywords - Provide main blog keywords Or Title"), + ("Audio To Blog", "Audio To Blog - Transcribe Audio files into blog content"), + ("Programming", "Programming - Write technical blogs on latest topics"), + ("Scholar", "Scholar - Research Reports from google scholar, arxiv articles."), + ("AI News Articles", "News - AI News article writer, factual trusted sources"), ("Finance/TBD", "Finance/TBD"), ("Quit", "Quit") ] @@ -60,21 +60,21 @@ def start_interactive_mode(): print(text) choices = [ - ("AI Blog Writer", "AI Blog Writer"), + ("AI Writer", "AI Writer - Blog, YT2Blog, Research Report, News, Finance long-form articles"), ("AI Story Writer", "AI Story Writer"), - ("AI Essay Writer", "AI Essay Writer"), + ("AI Essay Writer", "AI Essay writer"), ("AI Image to Text Writer", "AI Image to Text Writer"), - ("Online Blog Tools/Apps", "Online Blog Tools/Apps"), - ("Do keyword Research", "Do keyword Research"), + ("Online Blog Tools/Apps", "Online AI Apps - Content & Digital marketing"), + ("Do keyword Research", "Keywords web research - Basic, AI and semantic web research"), ("Competitor Analysis", "Competitor Analysis"), - ("Create Blog Images", "Create Blog Images"), + ("Create Blog Images", "Create Images - Stability, Dalle3"), ("AI Social Media(TBD)", "AI Social Media(TBD)"), ("AI Code Writer(TBD)", "AI Code Writer(TBD)"), ("Quit", "Quit") ] mode = radiolist_dialog(title="Choose an option:", values=choices).run() if mode: - if mode == 'AI Blog Writer': + if mode == 'AI Writer': write_blog() elif mode == 'AI Story Writer': write_story() @@ -166,6 +166,8 @@ def write_blog(): elif blog_type == 'Audio To Blog': blog_from_audio() + elif blog_type == 'AI News Articles': + ai_news_writer() elif blog_type == 'GitHub': github = prompt("Enter GitHub URL, CSV file, or topic:") print(f"Write blog based on GitHub: {github}") diff --git a/lib/ai_web_researcher/google_serp_search.py b/lib/ai_web_researcher/google_serp_search.py index c3d9df99..c3efd5fa 100644 --- a/lib/ai_web_researcher/google_serp_search.py +++ b/lib/ai_web_researcher/google_serp_search.py @@ -160,16 +160,13 @@ def perform_serperdev_google_search(query): except Exception as err: logger.error(f"Failed to read config {err}") - # FIXME: Expose options to end user. Request payload + # Build payload as end user or main_config payload = json.dumps({ "q": query, "gl": geo_loc, - "location": geo_loc, "hl": lang, "num": num_results, "autocorrect": True, - "type": "search", - "engine": "google" }) # Request headers with API key @@ -184,6 +181,44 @@ def perform_serperdev_google_search(query): # Check if the request was successful if response.status_code == 200: # Parse and return the JSON response + process_search_results(response) + return response.json() + else: + # Print an error message if the request fails + logger.error(f"Error: {response.status_code}, {response.text}") + return None + + +def perform_serper_news_search(news_keywords, news_country, news_language): + """ Function for Serper.dev News google search """ + # Get the Serper API key from environment variables + logger.info(f"Doing serper.dev google search. {news_keywords} - {news_country} - {news_language}") + serper_api_key = os.getenv('SERPER_API_KEY') + + # Check if the API key is available + if not serper_api_key: + raise ValueError("SERPER_API_KEY is missing. Set it in the .env file.") + + # Serper API endpoint URL + url = "https://google.serper.dev/news" + payload = json.dumps({ + "q": news_keywords, + "gl": news_country, + "hl": news_language, + }) + # Request headers with API key + headers = { + 'X-API-KEY': serper_api_key, + 'Content-Type': 'application/json' + } + # Send a POST request to the Serper API with progress bar + with progress.Bar(label="Searching News", expected_size=100) as bar: + response = requests.post(url, headers=headers, data=payload, stream=True) + # Check if the request was successful + if response.status_code == 200: + # Parse and return the JSON response + #process_search_results(response, "news") + #google_news(news_keywords) return response.json() else: # Print an error message if the request fails @@ -209,7 +244,7 @@ def google_news(search_keywords, news_period="7d", region="IN"): print(googlenews.search('APPLE')) -def process_search_results(search_results): +def process_search_results(search_results, search_type="general"): """ Create a Pandas DataFrame from the search results. @@ -220,8 +255,11 @@ def process_search_results(search_results): pd.DataFrame: Pandas DataFrame containing the search results. """ data = [] - logger.info(f"Google Search Parameters: {search_results.get('searchParameters', {})}") - organic_results = search_results.get("organic", []) + #logger.info(f"Google Search Parameters: {search_results.get('searchParameters', {})}") + if 'general' in search_type: + organic_results = search_results.get("organic", []) + if 'news' in search_type: + organic_results = search_results.get("news", []) # Displaying Organic Results organic_data = [] diff --git a/lib/ai_writers/ai_news_article_writer.py b/lib/ai_writers/ai_news_article_writer.py new file mode 100644 index 00000000..77520721 --- /dev/null +++ b/lib/ai_writers/ai_news_article_writer.py @@ -0,0 +1,101 @@ +###################################################### +# +# Alwrity, as an AI news writer, will have to be factually correct. +# We will do multiple rounds of web research and cite our sources. +# 'include_urls' will focus news articles only from well known sources. +# Choosing a country will help us get better results. +# +###################################################### + +import sys +import os +import json +from textwrap import dedent +from pathlib import Path +from datetime import datetime + +from dotenv import load_dotenv +load_dotenv(Path('../../.env')) +from loguru import logger +logger.remove() +logger.add(sys.stdout, + colorize=True, + format="{level}|{file}:{line}:{function}| {message}" + ) + +from ..gpt_providers.text_generation.main_text_generation import llm_text_gen +from ..ai_web_researcher.google_serp_search import perform_serper_news_search + + +def ai_news_generation(news_keywords, news_country, news_language): + """ Generate news aritcle based on given keywords. """ + # Use to store the blog in a string, to save in a *.md file. + blog_markdown_str = "" + + logger.info(f"Researching and Writing News Article on keywords: {news_keywords}") + # Call on the got-researcher, tavily apis for this. Do google search for organic competition. + try: + google_news_result = perform_serper_news_search(news_keywords, news_country, news_language) + blog_markdown_str = write_news_google_search(news_keywords, news_country, news_language, google_news_result) + #print(blog_markdown_str) + except Exception as err: + logger.error(f"Failed in Google News web research: {err}") + logger.info("\n######### Draft1: Finished News article from Google web search: ###########\n\n") + + +def write_news_google_search(news_keywords, news_country, news_language, search_results): + """Combine the given online research and gpt blog content""" + news_language = get_language_name(news_language) + news_country = get_country_name(news_country) + + prompt = f""" + As an experienced {news_language} news journalist and editor, + I will provide you with my 'News keywords' and its 'google search results'. + Your goal is to write a News report, backed by given google search results. + Important, as a news report, its imperative that your content is factually correct and cited. + + Follow below guidelines: + 1). Understand and utilize the provided google search result json. + 2). Always provide in-line citations and provide referance links. + 3). Understand the given news item and adapt your tone accordingly. + 4). Always include the dates when then news was reported. + 6). Do not explain, describe your response. + 7). Your blog should be highly formatted in markdown style and highly readable. + 8). Important: Please read the entire prompt before writing anything. Follow the prompt exactly as I instructed. + + \n\nNews Keywords: "{news_keywords}"\n\n + Google search Result: "{search_results}" + """ + logger.info("Generating blog and FAQs from Google web search results.") + try: + response = llm_text_gen(prompt) + return response + except Exception as err: + logger.error(f"Exit: Failed to get response from LLM: {err}") + exit(1) + + +def get_language_name(language_code): + languages = { + "es": "Spanish", + "vn": "Vietnamese", + "en": "English", + "ar": "Arabic", + "hi": "Hindi", + "de": "German", + "zh-cn": "Chinese (Simplified)" + # Add more language codes and corresponding names as needed + } + return languages.get(language_code, "Unknown") + +def get_country_name(country_code): + countries = { + "es": "Spain", + "vn": "Vietnam", + "pk": "Pakistan", + "in": "India", + "de": "Germany", + "cn": "China" + # Add more country codes and corresponding names as needed + } + return countries.get(country_code, "Unknown") diff --git a/lib/check_blog_seo/seo_analysis.py b/lib/check_blog_seo/seo_analysis.py index d62a8118..bc60c1a2 100644 --- a/lib/check_blog_seo/seo_analysis.py +++ b/lib/check_blog_seo/seo_analysis.py @@ -1,5 +1,5 @@ from typing import List, Dict, Union -from nltk import tokenize, stem, pos_tag +#from nltk import tokenize, stem, pos_tag from textblob import TextBlob import enchant diff --git a/lib/utils/alwrity_utils.py b/lib/utils/alwrity_utils.py index 446b5ed0..0dc21c56 100644 --- a/lib/utils/alwrity_utils.py +++ b/lib/utils/alwrity_utils.py @@ -15,11 +15,11 @@ from lib.ai_web_researcher.gpt_online_researcher import gpt_web_researcher from lib.ai_web_researcher.metaphor_basic_neural_web_search import metaphor_find_similar from lib.ai_writers.keywords_to_blog import write_blog_from_keywords from lib.ai_writers.speech_to_blog.main_audio_to_blog import generate_audio_blog +from lib.ai_writers.long_form_ai_writer import long_form_generator +from lib.ai_writers.ai_news_article_writer import ai_news_generation from lib.gpt_providers.text_generation.ai_story_writer import ai_story_generator from lib.gpt_providers.text_generation.ai_essay_writer import ai_essay_generator from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image -from lib.ai_writers.long_form_ai_writer import long_form_generator - def blog_from_audio(): @@ -29,7 +29,6 @@ def blog_from_audio(): """ while True: - print("https://github.com/AJaySi/AI-Blog-Writer/wiki/Audio-to-blog-AI-article-writer-%E2%80%90-Alwrity-Speech-To-Text-Feature") audio_input = prompt("""Enter Youtube video URL OR provide Full-Path to audio file.\nšŸ‘‹ : """) # If the user cancels, exit the loop and the application if audio_input is None: @@ -88,6 +87,56 @@ def blog_from_keyword(): exit(1) +def ai_news_writer(): + """ """ + while True: + print("________________________________________________________________") + news_keywords = input_dialog( + title='Enter Keywords from News headlines:', + text='Describe the News article in 3-5 words.\nšŸ‘‹ Enter main keywords describing the News Event: ', + ).run() + + # If the user cancels, exit the loop + if news_keywords is None: + break + if news_keywords and len(news_keywords.split()) >= 2: + break + else: + message_dialog( + title='Error', + text='🚫 News keywords should be at least two words long. Least, you can do..' + ).run() + news_country = radiolist_dialog( + title="Select origin country of the News event:", + values=[ + ("es", "Spain"), + ("vn", "Vietnam"), + ("pk", "Pakistan"), + ("in", "India"), + ("de", "Germany"), + ("cn", "China") + ], + default="in" + ).run() + news_language = radiolist_dialog( + title="Select news article language to search for:", + values=[ + ("en", "English"), + ("es", "Spanish"), + ("vi", "Vietnamese"), + ("ar", "Arabic"), + ("hi", "Hindi"), + ("de", "German"), + ("zh-cn", "Chinese") + ], + default="en" + ).run() + try: + ai_news_generation(news_keywords, news_country, news_language) + except Exception as err: + raise err + + def do_web_research(): """ Input keywords and do web research and present a report.""" if check_search_apis(): diff --git a/requirements.txt b/requirements.txt index d0dc886f..d1980063 100644 --- a/requirements.txt +++ b/requirements.txt @@ -20,7 +20,6 @@ requests_html pytrends pytube wordcloud -nltk prompt_toolkit ipython html2image