AI news report writer, web researched

This commit is contained in:
ajaysi
2024-04-24 21:13:30 +05:30
parent 1c7ba95b27
commit 3a772b36e5
6 changed files with 213 additions and 24 deletions

View File

@@ -160,16 +160,13 @@ def perform_serperdev_google_search(query):
except Exception as err:
logger.error(f"Failed to read config {err}")
# FIXME: Expose options to end user. Request payload
# Build payload as end user or main_config
payload = json.dumps({
"q": query,
"gl": geo_loc,
"location": geo_loc,
"hl": lang,
"num": num_results,
"autocorrect": True,
"type": "search",
"engine": "google"
})
# Request headers with API key
@@ -184,6 +181,44 @@ def perform_serperdev_google_search(query):
# Check if the request was successful
if response.status_code == 200:
# Parse and return the JSON response
process_search_results(response)
return response.json()
else:
# Print an error message if the request fails
logger.error(f"Error: {response.status_code}, {response.text}")
return None
def perform_serper_news_search(news_keywords, news_country, news_language):
""" Function for Serper.dev News google search """
# Get the Serper API key from environment variables
logger.info(f"Doing serper.dev google search. {news_keywords} - {news_country} - {news_language}")
serper_api_key = os.getenv('SERPER_API_KEY')
# Check if the API key is available
if not serper_api_key:
raise ValueError("SERPER_API_KEY is missing. Set it in the .env file.")
# Serper API endpoint URL
url = "https://google.serper.dev/news"
payload = json.dumps({
"q": news_keywords,
"gl": news_country,
"hl": news_language,
})
# Request headers with API key
headers = {
'X-API-KEY': serper_api_key,
'Content-Type': 'application/json'
}
# Send a POST request to the Serper API with progress bar
with progress.Bar(label="Searching News", expected_size=100) as bar:
response = requests.post(url, headers=headers, data=payload, stream=True)
# Check if the request was successful
if response.status_code == 200:
# Parse and return the JSON response
#process_search_results(response, "news")
#google_news(news_keywords)
return response.json()
else:
# Print an error message if the request fails
@@ -209,7 +244,7 @@ def google_news(search_keywords, news_period="7d", region="IN"):
print(googlenews.search('APPLE'))
def process_search_results(search_results):
def process_search_results(search_results, search_type="general"):
"""
Create a Pandas DataFrame from the search results.
@@ -220,8 +255,11 @@ def process_search_results(search_results):
pd.DataFrame: Pandas DataFrame containing the search results.
"""
data = []
logger.info(f"Google Search Parameters: {search_results.get('searchParameters', {})}")
organic_results = search_results.get("organic", [])
#logger.info(f"Google Search Parameters: {search_results.get('searchParameters', {})}")
if 'general' in search_type:
organic_results = search_results.get("organic", [])
if 'news' in search_type:
organic_results = search_results.get("news", [])
# Displaying Organic Results
organic_data = []

View File

@@ -0,0 +1,101 @@
######################################################
#
# Alwrity, as an AI news writer, will have to be factually correct.
# We will do multiple rounds of web research and cite our sources.
# 'include_urls' will restrict the search to news articles from well-known sources.
# Choosing a country will help us get better results.
#
######################################################
import sys
import os
import json
from textwrap import dedent
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv
load_dotenv(Path('../../.env'))
from loguru import logger
logger.remove()
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
from ..ai_web_researcher.google_serp_search import perform_serper_news_search
def ai_news_generation(news_keywords, news_country, news_language):
    """Research a news topic via Google News search and draft an article from it.

    Args:
        news_keywords: Keywords describing the news event to research.
        news_country: Country code forwarded to the news search and the writer.
        news_language: Language code forwarded to the news search and the writer.

    Returns:
        The article produced by ``write_news_google_search``. An empty string
        is returned when the research or writing step raises an exception
        (the error is logged, not propagated).
    """
    # Holds the generated article so the caller can display or save it.
    blog_markdown_str = ""
    logger.info(f"Researching and Writing News Article on keywords: {news_keywords}")
    # Search Google News through serper.dev, then write the article from the results.
    try:
        google_news_result = perform_serper_news_search(news_keywords, news_country, news_language)
        blog_markdown_str = write_news_google_search(news_keywords, news_country, news_language, google_news_result)
    except Exception as err:
        logger.error(f"Failed in Google News web research: {err}")
    else:
        # Only announce completion when the draft was actually produced.
        logger.info("\n######### Draft1: Finished News article from Google web search: ###########\n\n")
    return blog_markdown_str
def write_news_google_search(news_keywords, news_country, news_language, search_results):
    """Ask the LLM to write a cited news report from Google search results.

    Args:
        news_keywords: Keywords describing the news event.
        news_country: Country code of the news event. Accepted to keep the
            call signature stable; the prompt does not use it.
        news_language: Language code; converted to a language name for the prompt.
        search_results: Search results, interpolated as-is into the prompt.

    Returns:
        The response returned by ``llm_text_gen`` for the constructed prompt.

    Raises:
        SystemExit: With status 1 when the LLM call fails.
    """
    news_language = get_language_name(news_language)
    prompt = f"""
        As an experienced {news_language} news journalist and editor,
        I will provide you with my 'News keywords' and its 'google search results'.
        Your goal is to write a News report, backed by given google search results.
        Important, as a news report, it's imperative that your content is factually correct and cited.
        Follow below guidelines:
        1). Understand and utilize the provided google search result json.
        2). Always provide in-line citations and provide reference links.
        3). Understand the given news item and adapt your tone accordingly.
        4). Always include the dates when the news was reported.
        5). Do not explain, describe your response.
        6). Your blog should be highly formatted in markdown style and highly readable.
        7). Important: Please read the entire prompt before writing anything. Follow the prompt exactly as I instructed.
        \n\nNews Keywords: "{news_keywords}"\n\n
        Google search Result: "{search_results}"
        """
    logger.info("Generating blog and FAQs from Google web search results.")
    try:
        return llm_text_gen(prompt)
    except Exception as err:
        logger.error(f"Exit: Failed to get response from LLM: {err}")
        # Same effect as exit(1), without relying on the interactive `site` helper.
        raise SystemExit(1)
def get_language_name(language_code):
    """Return the English language name for a supported language code.

    Args:
        language_code: Language code such as "en", "vi" or "zh-cn".

    Returns:
        The language name, or "Unknown" when the code is not in the table.
    """
    languages = {
        "es": "Spanish",
        # "vi" is the language code for Vietnamese.
        "vi": "Vietnamese",
        # "vn" is Vietnam's country code; kept so existing callers keep working.
        "vn": "Vietnamese",
        "en": "English",
        "ar": "Arabic",
        "hi": "Hindi",
        "de": "German",
        "zh-cn": "Chinese (Simplified)",
        # Add more language codes and corresponding names as needed
    }
    return languages.get(language_code, "Unknown")
def get_country_name(country_code):
    """Translate a country code into the country's English name.

    Codes that are missing from the lookup table yield "Unknown".
    """
    # Extend this table with more (code, name) pairs as needed.
    code_name_pairs = (
        ("es", "Spain"),
        ("vn", "Vietnam"),
        ("pk", "Pakistan"),
        ("in", "India"),
        ("de", "Germany"),
        ("cn", "China"),
    )
    names_by_code = dict(code_name_pairs)
    try:
        return names_by_code[country_code]
    except KeyError:
        return "Unknown"

View File

@@ -1,5 +1,5 @@
from typing import List, Dict, Union
from nltk import tokenize, stem, pos_tag
#from nltk import tokenize, stem, pos_tag
from textblob import TextBlob
import enchant

View File

@@ -15,11 +15,11 @@ from lib.ai_web_researcher.gpt_online_researcher import gpt_web_researcher
from lib.ai_web_researcher.metaphor_basic_neural_web_search import metaphor_find_similar
from lib.ai_writers.keywords_to_blog import write_blog_from_keywords
from lib.ai_writers.speech_to_blog.main_audio_to_blog import generate_audio_blog
from lib.ai_writers.long_form_ai_writer import long_form_generator
from lib.ai_writers.ai_news_article_writer import ai_news_generation
from lib.gpt_providers.text_generation.ai_story_writer import ai_story_generator
from lib.gpt_providers.text_generation.ai_essay_writer import ai_essay_generator
from lib.gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
from lib.ai_writers.long_form_ai_writer import long_form_generator
def blog_from_audio():
@@ -29,7 +29,6 @@ def blog_from_audio():
"""
while True:
print("https://github.com/AJaySi/AI-Blog-Writer/wiki/Audio-to-blog-AI-article-writer-%E2%80%90-Alwrity-Speech-To-Text-Feature")
audio_input = prompt("""Enter Youtube video URL OR provide Full-Path to audio file.\n👋 : """)
# If the user cancels, exit the loop and the application
if audio_input is None:
@@ -88,6 +87,56 @@ def blog_from_keyword():
exit(1)
def ai_news_writer():
    """Interactively collect news keywords, country and language, then write the article.

    The keyword dialog repeats until at least two words are entered. If the
    user cancels any dialog, the function returns without calling
    ``ai_news_generation``. Exceptions raised by ``ai_news_generation``
    propagate to the caller.
    """
    while True:
        print("________________________________________________________________")
        news_keywords = input_dialog(
            title='Enter Keywords from News headlines:',
            text='Describe the News article in 3-5 words.\n👋 Enter main keywords describing the News Event: ',
        ).run()

        # A cancelled dialog yields None: stop here, there is nothing to research.
        if news_keywords is None:
            return

        if len(news_keywords.split()) >= 2:
            break

        message_dialog(
            title='Error',
            text='🚫 News keywords should be at least two words long. Least, you can do..'
        ).run()

    news_country = radiolist_dialog(
        title="Select origin country of the News event:",
        values=[
            ("es", "Spain"),
            ("vn", "Vietnam"),
            ("pk", "Pakistan"),
            ("in", "India"),
            ("de", "Germany"),
            ("cn", "China")
        ],
        default="in"
    ).run()
    # Cancelling the country selection aborts the whole flow.
    if news_country is None:
        return

    news_language = radiolist_dialog(
        title="Select news article language to search for:",
        values=[
            ("en", "English"),
            ("es", "Spanish"),
            ("vi", "Vietnamese"),
            ("ar", "Arabic"),
            ("hi", "Hindi"),
            ("de", "German"),
            ("zh-cn", "Chinese")
        ],
        default="en"
    ).run()
    # Cancelling the language selection aborts the whole flow.
    if news_language is None:
        return

    ai_news_generation(news_keywords, news_country, news_language)
def do_web_research():
""" Input keywords and do web research and present a report."""
if check_search_apis():