WIP-AI writer, Try Web research working.
This commit is contained in:
@@ -213,7 +213,6 @@ def process_search_results(search_results):
|
||||
data = []
|
||||
logger.info(f"Google Search Parameters: {search_results.get('searchParameters', {})}")
|
||||
organic_results = search_results.get("organic", [])
|
||||
print(search_results)
|
||||
|
||||
# Displaying Organic Results
|
||||
organic_data = []
|
||||
|
||||
@@ -7,7 +7,7 @@ Features:
|
||||
- Visualizes Google Trends data, including interest over time and interest by region.
|
||||
- Retrieves related queries and topics for a set of search keywords.
|
||||
- Utilizes visualization libraries such as Matplotlib, Plotly, and Rich for displaying results.
|
||||
- Incorporates logging for error handling and informative messages.
|
||||
- Incorporates loguru's logger for error handling and informative messages.
|
||||
|
||||
Usage:
|
||||
- Provide a search term or a list of search terms for analysis.
|
||||
@@ -22,6 +22,7 @@ Modifications:
|
||||
Note: Ensure that the required libraries are installed using 'pip install pytrends requests_html tqdm tabulate plotly rich'.
|
||||
"""
|
||||
|
||||
import os
|
||||
import requests
|
||||
import numpy as np
|
||||
import sys
|
||||
@@ -37,14 +38,12 @@ import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import plotly.express as px
|
||||
import plotly.io as pio
|
||||
import logging
|
||||
from requests_html import HTML, HTMLSession
|
||||
from urllib.parse import quote_plus
|
||||
from tqdm import tqdm
|
||||
from tabulate import tabulate
|
||||
from pytrends.request import TrendReq
|
||||
import wordcloud
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
from wordcloud import WordCloud
|
||||
from loguru import logger
|
||||
|
||||
# Configure logger
|
||||
@@ -75,7 +74,7 @@ def fetch_google_trends_interest_overtime(keyword):
|
||||
|
||||
return data
|
||||
except Exception as e:
|
||||
logging.error(f"Error in fetch_google_trends_data: {e}")
|
||||
logger.error(f"Error in fetch_google_trends_data: {e}")
|
||||
return pd.DataFrame()
|
||||
|
||||
|
||||
@@ -151,10 +150,11 @@ def get_related_queries_and_save_csv(keywords, hl='en-US', tz=360, cat=0, timefr
|
||||
print("\n\033[1m🔝 Top\033[0m: The most popular search queries. Scoring is on a relative scale where a value of 100 is the most commonly searched query, 50 is a query searched half as often, and a value of 0 is a query searched for less than 1% as often as the most popular query.\n")
|
||||
print("\n\033[1m🚀 Rising\033[0m: Queries with the biggest increase in search frequency since the last time period. Results marked 'Breakout' had a tremendous increase, probably because these queries are new and had few (if any) prior searches.\n")
|
||||
# Display the DataFrame using tabulate
|
||||
print(tabulate(all_queries_df, headers='keys', tablefmt='fancy_grid'))
|
||||
table = tabulate(all_queries_df, headers='keys', tablefmt='fancy_grid')
|
||||
print(table)
|
||||
# Save the combined table to a file
|
||||
try:
|
||||
save_in_file(all_queries_df)
|
||||
save_in_file(table)
|
||||
except Exception as save_results_err:
|
||||
logger.error(f"Failed to save search results: {save_results_err}")
|
||||
return top_rising_queries
|
||||
@@ -178,6 +178,7 @@ def get_related_topics_and_save_csv(search_keywords):
|
||||
pytrends = TrendReq(hl='en-US', tz=360)
|
||||
|
||||
# Build payload
|
||||
# FIXME: Remove hardcoding.
|
||||
pytrends.build_payload(search_keywords, cat=0, timeframe='today 12-m')
|
||||
|
||||
# Get related topics
|
||||
@@ -198,10 +199,7 @@ def get_related_topics_and_save_csv(search_keywords):
|
||||
# Rename columns to avoid duplicates and provide meaningful names
|
||||
df_top_topics.columns = ['Top- ' + col if col != 'topic_title' else col for col in df_top_topics.columns]
|
||||
df_rising_topics.columns = ['Rising- ' + col if col != 'topic_title' else col for col in df_rising_topics.columns]
|
||||
|
||||
# Save to CSV
|
||||
all_topics_df = pd.concat([df_top_topics, df_rising_topics], axis=1)
|
||||
#all_topics_df.to_csv('related_topics.csv', index=False)
|
||||
|
||||
print(f"\n\n 📢❗🚨 Rising and Trending Keywords for {search_keywords}\n")
|
||||
print("\033[1m🔝 Top\033[0m: The most popular search topics.")
|
||||
@@ -209,11 +207,15 @@ def get_related_topics_and_save_csv(search_keywords):
|
||||
# Display the DataFrame using tabulate
|
||||
pd.set_option('display.max_rows', all_topics_df.shape[0]+1)
|
||||
print(all_topics_df.head(10))
|
||||
#print(tabulate(all_topics_df, headers='keys', tablefmt='fancy_grid'))
|
||||
table = tabulate(all_topics_df, headers='keys', tablefmt='fancy_grid')
|
||||
try:
|
||||
save_in_file(table)
|
||||
except Exception as save_results_err:
|
||||
logger.error(f"Failed to save search results: {save_results_err}")
|
||||
return all_topics_df
|
||||
|
||||
except Exception as e:
|
||||
print(f"ERROR: An error occurred: {e}")
|
||||
print(f"ERROR: An error occurred in related topics: {e}")
|
||||
return pd.DataFrame()
|
||||
|
||||
|
||||
@@ -224,7 +226,7 @@ def get_source(url):
|
||||
response.raise_for_status() # Raise an HTTPError for bad responses
|
||||
return response
|
||||
except requests.exceptions.RequestException as e:
|
||||
logging.error(f"Error during HTTP request: {e}")
|
||||
logger.error(f"Error during HTTP request: {e}")
|
||||
return None
|
||||
|
||||
|
||||
@@ -240,10 +242,10 @@ def get_results(query):
|
||||
else:
|
||||
return None
|
||||
except json.JSONDecodeError as e:
|
||||
logging.error(f"Error decoding JSON response: {e}")
|
||||
logger.error(f"Error decoding JSON response: {e}")
|
||||
return None
|
||||
except requests.exceptions.RequestException as e:
|
||||
logging.error(f"Error during HTTP request: {e}")
|
||||
logger.error(f"Error during HTTP request: {e}")
|
||||
return None
|
||||
|
||||
|
||||
@@ -256,7 +258,7 @@ def format_results(results):
|
||||
suggestions.append(suggestion)
|
||||
return suggestions
|
||||
except (KeyError, IndexError) as e:
|
||||
logging.error(f"Error parsing search results: {e}")
|
||||
logger.error(f"Error parsing search results: {e}")
|
||||
return []
|
||||
|
||||
|
||||
@@ -288,7 +290,7 @@ def get_expanded_terms(query):
|
||||
|
||||
return terms
|
||||
except Exception as e:
|
||||
logging.error(f"Error in get_expanded_terms: {e}")
|
||||
logger.error(f"Error in get_expanded_terms: {e}")
|
||||
return []
|
||||
|
||||
|
||||
@@ -307,7 +309,7 @@ def get_expanded_suggestions(query):
|
||||
|
||||
return all_results
|
||||
except Exception as e:
|
||||
logging.error(f"Error in get_expanded_suggestions: {e}")
|
||||
logger.error(f"Error in get_expanded_suggestions: {e}")
|
||||
return []
|
||||
|
||||
|
||||
@@ -321,10 +323,14 @@ def get_suggestions_for_keyword(search_term):
|
||||
#expanded_results_df.to_csv('results.csv', index=False)
|
||||
pd.set_option('display.max_rows', expanded_results_df.shape[0]+1)
|
||||
expanded_results_df.drop_duplicates('Keywords', inplace=True)
|
||||
|
||||
table = tabulate(expanded_results_df, headers=['Keywords', 'Relevance'], tablefmt='fancy_grid')
|
||||
try:
|
||||
save_in_file(table)
|
||||
except Exception as save_results_err:
|
||||
logger.error(f"Failed to save search results: {save_results_err}")
|
||||
return expanded_results_df
|
||||
except Exception as e:
|
||||
logging.error(f"get_suggestions_for_keyword: Error in main: {e}")
|
||||
logger.error(f"get_suggestions_for_keyword: Error in main: {e}")
|
||||
|
||||
|
||||
|
||||
@@ -355,7 +361,7 @@ def perform_keyword_clustering(expanded_results_df, num_clusters=5):
|
||||
|
||||
return expanded_results_df
|
||||
except Exception as e:
|
||||
logging.error(f"Error in perform_keyword_clustering: {e}")
|
||||
logger.error(f"Error in perform_keyword_clustering: {e}")
|
||||
return pd.DataFrame()
|
||||
|
||||
|
||||
@@ -404,7 +410,7 @@ def visualize_silhouette(X, labels):
|
||||
|
||||
plt.show()
|
||||
except Exception as e:
|
||||
logging.error(f"Error in visualize_silhouette: {e}")
|
||||
logger.error(f"Error in visualize_silhouette: {e}")
|
||||
|
||||
|
||||
|
||||
@@ -435,9 +441,9 @@ def print_and_return_top_keywords(expanded_results_df, num_clusters=5):
|
||||
table = tabulate(top_keywords_df, headers='keys', tablefmt='fancy_grid')
|
||||
# Save the combined table to a file
|
||||
try:
|
||||
save_in_file(top_keywords_df)
|
||||
save_in_file(table)
|
||||
except Exception as save_results_err:
|
||||
logger.error(f"Failed to save search results: {save_results_err}")
|
||||
logger.error(f"🚨 Failed to save search results: {save_results_err}")
|
||||
print(table)
|
||||
return top_keywords_df
|
||||
|
||||
@@ -484,11 +490,15 @@ def do_google_trends_analysis(search_term):
|
||||
for asearch_term in search_term:
|
||||
#FIXME: Lets work with a single root keyword.
|
||||
suggestions_df = get_suggestions_for_keyword(asearch_term)
|
||||
if len(suggestions_df['Keywords']) > 10:
|
||||
result_df = perform_keyword_clustering(suggestions_df)
|
||||
# Display top keywords in each cluster
|
||||
top_keywords = print_and_return_top_keywords(result_df)
|
||||
all_the_keywords.append(top_keywords['Keywords'].tolist())
|
||||
else:
|
||||
all_the_keywords.append(suggestions_df['Keywords'].tolist())
|
||||
all_the_keywords = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in all_the_keywords])
|
||||
|
||||
result_df = perform_keyword_clustering(suggestions_df)
|
||||
# Display top keywords in each cluster
|
||||
top_keywords = print_and_return_top_keywords(result_df)
|
||||
all_the_keywords.append(top_keywords['Keywords'].tolist())
|
||||
#
|
||||
# # FIXME: Get result from vision GPT. Fetch and visualize Google Trends data
|
||||
# #trends_data = fetch_google_trends_interest_overtime("llamaindex")
|
||||
@@ -496,23 +506,17 @@ def do_google_trends_analysis(search_term):
|
||||
# # FIXME: Plot Interest Over time.
|
||||
# result_df = plot_interest_by_region(search_term)
|
||||
#
|
||||
# # Display additional information
|
||||
# Display additional information
|
||||
result_df = get_related_topics_and_save_csv(search_term)
|
||||
# Extract 'Top' topic_title
|
||||
top_topic_title = result_df['topic_title'].values.tolist()
|
||||
|
||||
# Join each sublist into one string separated by comma
|
||||
#top_topic_title = [','.join(filter(None, map(str, sublist))) for sublist in top_topic_title]
|
||||
top_topic_title = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in top_topic_title])
|
||||
|
||||
print(f"\nRising and Top keywords: {top_topic_title}")
|
||||
# Print or use the extracted topic titles
|
||||
all_the_keywords = ','.join([', '.join(filter(None, map(str, sublist))) for sublist in all_the_keywords])
|
||||
print(f"\n\n📢❗🚨 Important keywords to target: {all_the_keywords}\n\n")
|
||||
all_the_keywords += top_topic_title
|
||||
print(all_the_keywords)
|
||||
# TBD: Not getting great results OR unable to understand them.
|
||||
#all_the_keywords += top_topic_title
|
||||
all_the_keywords = all_the_keywords.split(',')
|
||||
|
||||
# Split the list into chunks of `chunk_size` keywords
|
||||
chunk_size = 4
|
||||
chunks = [all_the_keywords[i:i + chunk_size] for i in range(0, len(all_the_keywords), chunk_size)]
|
||||
@@ -520,11 +524,15 @@ def do_google_trends_analysis(search_term):
|
||||
combined_df = pd.DataFrame(chunks, columns=[f'K📢eyword Col{i + 1}' for i in range(chunk_size)])
|
||||
|
||||
# Print the table
|
||||
print(tabulate(combined_df, headers='keys', tablefmt='fancy_grid'))
|
||||
#combined_df = pd.DataFrame({'📢❗🚨 Important keywords to target': chunks})
|
||||
table = tabulate(combined_df, headers='keys', tablefmt='fancy_grid')
|
||||
# Save the combined table to a file
|
||||
try:
|
||||
save_in_file(table)
|
||||
except Exception as save_results_err:
|
||||
logger.error(f"Failed to save search results: {save_results_err}")
|
||||
print(table)
|
||||
|
||||
print(all_the_keywords)
|
||||
generate_wordcloud(all_the_keywords.split(','))
|
||||
#generate_wordcloud(all_the_keywords)
|
||||
return(all_the_keywords)
|
||||
except Exception as e:
|
||||
logging.error(f"Error in main: {e}")
|
||||
logger.error(f"Error in Google Trends Analysis: {e}")
|
||||
|
||||
@@ -47,33 +47,33 @@ def gpt_web_researcher(search_keywords, time_range=None, include_domains=list(),
|
||||
google_result = None
|
||||
tavily_result = None
|
||||
report = None
|
||||
# try:
|
||||
# logger.info(f"Doing Google search for: {search_keywords}\n")
|
||||
# google_result = google_search(search_keywords)
|
||||
# blog_titles.append(extract_info(google_result, "titles"))
|
||||
# except Exception as err:
|
||||
# logger.error(f"Failed to do Google Serpapi research: {err}")
|
||||
# # Not failing, as tavily would do same and then GPT-V to search.
|
||||
#
|
||||
# try:
|
||||
# # FIXME: Include the follow-up questions as blog FAQs.
|
||||
# logger.info(f"Doing Tavily AI search for: {search_keywords}")
|
||||
# tavily_result = get_tavilyai_results(search_keywords, include_domains)
|
||||
# blog_titles.append(tavily_extract_information(tavily_result, "titles"))
|
||||
# except Exception as err:
|
||||
# logger.error(f"Failed to do Tavily AI Search: {err}")
|
||||
try:
|
||||
logger.info(f"Doing Google search for: {search_keywords}\n")
|
||||
google_result = google_search(search_keywords)
|
||||
blog_titles.append(extract_info(google_result, "titles"))
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to do Google Serpapi research: {err}")
|
||||
# Not failing, as tavily would do same and then GPT-V to search.
|
||||
|
||||
# try:
|
||||
# logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}")
|
||||
# response_articles = metaphor_search_articles(
|
||||
# search_keywords,
|
||||
# include_domains=include_domains,
|
||||
# time_range=time_range,
|
||||
# similar_url=similar_url)
|
||||
# blog_titles.append(metaphor_extract_titles_or_text(response_articles, return_titles=True))
|
||||
# except Exception as err:
|
||||
# logger.error(f"Failed to do Metaphor search: {err}")
|
||||
# print(blog_titles)
|
||||
try:
|
||||
# FIXME: Include the follow-up questions as blog FAQs.
|
||||
logger.info(f"Doing Tavily AI search for: {search_keywords}")
|
||||
tavily_result = get_tavilyai_results(search_keywords, include_domains)
|
||||
blog_titles.append(tavily_extract_information(tavily_result, "titles"))
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to do Tavily AI Search: {err}")
|
||||
|
||||
try:
|
||||
logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}")
|
||||
response_articles = metaphor_search_articles(
|
||||
search_keywords,
|
||||
include_domains=include_domains,
|
||||
time_range=time_range,
|
||||
similar_url=similar_url)
|
||||
blog_titles.append(metaphor_extract_titles_or_text(response_articles, return_titles=True))
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to do Metaphor search: {err}")
|
||||
print(blog_titles)
|
||||
|
||||
try:
|
||||
logger.info(f"Do Google Trends analysis for given keywords: {search_keywords}")
|
||||
@@ -85,18 +85,7 @@ def gpt_web_researcher(search_keywords, time_range=None, include_domains=list(),
|
||||
# 1. Return a list of related keywords along with search volumes.
|
||||
# 2. New blog titles to write on(niche, top) and blog sections.
|
||||
# 3. Competitors list, similar urls if given.
|
||||
|
||||
|
||||
class Result(NamedTuple):
    # Typed record describing a single search result.
    # NOTE(review): presumably mirrors one item of the Metaphor search
    # response, since it is built with `Result(**result)` from each element
    # of `json_data` -- confirm the field names against the actual payload.
    url: str
    id: str
    title: str
    score: float
    published_date: str
    author: str
    text: str
    highlights: List[str]
    highlight_scores: List[float]
|
||||
print(f"\n\nReview the analysis in this file at: {os.environ.get('SEARCH_SAVE_FILE')}\n")
|
||||
|
||||
|
||||
def metaphor_extract_titles_or_text(json_data, return_titles=True):
|
||||
@@ -110,12 +99,10 @@ def metaphor_extract_titles_or_text(json_data, return_titles=True):
|
||||
Returns:
|
||||
list: List of titles or text.
|
||||
"""
|
||||
result_list = [Result(**result) for result in json_data]
|
||||
|
||||
if return_titles:
|
||||
return [result.title for result in result_list]
|
||||
return [(result.title) for result in json_data]
|
||||
else:
|
||||
return [result.text for result in result_list]
|
||||
return [result.text for result in json_data]
|
||||
|
||||
|
||||
def extract_info(json_data, info_type):
|
||||
|
||||
@@ -142,10 +142,11 @@ def metaphor_search_articles(query,
|
||||
logger.error(f"Failed in metaphor.search_and_contents: {err}")
|
||||
|
||||
# From each webpage, get a summary of the web page.
|
||||
print(search_response)
|
||||
contents_response = search_response.results
|
||||
for content in tqdm(contents_response, desc="Reading Web URL content:", unit="content"):
|
||||
summarized_content = summarize_web_content(content.text, "gemini")
|
||||
content.text = summarized_content
|
||||
# for content in tqdm(contents_response, desc="Reading Web URL content:", unit="content"):
|
||||
# summarized_content = summarize_web_content(content.text, "gemini")
|
||||
# content.text = summarized_content
|
||||
|
||||
print_search_result(contents_response)
|
||||
|
||||
|
||||
@@ -129,18 +129,18 @@ def print_result_table(output_data):
|
||||
logger.error(f"Failed to save search results: {save_results_err}")
|
||||
|
||||
# Display the 'follow_up_questions' in a table
|
||||
table_headers = [f"Search Engine follow up questions for query: {output_data.get('query')}"]
|
||||
table_data = [[output_data.get("follow_up_questions")]]
|
||||
table = tabulate(table_data,
|
||||
if output_data.get("follow_up_questions"):
|
||||
table_headers = [f"Search Engine follow up questions for query: {output_data.get('query')}"]
|
||||
table_data = [[output_data.get("follow_up_questions")]]
|
||||
table = tabulate(table_data,
|
||||
headers=table_headers,
|
||||
tablefmt="fancy_grid",
|
||||
maxcolwidths=[80])
|
||||
print(table)
|
||||
# Save the combined table to a file
|
||||
try:
|
||||
save_in_file(table)
|
||||
except Exception as save_results_err:
|
||||
logger.error(f"Failed to save search results: {save_results_err}")
|
||||
print(table)
|
||||
try:
|
||||
save_in_file(table)
|
||||
except Exception as save_results_err:
|
||||
logger.error(f"Failed to save search results: {save_results_err}")
|
||||
|
||||
|
||||
def save_in_file(table_content):
|
||||
|
||||
@@ -11,30 +11,8 @@ import json
|
||||
import os
|
||||
import datetime #I wish
|
||||
import sys
|
||||
|
||||
import openai
|
||||
from tqdm import tqdm, trange
|
||||
import time
|
||||
import re
|
||||
from textwrap import dedent
|
||||
|
||||
from .gpt_providers.openai_gpt_provider import gen_new_from_given_img
|
||||
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from .gpt_providers.gpt_vision_img_details import analyze_and_extract_details_from_image
|
||||
from .generate_image_from_prompt import generate_image
|
||||
from .write_blogs_from_youtube_videos import youtube_to_blog
|
||||
from .wordpress_blog_uploader import compress_image, upload_blog_post, upload_media
|
||||
from .gpt_online_researcher import do_online_research
|
||||
from .save_blog_to_file import save_blog_to_file
|
||||
from .optimize_images_for_upload import optimize_image
|
||||
from .combine_research_and_blog import blog_with_research
|
||||
from .get_blog_meta_desc import generate_blog_description
|
||||
from .get_blog_title import generate_blog_title
|
||||
from .get_tags import get_blog_tags
|
||||
from .get_blog_category import get_blog_categories
|
||||
from .convert_content_to_markdown import convert_tomarkdown_format
|
||||
from .convert_markdown_to_html import convert_markdown_to_html
|
||||
from .utils.youtube_keyword_research import research_yt
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
@@ -57,402 +35,3 @@ wordpress_url = ''
|
||||
wordpress_username = ''
|
||||
wordpress_password = ''
|
||||
|
||||
def generate_youtube_blog(yt_url_list, output_format="markdown"):
    """Generate and save a blog post for each of the given YouTube videos.

    For every URL the following stages run in order:

    1. ``youtube_to_blog`` produces a first draft and the video title.
    2. ``do_online_research`` researches the video title.
    3. ``blog_with_research`` merges the research report into the draft.
    4. Meta description, title, tags and categories are generated, a main
       image is generated and optimized, the draft is converted to the
       requested format and the result is handed to ``save_blog_to_file``.

    Args:
        yt_url_list: A single YouTube URL (str) or a list of URLs.
        output_format: Output format selector. A value containing ``html``
            converts the blog to HTML, one containing ``markdown`` converts
            it to Markdown; matching is case-insensitive.

    Returns:
        None. Results are logged and written via ``save_blog_to_file``.

    Note:
        A failure in any stage is logged and terminates the process with
        exit status 1. Only a failure to save the final blog is logged
        without exiting. ``image_dir`` and ``output_path`` are read from
        module scope (defined outside this function).
    """
    # Holds the current draft of the blog; refined stage by stage below.
    blog_markdown_str = ""
    # Accept a single URL as a convenience.
    if isinstance(yt_url_list, str):
        yt_url_list = [yt_url_list]

    for a_yt_url in yt_url_list:
        # Stage 1: first draft from the video itself.
        try:
            logger.info(f"Starting to write blog on URL: {a_yt_url}")
            blog_markdown_str, yt_title = youtube_to_blog(a_yt_url)
            logger.warning("\n\n--------------- First Draft of the Blog: --------\n\n")
            logger.info(f"{blog_markdown_str}\n")
            logger.warning("--------------------END of First draft----------\n\n")
            if not yt_title or not blog_markdown_str:
                logger.error("No content or title for audio to proceed.")
                sys.exit(1)
        except Exception as e:
            logger.error(f"Error in youtube_to_blog: {e}")
            sys.exit(1)

        # Stage 2: online research on the video title.
        try:
            logger.info(f"Starting with online research for URL title: {yt_title}")
            research_report = do_online_research(yt_title)
            if not research_report:
                # No exception is in flight here, so there is nothing to
                # interpolate; referencing `e` at this point would itself
                # raise and mask the real problem.
                logger.error("Error in do_online_research returned no report.")
                sys.exit(1)
            logger.warning(f"\n\n---------------Online Research Report: {yt_title} --------\n\n")
            logger.info(f"{research_report}\n")
            logger.warning("--------------------END of Research Report----------\n\n")
        except Exception as e:
            logger.error(f"Error in do_online_research: {e}")
            sys.exit(1)

        # Stage 3: merge the research into the first draft.
        try:
            logger.info("Preparing a blog content from audio script and online research content...")
            blog_markdown_str = blog_with_research(research_report, blog_markdown_str)
            logger.warning("\n\n--------------- Second Blog Draft after online research: --------\n\n")
            logger.info(f"{blog_markdown_str}\n")
            logger.warning("--------------------END of Second draft----------\n\n")
        except Exception as e:
            logger.error(f"Error in blog_with_research: {e}")
            sys.exit(1)

        # Stage 4: metadata, image, final formatting and saving.
        try:
            # Get the title and meta description of the blog.
            logger.info("Generating Blog Description.")
            blog_meta_desc = generate_blog_description(blog_markdown_str, "gemini")
            logger.info("Generating Blog Title.")
            title = generate_blog_title(blog_meta_desc, "gemini")
            logger.info(f"Title is {title} and description is {blog_meta_desc}")
            # Strip a leading 'Title:' label (any case) plus optional whitespace.
            title = re.sub(re.compile(r'(?i)title:\s*'), '', title)

            # Get blog tags and categories.
            blog_tags = get_blog_tags(blog_meta_desc, "gemini")
            logger.info(f"Blog tags are: {blog_tags}")
            blog_categories = get_blog_categories(blog_meta_desc, "gemini")
            logger.info(f"Blog categories are: {blog_categories}")

            # Generate an image based on meta description
            logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
            main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
            main_img_path = optimize_image(main_img_path)

            # TBD: Add an intro, a conclusion, a second image (variation of
            # the video screenshot) and a proofreading pass that removes
            # duplicates, using the tags and categories as keywords.

            # Check the type of blog format needed by the user.
            # Compared case-insensitively so 'HTML' and 'html' behave alike.
            requested_format = output_format.lower()
            if 'html' in requested_format:
                logger.info("Converting final blog to HTML format.")
                blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
            elif 'markdown' in requested_format:
                logger.info("Converting final blog to Markdown style.")
                blog_markdown_str = convert_tomarkdown_format(blog_markdown_str)

            logger.warning("\n\n--------------- Final Blog Content: --------\n\n")
            logger.info(f"{blog_markdown_str}\n")
            logger.warning("--------------------END of Blog Content----------\n\n")

            # Try to save the blog content in a file, in whichever format.
            # Saving is best-effort: a failure is logged, not fatal.
            try:
                save_blog_to_file(blog_markdown_str, title, blog_meta_desc,
                                  blog_tags, blog_categories, main_img_path, output_path)
            except Exception as err:
                logger.error(f"Failed to Save blog content: {err}")

        except Exception as e:
            logger.error(f"Error: Failed to generate_youtube_blog: {e}")
            # sys.exit rather than the site-provided exit(), which is not
            # guaranteed to exist (e.g. under `python -S`).
            sys.exit(1)
|
||||
|
||||
|
||||
def generate_detailed_blog(num_blogs, blog_keywords, niche, num_subtopics,
        wordpress=False, research_online=False, output_format="HTML"):
    """Generate complete blog posts from keywords, one per generated topic.

    Blog topics are generated first. For each topic an outline of sub topics
    is created, then an introduction, content for every sub topic and a
    conclusion. A meta description, main image, tags and categories are
    derived from the content, and the blog is optionally uploaded to
    WordPress before being saved with ``save_blog_to_file``.

    Args:
        num_blogs: Number of blog topics to request.
        blog_keywords: Keywords the blogs should be written about.
        niche: Niche passed through to topic generation.
        num_subtopics: Number of sub topics to request per blog.
        wordpress: When true, upload the image and blog post to WordPress.
        research_online: Currently unused (online research is not wired in).
        output_format: A value containing ``html`` (case-insensitive)
            converts the blog content to HTML before saving.

    Returns:
        None.

    Note:
        WordPress failures terminate the process via ``sys.exit`` with an
        error message. ``image_dir`` and the ``wordpress_*`` credentials are
        read from module scope.
    """
    # TBD: Check if the generated topics are equal to what user asked.
    blog_topic_arr = generate_blog_topics(blog_keywords, num_blogs, niche)
    logger.info(f"Generated Blog Topics:---- \n{blog_topic_arr}\n")
    if not blog_topic_arr:
        # Topic generation can come back empty; there is nothing to write.
        logger.error("No blog topics were generated, nothing to write.")
        return

    # Normalize once: the default is 'HTML', so the check must ignore case.
    wants_html = 'html' in output_format.lower()

    # For each of blog topic (one per line), generate content.
    for a_blog_topic in blog_topic_arr.split('\n'):
        # Drop quotes and any leading numbering such as '1. '.
        a_blog_topic = a_blog_topic.replace('"', '')
        a_blog_topic = re.sub(r'^[\d.\s]+', '', a_blog_topic)
        if not a_blog_topic.strip():
            # Blank lines in the model response are not topics.
            continue
        blog_markdown_str = "# " + a_blog_topic + "\n\n"

        # Get the introduction specific to blog title and sub topics.
        tpc_outlines = generate_topic_outline(a_blog_topic, num_subtopics)
        tpc_outlines = [line for line in tpc_outlines.split("\n") if line.strip()]

        blog_intro = get_blog_intro(a_blog_topic, tpc_outlines)
        logger.info(f"The intro is:\n{blog_intro}")
        blog_markdown_str = blog_markdown_str + "### Introduction" + "\n\n" + f"{blog_intro}" + "\n\n"

        # Now, for each blog we have sub topic. Generate content for each of the sub topic.
        for a_outline in tpc_outlines:
            a_outline = a_outline.replace('"', '')
            logger.info(f"Generating content for sub-topic: {a_outline}")
            sub_topic_content = generate_topic_content(blog_keywords, a_outline)
            blog_markdown_str = blog_markdown_str + "\n" + f"\n {sub_topic_content}" + "\n\n"

        # Get the Conclusion of the blog, by passing the generated blog.
        blog_conclusion = get_blog_conclusion(blog_markdown_str)
        blog_markdown_str = blog_markdown_str + "### Conclusion" + "\n" + f"{blog_conclusion}" + "\n"

        logger.info(f"Final blog content: {blog_markdown_str}")

        # TBD: when research_online is set, run online research on the
        # keywords and merge the report into the blog content.

        blog_meta_desc = generate_blog_description(blog_markdown_str)
        logger.info(f"\nThe blog meta description is:{blog_meta_desc}\n")

        # Generate an image based on meta description
        logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
        main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")

        blog_tags = get_blog_tags(blog_markdown_str)
        logger.info(f"\nBlog tags for generated content: {blog_tags}\n")

        blog_categories = get_blog_categories(blog_markdown_str)
        logger.info(f"Generated blog categories: {blog_categories}\n")

        # Convert the text into HTML when requested; remember that it was
        # done so the WordPress path below does not convert a second time.
        is_html = False
        if wants_html:
            blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
            is_html = True

        # Check if blog needs to be posted on wordpress.
        if wordpress:
            # FIXME: Fetch all existing tags and categories and reuse them
            # where present, else create new ones. Plain string comparison
            # would miss similar tags and categories.
            logger.info("Uploading the blog to wordpress.\n")
            main_img_path = compress_image(main_img_path, quality=85)
            try:
                img_details = analyze_and_extract_details_from_image(main_img_path)
                alt_text = img_details.get('alt_text')
                img_description = img_details.get('description')
                img_title = img_details.get('title')
                caption = img_details.get('caption')
            except Exception as e:
                sys.exit(f"Error occurred in analyze_and_extract_details_from_image: {e}")

            try:
                media = upload_media(wordpress_url, wordpress_username, wordpress_password,
                                     main_img_path, alt_text, img_description, img_title, caption)
            except Exception as err:
                sys.exit(f"Error occurred in upload_media: {err}")

            # Then create the post with the uploaded media as the featured image
            media_id = media['id']
            if not is_html:
                blog_markdown_str = convert_markdown_to_html(blog_markdown_str)
            try:
                upload_blog_post(wordpress_url, wordpress_username, wordpress_password, a_blog_topic,
                                 blog_markdown_str, media_id, blog_meta_desc, blog_categories, blog_tags, status='publish')
            except Exception as err:
                sys.exit(f"Failed to upload blog to wordpress.Error: {err}")

        # TBD: Save the blog content as a .md file. Markdown or HTML ?
        save_blog_to_file(blog_markdown_str,
                          a_blog_topic,
                          blog_meta_desc, blog_tags,
                          blog_categories, main_img_path)
|
||||
|
||||
# Now, we need perform some *basic checks on the blog content, such as:
|
||||
# is_content_ai_generated.py, plagiarism_checker_from_known_sources.py
|
||||
# seo_analyzer.py . These are present in the lib folder.
|
||||
# prompt: Rewrite, improve and paraphrase [text] and use headings and subheadings
|
||||
# to break up the content and make it easier to read using the keyword [keyword].
|
||||
|
||||
|
||||
|
||||
def generate_blog_topics(blog_keywords, num_blogs, niche):
    """
    Generate SEO-friendly blog topics for the given keywords.

    Args:
        blog_keywords: Keywords the blog topics should be written about.
        num_blogs: Number of blog topics to request. When more than 5 are
            requested, related keywords are fetched and appended to the prompt.
        niche: Forwarded to get_related_keywords() to choose which kind of
            related keywords to ask for.

    Returns:
        Whatever openai_chatgpt() returns for the prompt (presumably the
        generated topics as text -- confirm against that helper).

    Raises:
        SystemError: If the call to the model fails.
    """
    prompt = f"""As an SEO specialist and blog writer, write {num_blogs} catchy
and SEO-friendly blog topics on {blog_keywords}. The blog title must be less than 80 characters.
The blog titles must follow best SEO practises, be engaging and invite/tempt users to read full blog.
Do not include descriptions, explanations. Do not number the result."""

    # Beware of keywords stuffing, clustering, semantic should help avoid.
    if num_blogs > 5:
        # Get more keywords, based on user given keywords.
        more_keywords = get_related_keywords(num_blogs, blog_keywords, niche)
        # Only extend the prompt when keywords actually came back; the hint
        # must be an f-string, otherwise the literal text "{more_keywords}"
        # is sent to the model instead of the keywords themselves.
        if more_keywords:
            prompt += f"\nUse the following keywords wisely, without keyword stuffing: {more_keywords}"

    logger.info(f"Prompt used for generating blog topics: \n{prompt}\n")
    try:
        return openai_chatgpt(prompt)
    except Exception as err:
        # The exception has to be raised; merely constructing it would swallow
        # the failure and hand None back to the caller.
        raise SystemError(f"Error in generating blog topics: {err}") from err
|
||||
|
||||
|
||||
"""
|
||||
Given a blog title generate an outline for it
|
||||
"""
|
||||
# TBD: Remove hardcoding, make dynamic
|
||||
prompt = f"""As a SEO expert, suggest only {num_subtopics} beginner-friendly and
|
||||
insightful sub topics for the blog title: {blog_title}.
|
||||
Respond with only answer and no description, explanations."""
|
||||
|
||||
# The suggested {num_subtopics} outline should include few long-tailed keywords and most popular questions.
|
||||
# TBD: Include --niche
|
||||
logger.info(f"Prompt used for blog title Outline :\n{prompt}\n")
|
||||
# TBD: Add logic for which_provider and which_model
|
||||
try:
|
||||
response = openai_chatgpt(prompt)
|
||||
except Exception as err:
|
||||
SystemError(f"Error in generating Blog Title: {err}")
|
||||
return response
|
||||
|
||||
|
||||
def generate_topic_content(blog_keywords, sub_topic):
    """
    Generate markdown content for a single blog sub topic.

    Args:
        blog_keywords: Keywords describing the overall blog subject; used to
            frame the writer persona in the prompt.
        sub_topic: The sub topic to write content for.

    Returns:
        Whatever openai_chatgpt() returns for the prompt (presumably the
        generated markdown text -- confirm against that helper).

    Raises:
        SystemError: If the call to the model fails.
    """
    # The outline should contain various subheadings and include the starting sentence for each section.
    # TBD: Depending on the usecase 'Voice and style' will change to professional etc.
    prompt = f"""As a professional blogger and topic authority on {blog_keywords},
craft factual (no more than 200 characters) subtopic content on {sub_topic}.
Your response should reflect Experience, Expertise, Authoritativeness and Trustworthiness from content.
Voice and style guide: Write in a professional manner, giving enlightening details and reasons.
Use natural language and phrases that a real person would use: in normal conversations.
Format your response using markdown. REMEMBER Not to include introduction or conclusion in your response.
Use headings(h3 to h6 only), subheadings, bullet points, and bold to organize the information."""
    logger.info(f"Generate topic content using prompt:\n{prompt}\n")
    try:
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise instead of just constructing the exception, so a failed
        # generation is not silently turned into a None return value.
        raise SystemError(f"Error in generating topic content: {err}") from err
|
||||
|
||||
|
||||
def get_blog_intro(blog_title, blog_topics):
    """
    Generate a blog introduction from the title and its sub topics.

    Args:
        blog_title: Title of the blog post.
        blog_topics: Sub topics covered by the post; interpolated into the
            prompt as-is.

    Returns:
        Whatever openai_chatgpt() returns for the prompt (presumably the
        introduction text -- confirm against that helper).

    Raises:
        SystemError: If the call to the model fails.
    """
    prompt = f"""As a skilled wordsmith, I'll equip you with a blog title and relevant topics, tasking you with crafting an engaging introduction. Your challenge: Create a brief, compelling entry that entices readers to explore the entire post. This introduction must be concise (under 250 characters) yet powerful, clearly stating the blog's purpose and what readers stand to gain. Reply with only the introduction.

Intrigue your audience from the start with vibrant language, employing strong verbs and vivid descriptions. Address a common challenge your readers face, demonstrating empathy and positioning yourself as their go-to expert. Pose thought-provoking questions that prompt reader engagement and contemplation.

Remember, your words matter. This introduction serves as the cornerstone of the blog post. It should not only captivate attention but also encourage deeper exploration. Additionally, strategically integrate relevant keywords to enhance visibility on search engine results pages (SERPs). Your mission: Craft a blog introduction that resonates, leaving readers eager to delve further into the titled piece: '{blog_title}', covering these sub-topics: {blog_topics}."""

    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
    except Exception as err:
        # Without an explicit raise, execution would fall through to the
        # return below and fail with UnboundLocalError, hiding the real cause.
        raise SystemError(f"Error in generating Blog Introduction: {err}") from err
    return response
|
||||
|
||||
|
||||
def get_blog_conclusion(blog_content):
    """
    Generate a conclusion for the given blog content.

    Args:
        blog_content: Full text of the blog to conclude; interpolated into
            the prompt as-is.

    Returns:
        Whatever openai_chatgpt() returns for the prompt (presumably the
        conclusion text -- confirm against that helper).

    Raises:
        SystemError: If the call to the model fails.
    """
    prompt = f"""As an expert SEO and blog writer, please conclude the given blog providing vital take aways,
summarise key points (no more than 300 characters) in bullet points. The blog content: {blog_content}
"""
    logger.info(f"Generating blog conclusion iwth prompt: {prompt}")
    try:
        # TBD: Add logic for which_provider and which_model
        response = openai_chatgpt(prompt)
    except Exception as err:
        # Raise rather than discard the exception object, so callers do not
        # receive a silent None when generation fails.
        raise SystemError(f"Error in generating blog conclusion: {err}") from err
    return response
|
||||
|
||||
|
||||
def get_related_keywords(num_blogs, keywords, niche):
    """
    Ask the model for keywords related to the given ones.

    Args:
        num_blogs: How many related keywords to request.
        keywords: The topic/keywords to find related keywords for.
        niche: When truthy, long-tailed keywords are requested; otherwise
            general popular keywords are requested.

    Returns:
        Whatever openai_chatgpt() returns for the prompt (presumably the
        keyword list as text -- confirm against that helper).

    Raises:
        SystemError: If the call to the model fails.
    """
    # Check if niche: use long tailed, else use popular keywords.
    keyword_kind = "long-tailed keywords" if niche else "keywords"
    # Both variants share one template; every fragment carries its own
    # separating space so adjacent words are not fused when concatenated
    # (e.g. "semanticallyrelated", "used inhigh-quality").
    prompt = (
        f"Generate a list without description of the top {num_blogs} most popular and"
        f" semantically related {keyword_kind} and entities for the topic of {keywords} that are used"
        " in high-quality content and relevant to my competitors."
    )
    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise (and keep the underlying cause) instead of constructing and
        # dropping the exception, which silently returned None.
        raise SystemError(f"Error in getting related keywords: {err}") from err
|
||||
|
||||
|
||||
def blog_proof_editor(blog_content, blog_keywords):
    """
    Proofread and rewrite a blog post with a focus on originality.

    Args:
        blog_content: The blog text to edit. Must be non-empty.
        blog_keywords: Keywords of the blog; interpolated into the prompt.

    Returns:
        Whatever openai_chatgpt() returns for the prompt (presumably the
        edited blog text -- confirm against that helper).

    Raises:
        SystemExit: With exit status 1 when there is no blog content.
        SystemError: If the call to the model fails.
    """
    # There is nothing to proofread without content, regardless of whether
    # keywords were supplied.
    if not blog_content:
        logger.error("Blog proof reader has no content to proofread.")
        # Equivalent to exit(1), without relying on the site-provided helper.
        raise SystemExit(1)

    prompt = f"""I am looking for detailed editing and enhancement of the given blog post,
with a particular focus on originality. I will provide you with a blog content and its keywords.
The keywords for the blog are [{blog_keywords}]. Please go through the blog and make direct edits to improve it,
ensuring the final output is both high-quality and original.
Note: There are duplicates headings and corresponding paragraphs, rewrite into one subheading.

Here are the specific guidelines to focus on:

1). Ensure Originality: Edit any sections that lack originality, replacing them with unique and creative content.
2). Eliminate Repetitive Language: Rewrite repetitive phrases with varied and engaging language.
3). Vocabulary and Grammar Enhancement: Directly correct any grammatical errors and upgrade the
vocabulary for better readability.
4). Improve Sentence Structure: Enhance sentence construction for better clarity and flow.
5). Tone and Brand Alignment: Adjust the tone, voice, personality of given content to make it unique.
6). Optimize Content Structure: Reorganize the content for a more impactful presentation,
including better paragraphing and transitions.
7). Remove Redundancies: Important, Cut out any redundant information or overly complex jargon.
8). Refine Overall Structure: Make structural changes to improve the overall impact of the content.
9). Remember, rewrite all content that repeated, while maintaining the formatting of the given blog text.
10). Remember Not to include SEO meta description and Title in your final response.
11). REMEMBER to maintain the formatting style of the provided blog.
12). Judge if the given blog is about technology then provide code snippets and examples for it.

Please make direct changes as per above guideline to the provided blog text below:
[{blog_content}]. """

    try:
        # TBD: Add logic for which_provider and which_model
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise instead of discarding the exception object, so a failed
        # proofreading pass is not mistaken for an empty (None) result.
        raise SystemError(f"Error Blog Proof Reading: {err}") from err
|
||||
|
||||
Reference in New Issue
Block a user