WIP- Try AI-Writer and Web research; working.

This commit is contained in:
AjaySi
2024-02-24 15:15:01 +05:30
parent d89d9ad3d2
commit a87a87a620
21 changed files with 587 additions and 279 deletions

View File

@@ -0,0 +1,51 @@
import os
import sys
import json
from pathlib import Path
from dotenv import load_dotenv
load_dotenv(Path('../.env'))
from ..gpt_providers.openai_chat_completion import openai_chatgpt
from ..gpt_providers.gemini_pro_text import gemini_text_response
from loguru import logger
# Drop the handlers registered so far and log to stdout in the
# "LEVEL|file:line:function| message" layout instead.
logger.remove()
logger.add(
    sys.stdout,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}",
    colorize=True,
)
# FIXME: Provide num_blogs, num_faqs as inputs.
def write_blog_google_serp(search_keyword, search_results):
    """Write an SEO blog with 5 FAQs from a keyword and its Google search result.

    A single prompt embedding the keyword and the search result is sent to the
    LLM selected by the ``GPT_PROVIDER`` environment variable.

    Args:
        search_keyword: Web research keyword the blog is about.
        search_results: Google search result for the keyword; it is
            interpolated into the prompt as-is.

    Returns:
        Whatever the selected provider helper returns for the prompt.

    Raises:
        KeyError: If ``GPT_PROVIDER`` is not set.
        ValueError: If ``GPT_PROVIDER`` contains neither 'google' nor 'openai'.
        Exception: Provider failures are logged and re-raised unchanged.
    """
    gpt_providers = os.environ["GPT_PROVIDER"]
    use_gemini = 'google' in gpt_providers
    # Reject an unknown provider up front: the function used to fall off the
    # end and hand ``None`` back to the caller without any hint of the cause.
    if not use_gemini and 'openai' not in gpt_providers:
        raise ValueError(
            f"Unsupported GPT_PROVIDER {gpt_providers!r}: "
            "expected a value containing 'google' or 'openai'."
        )

    prompt = f"""
As a SEO expert and content writer, I will provide you with my web research keyword and its google search result in json format.
Your task is to write a SEO optimized, unique blog and 5 FAQs.
1). Your blog content should compete against all, in the provided search results. Follow best SEO practises.
2). Your FAQ should be based on 'People also ask' and 'Related Queries' from given result.
Always include answers for each FAQ, use your knowledge and confirm with snippets given in search result.
3). Your blog should be detailed, unique and written in markdown language.
4). Do not explain, describe your response.
Web Research Keyword: "{search_keyword}"
Google search Result: "{search_results}"
"""
    logger.info("Generating blog and FAQs from web search result.")
    if use_gemini:
        try:
            return gemini_text_response(prompt)
        except Exception as err:
            logger.error(f"Failed to get response from gemini: {err}")
            raise

    try:
        logger.info("Calling OpenAI LLM.")
        return openai_chatgpt(prompt)
    except Exception as err:
        logger.error(f"Failed to get response from Openai: {err}")
        raise

View File

@@ -0,0 +1,62 @@
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
load_dotenv(Path('../.env'))
from ..gpt_providers.openai_chat_completion import openai_chatgpt
from ..gpt_providers.gemini_pro_text import gemini_text_response
from loguru import logger
# Drop the handlers registered so far and log to stdout in the
# "LEVEL|file:line:function| message" layout instead.
logger.remove()
logger.add(
    sys.stdout,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}",
    colorize=True,
)
def blog_with_keywords(blog, keywords):
    """Rewrite a blog so that it uses the given list of keywords.

    The prompt asks the LLM selected by the ``GPT_PROVIDER`` environment
    variable for an original, SEO-optimized blog that keeps every detail of
    the provided content.

    Args:
        blog: Existing blog content; interpolated into the prompt as-is.
        keywords: Keywords on the same topic; interpolated into the prompt
            as-is.

    Returns:
        Whatever the selected provider helper returns for the prompt.

    Raises:
        KeyError: If ``GPT_PROVIDER`` is not set.
        ValueError: If ``GPT_PROVIDER`` contains neither 'google' nor 'openai'.
        Exception: Provider failures are logged and re-raised unchanged.
    """
    gpt_providers = os.environ["GPT_PROVIDER"]
    use_gemini = 'google' in gpt_providers
    # Reject an unknown provider up front: the function used to fall off the
    # end and hand ``None`` back to the caller without any hint of the cause.
    if not use_gemini and 'openai' not in gpt_providers:
        raise ValueError(
            f"Unsupported GPT_PROVIDER {gpt_providers!r}: "
            "expected a value containing 'google' or 'openai'."
        )

    # One prompt serves both providers; it used to be spelled out twice.
    prompt = f"""You are an expert copywriter specializing in content optimization for SEO.
I will provide you with my 'blog content' and 'list of keywords' on the same topic.
Your task is to write an original blog, using the given keywords and blog content.
Your blog should be highly detailed and well formatted.
Do not miss out any details from provided blog content.
Always, include figures, data, results from given content.
It is important that your blog is original and unique. It should be highly readable and SEO optimized.
Blog content: '{blog}'
list of keywords: '{keywords}'
"""
    if use_gemini:
        try:
            return gemini_text_response(prompt)
        except Exception as err:
            logger.error(f"Failed to get response from gemini: {err}")
            raise

    try:
        logger.info("Calling OpenAI LLM.")
        # The OpenAI variant of the prompt has always started with a newline;
        # keep it so the text sent to that provider is unchanged.
        return openai_chatgpt("\n" + prompt)
    except Exception as err:
        logger.error(f"failed to get response from Openai: {err}")
        raise

View File

@@ -0,0 +1,80 @@
import os
import sys
from pathlib import Path
from dotenv import load_dotenv
load_dotenv(Path('../.env'))
from ..gpt_providers.openai_chat_completion import openai_chatgpt
from ..gpt_providers.gemini_pro_text import gemini_text_response
from loguru import logger
# Drop the handlers registered so far and log to stdout in the
# "LEVEL|file:line:function| message" layout instead.
logger.remove()
logger.add(
    sys.stdout,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}",
    colorize=True,
)
def blog_with_research(report, blog):
    """Combine a research report and a blog draft into one blog article.

    The LLM is selected by the ``GPT_PROVIDER`` environment variable. Each
    provider gets its own prompt: a short one for 'google' and a detailed,
    numbered-objectives one for 'openai'.

    Args:
        report: Research report on the topic; interpolated into the prompt
            as-is.
        blog: Blog content on the same topic; interpolated into the prompt
            as-is.

    Returns:
        Whatever the selected provider helper returns for the prompt.

    Raises:
        KeyError: If ``GPT_PROVIDER`` is not set.
        ValueError: If ``GPT_PROVIDER`` contains neither 'google' nor 'openai'.
        Exception: Provider failures are logged and re-raised unchanged.
    """
    gpt_providers = os.environ["GPT_PROVIDER"]
    use_gemini = 'google' in gpt_providers
    # Reject an unknown provider up front: the function used to fall off the
    # end and hand ``None`` back to the caller without any hint of the cause.
    if not use_gemini and 'openai' not in gpt_providers:
        raise ValueError(
            f"Unsupported GPT_PROVIDER {gpt_providers!r}: "
            "expected a value containing 'google' or 'openai'."
        )

    if use_gemini:
        prompt = f"""You are an expert copywriter specializing in content optimization for SEO.
I will provide you with my 'research report' and 'blog content' on the same topic.
Your task is to transform and combine the given research and blog content into a blog article.
Your blog should be highly detailed, original and well formatted.
Do not miss out any details from provided content.
Always, enhance the blog FAQs section with more information from given research.
It is important that your blog provides detailed insights and engaging to readers.
It should be highly readable and SEO optimized.
Research report: '{report}'
Blog content: '{blog}'
"""
        try:
            return gemini_text_response(prompt)
        except Exception as err:
            logger.error(f"Failed to get response from gemini: {err}")
            raise

    # Only the prompt for the selected provider is built now; the detailed
    # prompt below was previously formatted and then thrown away for 'google'.
    prompt = f"""
You are an expert copywriter specializing in content optimization for SEO.
I will provide you with a 'research report' and a 'blog content' on the same topic.
Your task is to transform and combine the given research and blog content into a well-structured markdown, unique
and engaging blog article.
Your objectives include:
1. Master the report and blog content: Understand main ideas, key points, and the core message.
2. Sentence Structure: Rephrase while preserving logical flow and coherence.
3. Identify Main Keywords: Determine the primary topic and combine the articles on the main topic.
4. REMEMBER: From the research report, include links and cititations to make your article more authoratative.
5. Optimize for SEO: Generate high quality informative content.
6. Implement SEO best practises with appropriate keyword density.
7. Craft Engaging and Informative Article: Provide value and insight to readers.
8. Proofread: Important to Check for grammar, spelling, and punctuation errors.
9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases,
interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures.
10. Blog Structuring: Include an Introduction, subtopics and use bullet points or
numbered lists if appropriate. Important to include FAQs, Conclusion and Referances.
11. Ensure Uniqueness: Guarantee the article is plagiarism-free. Write in unique, informative style.
12. Punctuation: Use appropriate question marks at the end of questions.
13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools.
14. REMEMBER: Use the formatting style of given research report and include highlights, citations, referances in combined article.
Follow these guidelines to combine and write a new, unique, and informative blog article
that will rank well in search engine results and engage readers effectively.
Create a blog post, in markdown, from the given research report and blog content below.
Research report: {report}
Blog content: {blog}
"""
    try:
        logger.info("Calling OpenAI LLM.")
        return openai_chatgpt(prompt)
    except Exception as err:
        logger.error(f"failed to get response from Openai: {err}")
        raise

View File

@@ -0,0 +1,49 @@
import sys
import json
from ..gpt_providers.openai_chat_completion import openai_chatgpt
from ..gpt_providers.gemini_pro_text import gemini_text_response
from loguru import logger
# Drop the handlers registered so far and log to stdout in the
# "LEVEL|file:line:function| message" layout instead.
logger.remove()
logger.add(
    sys.stdout,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}",
    colorize=True,
)
# FIXME: Provide num_blogs, num_faqs as inputs.
def get_blog_sections_from_websearch(search_keyword, search_results, gpt_providers="gemini"):
    """Ask an LLM for a blog title and 5 subtitles based on a Google search result.

    The prompt requests a JSON answer whose key is 'blogSections'.

    Args:
        search_keyword: Web research keyword.
        search_results: Google search result for the keyword; interpolated
            into the prompt as-is.
        gpt_providers: Provider selector; must contain 'gemini' or 'openai'.

    Returns:
        For 'gemini': the response parsed with ``json.loads`` (after removing
        a surrounding markdown code fence, if any).
        For 'openai': the provider helper's response, returned unparsed.

    Raises:
        ValueError: If ``gpt_providers`` contains neither 'gemini' nor
            'openai', or (as ``json.JSONDecodeError``) if the gemini response
            is not valid JSON.
        Exception: Provider failures are logged and re-raised unchanged.
    """
    use_gemini = 'gemini' in gpt_providers
    # Reject an unknown provider up front: the function used to fall off the
    # end and hand ``None`` back to the caller without any hint of the cause.
    if not use_gemini and 'openai' not in gpt_providers:
        raise ValueError(
            f"Unsupported gpt_providers {gpt_providers!r}: "
            "expected a value containing 'gemini' or 'openai'."
        )

    prompt = f"""
As a SEO expert and content writer, I will provide you with a search keyword and its google search result.
Your task is to write a blog title and 5 blog sub titles, from the given google search result.
The subtitles should be less than 40 characters and click worthy.
Do not explain, describe your response. Respond in json format, always name the key as 'blogSections'.
Web Research Keyword: "{search_keyword}"
Google search Result: "{search_results}"
"""
    if use_gemini:
        try:
            response = gemini_text_response(prompt)
        except Exception as err:
            # The handler used to read ``response.prompt_feedback`` here, but
            # ``response`` is either unbound (the call itself failed) or plain
            # text, so that lookup raised a second error that hid this one.
            logger.error(f"Failed to get response from gemini: {err}")
            raise
        try:
            return json.loads(_strip_code_fence(response))
        except Exception as err:
            # Logged separately so a malformed answer is not reported as a
            # failed API call.
            logger.error(f"Failed to parse gemini response as json: {err}")
            raise

    try:
        logger.info("Calling OpenAI LLM.")
        # NOTE(review): unlike the gemini branch, this response is returned
        # without JSON parsing. Kept as-is because callers may rely on it.
        return openai_chatgpt(prompt)
    except Exception as err:
        logger.error(f"Failed to get response from Openai: {err}")
        raise


def _strip_code_fence(text):
    """Return ``text`` without a surrounding markdown code fence, if it has one."""
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        # Not fenced: leave the payload alone even if it contains backticks.
        return cleaned
    # Drop the opening fence line (it may carry a language tag such as "json").
    lines = cleaned.split("\n")[1:]
    # Drop the closing fence only when the last line really is one.
    if lines and lines[-1].strip().startswith("```"):
        lines = lines[:-1]
    return "\n".join(lines)

View File

@@ -0,0 +1,90 @@
import sys
import os
from pathlib import Path
from datetime import datetime
from dotenv import load_dotenv
load_dotenv(Path('../../.env'))
from loguru import logger
# Drop the handlers registered so far and log to stdout in the
# "LEVEL|file:line:function| message" layout instead.
logger.remove()
logger.add(
    sys.stdout,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}",
    colorize=True,
)
from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search,\
do_tavily_ai_search, do_metaphor_ai_research, do_google_pytrends_analysis
from .blog_from_google_serp import write_blog_google_serp
from .combine_research_and_blog import blog_with_research
from .combine_blog_and_keywords import blog_with_keywords
from ..ai_web_researcher.you_web_reseacher import get_rag_results, search_ydc_index
def write_blog_from_keywords(search_keywords, url=None, output_format="markdown"):
    """
    Research the given keywords online and write a blog from the findings.

    A first draft is written from the Google SERP result and is then rewritten
    in turn with Tavily, Metaphor/Exa, Google Trends and YOU.com research.
    The draft is logged after every pass.

    Args:
        search_keywords: Keywords to research. Also used, with spaces replaced
            by underscores and a timestamp appended, to build the path stored
            in the ``SEARCH_SAVE_FILE`` environment variable.
        url: Optional page URL to screenshot for the blog image. Only read by
            the post-processing steps, which are currently unreachable.
        output_format: Currently unused.

    Raises:
        SystemExit: Always, with status 1, once the research passes are done
            (see the NOTE(review) in the body).
    """
    # TBD: Keeping the results directory as fixed, for now.
    os.environ["SEARCH_SAVE_FILE"] = os.path.join(
        os.getcwd(), "workspace", "web_research_reports",
        search_keywords.replace(" ", "_") + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))

    logger.info(f"Researching and Writing Blog on keywords: {search_keywords}")
    # Use to store the blog in a string, to save in a *.md file.
    blog_markdown_str = ""

    # Call on the got-researcher, tavily apis for this. Do google search for organic competition.
    google_search_result = do_google_serp_search(search_keywords)
    blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result)
    # logger.info/check the final blog content.
    logger.info(f"Final blog content: {blog_markdown_str}")

    # blog_with_research is declared as (report, blog). The calls below used
    # to pass the draft first and the research second, so each text landed
    # under the other's label in the prompt. Keyword arguments pin the roles.

    # Do Tavily AI research to augument the above blog.
    tavily_search_result = do_tavily_ai_search(search_keywords)
    blog_markdown_str = blog_with_research(report=tavily_search_result, blog=blog_markdown_str)
    logger.info(f"Final blog content: {blog_markdown_str}")

    # Do Metaphor/Exa AI search.
    metaphor_search_result = do_metaphor_ai_research(search_keywords)
    blog_markdown_str = blog_with_research(report=metaphor_search_result, blog=blog_markdown_str)
    logger.info(f"Final blog content: {blog_markdown_str}")

    # Do Google trends analysis and combine with latest blog.
    pytrends_search_result = do_google_pytrends_analysis(search_keywords)
    blog_markdown_str = blog_with_keywords(blog_markdown_str, pytrends_search_result)
    logger.info(f"Final blog content: {blog_markdown_str}")

    # Combine YOU.com RAG search with the latest blog content.
    #you_rag_result = get_rag_results(search_keywords)
    you_search_result = search_ydc_index(search_keywords)
    blog_markdown_str = blog_with_research(report=you_search_result, blog=blog_markdown_str)
    logger.info(f"Final blog content: {blog_markdown_str}")

    # NOTE(review): work-in-progress stop. The process still ends here with
    # status 1, exactly as before; only the call changed, because the bare
    # ``exit`` helper is injected by the ``site`` module for interactive use
    # and is not guaranteed to exist. Everything below is unreachable, and it
    # calls helpers (generate_blog_title, generate_blog_description,
    # get_blog_tags, get_blog_categories, screenshot_api, save_blog_to_file)
    # that are not imported in the part of this module shown here. Import
    # them before removing this stop.
    sys.exit(1)

    blog_title = generate_blog_title(blog_markdown_str, "gemini")
    blog_meta_desc = generate_blog_description(blog_markdown_str, "gemini")
    logger.info(f"The blog meta description is: {blog_meta_desc}\n")
    blog_tags = get_blog_tags(blog_markdown_str, "gemini")
    logger.info(f"Blog tags for generated content: {blog_tags}")
    blog_categories = get_blog_categories(blog_markdown_str, "gemini")
    logger.info(f"Generated blog categories: {blog_categories}\n")

    #blog_markdown_str = gemini_get_code_samples(blog_markdown_str)
    #logger.info(f"Blog with code sample: \n {blog_markdown_str}")

    # fixme: Remove the hardcoding, need add another option OR in config ?
    image_dir = os.path.join(os.getcwd(), "blog_images")
    # ``datetime`` is the class here (from datetime import datetime), so the
    # former ``datetime.datetime.now()`` would have raised AttributeError.
    generated_image_name = f"generated_image_{datetime.now():%Y-%m-%d-%H-%M-%S}.png"
    generated_image_filepath = os.path.join(image_dir, generated_image_name)

    # Generate an image based on meta description
    #logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
    #main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
    if url:
        try:
            generated_image_filepath = screenshot_api(url, generated_image_filepath)
        except Exception as err:
            logger.error(f"Failed in taking compnay page screenshot: {err}")

    # TBD: Save the blog content as a .md file. Markdown or HTML ?
    save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath)
    # The message used to reference an undefined name ``akeyword``.
    logger.info(f"\n\n ################ Finished writing Blog for : {search_keywords} #################### \n")