From e33008659b0cce21640e69f1ef3165fdb9800181 Mon Sep 17 00:00:00 2001 From: AjaySi Date: Sat, 6 Apr 2024 20:09:33 +0530 Subject: [PATCH] WIP - Use Google Bard, Improving chatgpt3.5 --- lib/ai_writers/blog_from_google_serp.py | 20 +++--- lib/ai_writers/combine_blog_and_keywords.py | 16 +---- lib/ai_writers/combine_research_and_blog.py | 53 +++++---------- lib/ai_writers/keywords_to_blog.py | 7 +- lib/blog_postprocessing/humanize_blog.py | 13 ++-- lib/check_blog_seo/is_content_ai_generated.py | 65 ------------------- lib/check_blog_seo/prompt | 3 - lib/github_blogs/github_getting_started.py | 2 +- lib/gpt_providers/openai_text_gen.py | 30 +++++++-- lib/scholar_blogs/main_arxiv_to_blog.py | 2 +- lib/scholar_blogs/write_blog_scholar_paper.py | 2 +- .../write_research_review_blog.py | 2 +- main_config | 5 +- 13 files changed, 75 insertions(+), 145 deletions(-) delete mode 100644 lib/check_blog_seo/is_content_ai_generated.py delete mode 100644 lib/check_blog_seo/prompt diff --git a/lib/ai_writers/blog_from_google_serp.py b/lib/ai_writers/blog_from_google_serp.py index cfe92410..895d4551 100644 --- a/lib/ai_writers/blog_from_google_serp.py +++ b/lib/ai_writers/blog_from_google_serp.py @@ -21,19 +21,23 @@ def write_blog_google_serp(search_keyword, search_results): """Combine the given online research and gpt blog content""" gpt_providers = os.environ["GPT_PROVIDER"] prompt = f""" - As a SEO expert and content writer, I will provide you with my web research keyword and its google search result in json format. - Your task is to write a SEO optimized, unique blog and 5 FAQs. + As a SEO expert and content writer, I will provide you with my 'web research keywords' and its 'google search result'. + Your task is to write an original, conversational, SEO optimized blog and also 5 FAQs. - 1). Your blog content should compete against all, in the provided search results. Follow best SEO practises. - 2). 
Your FAQ should be based on 'People also ask' and 'Related Queries' from given result. + Follow below guidelines: + 1). Your blog content should compete against all blogs from search results. + 2). Your FAQ should be based on 'People also ask' and 'Related Queries' from given search result. Always include answers for each FAQ, use your knowledge and confirm with snippets given in search result. - 3). Your blog should be detailed, unique and written in markdown language. - 4). Do not explain, describe your response. + 3). Your blog should be highly detailed, unique and written in human-like personality & tone. + 4). Act as subject matter expert for given research keywords and include statistics and facts. + 5). Do not explain, describe your response. + 6). Important: Please read the entire prompt before writing anything, and do not do anything extra. + Follow the prompt exactly as I instructed. - Web Research Keyword: "{search_keyword}" + \n\nWeb Research Keyword: "{search_keyword}" Google search Result: "{search_results}" """ - logger.info("Generating blog and FAQs from web search result.") + logger.info("Generating blog and FAQs from Google web search results.") if 'google' in gpt_providers.lower(): try: response = gemini_text_response(prompt) diff --git a/lib/ai_writers/combine_blog_and_keywords.py b/lib/ai_writers/combine_blog_and_keywords.py index bad01b42..d8f86fd7 100644 --- a/lib/ai_writers/combine_blog_and_keywords.py +++ b/lib/ai_writers/combine_blog_and_keywords.py @@ -20,28 +20,16 @@ def blog_with_keywords(blog, keywords): """Combine the given online research and gpt blog content""" gpt_providers = os.environ["GPT_PROVIDER"] prompt = f""" - You are an expert copywriter specializing in content optimization for SEO. + As an expert digital content writer, specializing in content optimization and SEO. I will provide you with my 'blog content' and 'list of keywords' on the same topic. 
- Your task is to write an original blog, using given keywords and blog content. + Your task is to write an original blog, utilizing given keywords and blog content. Your blog should be highly detailed and well formatted. - Do not miss out any details from provided blog content. Blog content: '{blog}' list of keywords: '{keywords}' """ if 'google' in gpt_providers.lower(): - prompt = f"""You are an expert copywriter specializing in content optimization for SEO. - I will provide you with my 'blog content' and 'list of keywords' on the same topic. - Your task is to write an original blog, using the given keywords and blog content. - Your blog should be highly detailed and well formatted. - Do not miss out any details from provided blog content. - Always, include figures, data, results from given content. - It is important that your blog is original and unique. It should be highly readable and SEO optimized. - - Blog content: '{blog}' - list of keywords: '{keywords}' - """ try: response = gemini_text_response(prompt) return response diff --git a/lib/ai_writers/combine_research_and_blog.py b/lib/ai_writers/combine_research_and_blog.py index 7eaa1a02..26f5cd33 100644 --- a/lib/ai_writers/combine_research_and_blog.py +++ b/lib/ai_writers/combine_research_and_blog.py @@ -20,50 +20,31 @@ def blog_with_research(report, blog): """Combine the given online research and gpt blog content""" gpt_providers = os.environ["GPT_PROVIDER"] prompt = f""" - You are an expert copywriter specializing in SEO content optimization for blogs. + You are an expert content editor specializing in SEO content optimization for blogs. I will provide you with a 'research report' and a 'blog content' on the same topic. - Your task is to transform and combine the given 'research report' and 'blog content' into a well-structured, unique - and original blog article. - - Your objectives include: - 1. Master the report and blog content: Understand main ideas, key points, and the core message. - 2. 
Sentence Structure: Rephrase while preserving logical flow and coherence. - 3. Identify Main Keywords: Determine the primary topic and combine the articles on the main topic. - 4. REMEMBER: From the research report, include links and cititations to make your article more authoratative. - 5. Optimize for SEO: Generate high quality informative content. - 6. Implement SEO best practises with appropriate keyword density. - 7. Craft Engaging and Informative Article: Provide value and insight to readers. - 8. Proofread: Important to Check for grammar, spelling, and punctuation errors. - 9. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, - interjections, and colloquialisms. Avoid repetitive phrases and unnatural sentence structures. - 10. Blog Structuring: Include an Introduction, subtopics and use bullet points or - numbered lists if appropriate. Important to include FAQs, Conclusion and Referances. - 11. Ensure Uniqueness: Guarantee the article is plagiarism-free. Write in unique, informative style. - 12. Punctuation: Use appropriate question marks at the end of questions. - 13. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools. - 14. REMEMBER: Use the formatting style of given research report and include highlights, citations, referances in combined article. - - Follow these guidelines to combine and write a new, unique, and informative blog article + Your task is to follow below given guidelines to write a new, unique, and informative blog article that will rank well in search engine results and engage readers effectively. - Create a blog post, in markdown, from the given research report and blog content below. + Follow below given guidelines: + 1. Master the report and blog content: Understand main ideas, key points, and the core message. + 2. Sentence Structure: Rephrase while preserving logical flow and conversational tone. + 3. 
Identify Main Keywords: Determine the primary topic and combine the articles on that main topic. + 4. Implement SEO best practises with appropriate keyword density. + 5. Use Creative and Human-like Style: Incorporate contractions, idioms, transitional phrases, + interjections, and colloquialisms. + 6. Blog Structuring: Include an Introduction, subtopics and use bullet points or + numbered lists if appropriate. Important to include FAQs, Conclusion and Referances. + 7. Ensure Uniqueness: Guarantee the article is plagiarism-free. Write in human-like and informative style. + 9. Pass AI Detection Tools: Create content that easily passes AI plagiarism detection tools. + 10. Act as subject matter expert and include statistics and facts in your combined article. + + Important: Please read the entire prompt before writing anything. Follow the prompt exactly as I instructed.\n\n + Research report: '{report}' Blog content: '{blog}' """ if 'google' in gpt_providers.lower(): - prompt = f"""You are an expert copywriter specializing in content optimization for SEO. - I will provide you with my 'research report' and 'blog content' on the same topic. - Your task is to transform and combine the given research and blog content into a blog article. - Your blog should be highly detailed, original and well formatted. - Do not miss out any details from provided content. - Always, enhance the blog FAQs section with more information from given research. - It is important that your blog provides detailed insights and engaging to readers. - It should be highly readable and SEO optimized. 
- - Research report: '{report}' - Blog content: '{blog}' - """ try: response = gemini_text_response(prompt) return response diff --git a/lib/ai_writers/keywords_to_blog.py b/lib/ai_writers/keywords_to_blog.py index bc3ca45f..ab38e727 100644 --- a/lib/ai_writers/keywords_to_blog.py +++ b/lib/ai_writers/keywords_to_blog.py @@ -81,11 +81,14 @@ def write_blog_from_keywords(search_keywords, url=None): #blog_markdown_str = blog_with_research(blog_markdown_str, you_search_result) #logger.info(f"Final blog content: {blog_markdown_str}") - # Pass the content to remove obivious words used by AI. - blog_markdown_str = blog_humanize(blog_markdown_str) + logger.info("Pass Final blog for blog-proof reading and *improvements.") # Pass the final content for proofreading. blog_markdown_str = blog_proof_editor(blog_markdown_str) + # Pass the content to remove obvious words used by AI. + logger.info("Pass Final blog for Humanizing it further, Doesn't matter, Really?") + blog_markdown_str = blog_humanize(blog_markdown_str) + blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str, search_keywords, example_blog_titles) diff --git a/lib/blog_postprocessing/humanize_blog.py b/lib/blog_postprocessing/humanize_blog.py index ad6dff2d..6a4d920e 100644 --- a/lib/blog_postprocessing/humanize_blog.py +++ b/lib/blog_postprocessing/humanize_blog.py @@ -13,10 +13,15 @@ def blog_humanize(blog_content): """ Helper for blog proof reading. """ gpt_provider = os.environ["GPT_PROVIDER"] - prompt = f"""As an expert content writer and editor, I will provide you with blog content. 
- - Your task is to replace all occurances of words given below: - ['It’s important to note', 'Delve into', 'Tapestry', 'Bustling', 'In summary', 'In conclusion', 'Unleash', 'Unveiling', 'ever-evolving', '', 'Remember that', 'Take a dive into', 'Navigating', 'Navigating the landscape', 'Navigating the complexities of', 'Landscape', 'The landscape of', 'Testament', 'a testament to', 'In the world of', 'Realm', 'Embark', 'virtuoso', 'Let's explore', 'symphony', 'Harnessing', 'Revolutionizing', 'Empower', 'game changing', 'ever-changing', 'Embrace', 'Embracing', 'game-changing', 'ever-evolving'] + prompt = f"""The following is what I will refer to as an 'Exception-list'. + Do Not include any of the words or phrases on this list in your future responses to this chat thread. + These instructions are critical and require absolute adherence! + + Exception-list: ["Informed decisions", "blueprint", "realm", "holistic", "fosters", "informed investment decisions", "informed", "more than just", "it’s about" "navigating", "beacon", "bustling", "treasure trove", "landscape", "tailored", "tailor", “roadmap” , “are more than just”, "tailoring", "dive in", "delving", “streamlining” "dynamic", "robust", "stay tuned", "in conclusion", "seamless", "bustling", “isn't just”, “not just a”, “isn't merely an”, “cornerstone”, “bridge”, “whopping”, “testament”, “paramount” ,“diving into”, “delve into”, “pivotal”, “navigating”, “This isn't a”, “isn't just about“ ,“dives deep”, "It's not just about", “delve”, “harness”, journey”, “elevate”, “maze”, “puzzle”, “overwhelmed” , "and other robotic cliches”, 'It’s important to note', 'Delve into', 'Tapestry', 'Bustling', 'In summary', 'In conclusion', 'Unleash', 'Unveiling', 'ever-evolving', '', 'Remember that', 'Take a dive into', 'Navigating', 'Navigating the landscape', 'Navigating the complexities of', 'Landscape', 'The landscape of', 'Testament', 'a testament to', 'In the world of', 'Realm', 'Embark', 'virtuoso', 'Let's explore', 'symphony', 
'Harnessing', 'Revolutionizing', 'Empower', 'game changing', 'ever-changing', 'Embrace', 'Embracing', 'game-changing', 'ever-evolving'] + + As an expert content writer and editor, I will provide you with blog content. + Your task is to replace all occurances of words from Exception-list from given blog content below. + Before generating any text, examine the Exception-list and avoid all cases of these words and phrases. \n\nBlog Content: '{blog_content}' """ diff --git a/lib/check_blog_seo/is_content_ai_generated.py b/lib/check_blog_seo/is_content_ai_generated.py deleted file mode 100644 index 973952e3..00000000 --- a/lib/check_blog_seo/is_content_ai_generated.py +++ /dev/null @@ -1,65 +0,0 @@ -############################################################################################## -# -# Checks for: -# Short, fragmented sentences that lack human-like coherence. -# Frequent use of overly complex words or technical jargon. -# -# These checks are based on common observations that AI-generated content may sometimes produce -# text with unusual patterns or characteristics. However, please keep in mind that these -# heuristics are not guaranteed to detect all AI-generated content, and false positives or -# negatives can still occur. More advanced techniques and models would be required for more accurate detection. 
-# -############################################################################################# - -import spacy - -# Load the English language model from spaCy -nlp = spacy.load("en_core_web_sm") - -def is_ai_generated(text): - # Tokenize the text using spaCy - doc = nlp(text) - - # Check for indicators of AI-generated content - ai_indicators = [ - "generated by AI", - "auto-generated", - "machine-generated", - "artificial intelligence", - "neural network", - "GPT-3", - "AI model", - ] - - for indicator in ai_indicators: - if indicator.lower() in text.lower(): - return True - - # Check for repetitive patterns or lack of human-like variations - for i in range(len(doc) - 2): - if doc[i].text == doc[i + 1].text == doc[i + 2].text: - return True - - # Check for short, fragmented sentences that lack human-like coherence - for sentence in doc.sents: - if len(sentence) < 5: - return True - - # Check for frequent use of overly complex words or technical jargon - complex_word_count = sum(1 for token in doc if token.is_alpha and len(token.text) > 10) - if complex_word_count > len(doc) // 10: # Adjust the threshold as needed - return True - - return False - -if __name__ == "__main__": - input_text = """ - This is an article generated by a state-of-the-art AI model. - The content is machine-generated and may not represent human writing style. - """ - - if is_ai_generated(input_text): - print("The content appears to be AI-generated.") - else: - print("The content appears to be written by a human.") - diff --git a/lib/check_blog_seo/prompt b/lib/check_blog_seo/prompt deleted file mode 100644 index e2448734..00000000 --- a/lib/check_blog_seo/prompt +++ /dev/null @@ -1,3 +0,0 @@ -Act as an SEO specialist, analyze [website URL], and make improvement suggestions regarding technical SEO with the ways to make those improvements listed in a table. 
- - diff --git a/lib/github_blogs/github_getting_started.py b/lib/github_blogs/github_getting_started.py index 58bb538a..17ecc201 100644 --- a/lib/github_blogs/github_getting_started.py +++ b/lib/github_blogs/github_getting_started.py @@ -12,7 +12,7 @@ logger.add(sys.stdout, -def github_readme_blog(readme_content, gpt_providers="openai"): +def github_readme_blog(readme_content): """ """ prompt = f"""As an expert programmer and teacher, Write an original, detailed and step-by-step guide, from the provided Text below. Your guide should be original, engaging and help beginners get started easily. diff --git a/lib/gpt_providers/openai_text_gen.py b/lib/gpt_providers/openai_text_gen.py index 000c7ab4..4cf031c4 100644 --- a/lib/gpt_providers/openai_text_gen.py +++ b/lib/gpt_providers/openai_text_gen.py @@ -40,11 +40,12 @@ def openai_chatgpt(prompt): config = configparser.ConfigParser() config.read(config_path) - model = config.get('model') - temperature = config.getfloat('temperature') - max_tokens = config.getint('max_tokens') - top_p = config.getfloat('top_p') - n = config.getint('n') + model = config.get('llm_options', 'model') + temperature = config.getfloat('llm_options', 'temperature') + max_tokens = config.getint('llm_options', 'max_tokens') + top_p = config.getfloat('llm_options', 'top_p') + n = config.getint('llm_options', 'n') + fp = config.getfloat('llm_options', 'frequency_penalty') except Exception as err: logger.error(f"Unable to read Openai parameters from config file:{err}") @@ -59,10 +60,25 @@ def openai_chatgpt(prompt): messages=[{"role": "user", "content": prompt}], max_tokens=max_tokens, n=n, - top_p=top_p + top_p=top_p, + stream=True, + frequency_penalty=fp # Additional parameters can be included here ) - return response.choices[0].message.content + # create variables to collect the stream of chunks + collected_chunks = [] + collected_messages = [] + # iterate through the stream of events + for chunk in response: + collected_chunks.append(chunk) # 
save the event response + chunk_message = chunk.choices[0].delta.content # extract the message + collected_messages.append(chunk_message) # save the message + print(chunk.choices[0].delta.content, end = "", flush = True) + + # clean None in collected_messages + collected_messages = [m for m in collected_messages if m is not None] + full_reply_content = ''.join([m for m in collected_messages]) + return full_reply_content except openai.APIError as e: logger.error(f"OpenAI API Error: {e}") diff --git a/lib/scholar_blogs/main_arxiv_to_blog.py b/lib/scholar_blogs/main_arxiv_to_blog.py index 59542def..4689bcde 100644 --- a/lib/scholar_blogs/main_arxiv_to_blog.py +++ b/lib/scholar_blogs/main_arxiv_to_blog.py @@ -155,7 +155,7 @@ def blog_postprocessing(arxiv_id, research_review): raise err try: - blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(research_review, "gemini") + blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(research_review) except Exception as err: logger.error(f"Failed to get blog metadata: {err}") raise err diff --git a/lib/scholar_blogs/write_blog_scholar_paper.py b/lib/scholar_blogs/write_blog_scholar_paper.py index 15f8a74b..c8bad6b9 100644 --- a/lib/scholar_blogs/write_blog_scholar_paper.py +++ b/lib/scholar_blogs/write_blog_scholar_paper.py @@ -11,7 +11,7 @@ logger.add(sys.stdout, ) -def write_blog_from_paper(paper_content, gpt_providers="openai"): +def write_blog_from_paper(paper_content): """ Write blog from given paper url. """ prompt = f"""As an expert in NLP and AI, I will provide you with a content of a research paper. Your task is to write a highly detailed blog(at least 2000 words), breaking down complex concepts for beginners. 
diff --git a/lib/scholar_blogs/write_research_review_blog.py b/lib/scholar_blogs/write_research_review_blog.py index 5b799f70..0c434935 100644 --- a/lib/scholar_blogs/write_research_review_blog.py +++ b/lib/scholar_blogs/write_research_review_blog.py @@ -12,7 +12,7 @@ logger.add(sys.stdout, ) -def review_research_paper(research_blog, gpt_providers="openai"): +def review_research_paper(research_blog): """ """ prompt = f"""As world's top researcher and academician, I will provide you with research paper. Your task is to write a highly detailed review report. diff --git a/main_config b/main_config index 1823db3b..b4aa1d09 100644 --- a/main_config +++ b/main_config @@ -54,11 +54,12 @@ num_images = 1 # Note: ########################################################### +[llm_options] # Choose one of following: Openai, Google, Minstral -gpt_provider = "openai" +gpt_provider = "google" # Mention which model of the above provider to use. -model = "gpt-3.5-turbo-0125" +model = gpt-3.5-turbo-0125 # Temperature is a parameter that controls the “creativity” or randomness of the text generated by GPT. # greater determinism and higher values indicating more randomness.