Improved longform, Image, prompts

This commit is contained in:
ajaysi
2024-07-01 19:21:28 +05:30
parent 97ece766c9
commit 94b938d31e
4 changed files with 95 additions and 45 deletions

View File

@@ -30,7 +30,7 @@ logger.add(sys.stdout,
from ..utils.read_main_config_params import read_return_config_section from ..utils.read_main_config_params import read_return_config_section
from ..ai_web_researcher.gpt_online_researcher import do_metaphor_ai_research from ..ai_web_researcher.gpt_online_researcher import do_metaphor_ai_research
from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search, do_tavily_ai_search from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search, do_tavily_ai_search
from ..blog_metadata.get_blog_metadata import blog_metadata from ..blog_metadata.get_blog_metadata import get_blog_metadata_longform
from ..blog_postprocessing.save_blog_to_file import save_blog_to_file from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
@@ -132,7 +132,7 @@ def long_form_generator(content_keywords):
genai.configure(api_key=os.getenv('GEMINI_API_KEY')) genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
# Initialize the generative model # Initialize the generative model
model = genai.GenerativeModel('gemini-1.5-flash', generation_config=generation_config) model_flash = genai.GenerativeModel('gemini-1.5-flash', generation_config=generation_config)
model_pro = genai.GenerativeModel('gemini-pro', generation_config=generation_config) model_pro = genai.GenerativeModel('gemini-pro', generation_config=generation_config)
# Do SERP web research for given keywords to generate title and outline. # Do SERP web research for given keywords to generate title and outline.
@@ -148,7 +148,7 @@ def long_form_generator(content_keywords):
return return
try: try:
content_outline = generate_with_retry(model_pro, content_outline.format( content_outline = generate_with_retry(model_flash, content_outline.format(
content_title=content_title, content_title=content_title,
web_research_result=web_research_result)).text web_research_result=web_research_result)).text
logger.info(f"The content Outline is: {content_outline}\n\n") logger.info(f"The content Outline is: {content_outline}\n\n")
@@ -187,9 +187,9 @@ def long_form_generator(content_keywords):
logger.info(f"Starting to write on the outline introduction.") logger.info(f"Starting to write on the outline introduction.")
draft = starting_draft draft = starting_draft
continuation = generate_with_retry(model_pro, continuation_prompt.format( continuation = generate_with_retry(model_pro, continuation_prompt.format(
content_title=content_title, content_title=content_title,
content_outline=content_outline, content_outline=content_outline,
content_text=draft, content_text=draft,
web_research_result=web_research_result, web_research_result=web_research_result,
writing_guidelines=writing_guidelines)).text writing_guidelines=writing_guidelines)).text
except Exception as err: except Exception as err:
@@ -211,7 +211,7 @@ def long_form_generator(content_keywords):
Content Outline:\n Content Outline:\n
'{content_outline}' '{content_outline}'
""" """
search_words = generate_with_retry(model, search_terms).text search_words = generate_with_retry(model_flash, search_terms).text
status.update(label=f"Search terms from written draft: {search_words}") status.update(label=f"Search terms from written draft: {search_words}")
while 'IAMDONE' not in continuation: while 'IAMDONE' not in continuation:
@@ -220,50 +220,48 @@ def long_form_generator(content_keywords):
# Strip quotes from each element # Strip quotes from each element
str_list = [s.strip('\'"') for s in str_list] str_list = [s.strip('\'"') for s in str_list]
for search_term in str_list: # for search_term in str_list:
web_research_result, m_titles, t_titles = do_tavily_ai_search(search_term, max_results=5) # web_research_result, m_titles, t_titles = do_tavily_ai_search(search_term, max_results=5)
try: # status.update(label=f"Search terms from written draft: {search_term}")
continuation = generate_with_retry(model_pro, continuation_prompt.format( # for item in web_research_result.get("results"):
content_title=content_title, # title = item.get("title", "")
content_outline=content_outline, # snippet = item.get("content", "")
content_text=draft, # table_data.append([title, snippet])
web_research_result=web_research_result, # web_research_result = table_data
writing_guidelines=writing_guidelines)).text
try:
draft += '\n\n' + continuation continuation = generate_with_retry(model_pro, continuation_prompt.format(
logger.info(f"Writing in progress... Current draft length: {len(draft)} characters") content_title=content_title,
status.update(label=f"Writing in progress... Current draft length: {len(draft)} characters") content_outline=content_outline,
# At this point, the context is little stale. We should more web research on content_text=draft,
# related queries as per the content outline, to augment the LLM context. web_research_result=web_research_result,
except Exception as err: writing_guidelines=writing_guidelines)).text
st.error(f"Failed to continually write the Essay: {err}")
logger.error(f"Failed to continually write the Essay: {err}") draft += '\n\n' + continuation
return logger.info(f"Writing in progress... Current draft length: {len(draft)} characters")
status.update(label=f"Writing in progress... Current draft length: {len(draft)} characters")
# At this point, the context is a little stale. We should do more web research on
# related queries as per the content outline, to augment the LLM context.
except Exception as err:
st.error(f"Failed to continually write long-form content: {err}")
logger.error(f"Failed to continually write the Essay: {err}")
return
# Remove 'IAMDONE' and print the final story # Remove 'IAMDONE' and print the final story
final = draft.replace('IAMDONE', '').strip() final = draft.replace('IAMDONE', '').strip()
status.update(label="Success: Finished writing Long form content.") status.update(label="Success: Finished writing Long form content.")
# FIXME: The current implementation is suited for normal length content. # # In long content sending the whole content for each content metadata is expensive.
# In long content sending the whole content for each content metadata is expensive. # # https://ai.google.dev/gemini-api/docs/caching?lang=python
# blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(final, # #blog_title, blog_meta_desc, blog_tags, blog_categories = get_blog_metadata_longform(final)
# content_keywords, m_titles) # blog_categories = get_blog_metadata_longform(final)
# print("\n\n-----{blog_categories}------\n\n")
#
# status.update(label="Success: Finished with Title, Meta Description, Tags, categories") # status.update(label="Success: Finished with Title, Meta Description, Tags, categories")
# generated_image_filepath = None # generated_image_filepath = None
# # TBD: Save the blog content as a .md file. Markdown or HTML ? # # TBD: Save the blog content as a .md file. Markdown or HTML ?
# save_blog_to_file(final, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath) # save_blog_to_file(final, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath)
#
# blog_frontmatter = dedent(f"""
# \n---------------------------------------------------------------------
# title: {blog_title.strip()}\n
# categories: [{blog_categories.strip()}]\n
# tags: [{blog_tags.strip()}]\n
# Meta description: {blog_meta_desc.replace(":", "-").strip()}\n
# ---------------------------------------------------------------------\n
# """)
#
# logger.info(f"\n{blog_frontmatter}{final}\n\n")
# st.markdown(f"\n{blog_frontmatter}{final}\n\n")
logger.info(f"\n{final}\n\n") logger.info(f"\n{final}\n\n")
logger.info(f"\n\n ################ Finished writing Blog for : {content_keywords} #################### \n") logger.info(f"\n\n ################ Finished writing Blog for : {content_keywords} #################### \n")

View File

@@ -1,3 +1,6 @@
import os
import time
import datetime
import sys import sys
import streamlit as st import streamlit as st
from loguru import logger from loguru import logger
@@ -9,9 +12,12 @@ logger.add(sys.stdout,
colorize=True, colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}" format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
) )
import google.generativeai as genai
from google.generativeai import caching
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
async def blog_metadata(blog_article): async def blog_metadata(blog_article):
""" Common function to get blog metadata """ """ Common function to get blog metadata """
logger.info(f"Generating Content MetaData\n") logger.info(f"Generating Content MetaData\n")
@@ -47,6 +53,7 @@ async def blog_metadata(blog_article):
return blog_title, blog_meta_desc, blog_tags, blog_categories return blog_title, blog_meta_desc, blog_tags, blog_categories
def generate_blog_title(blog_article): def generate_blog_title(blog_article):
""" """
Given a blog title generate an outline for it Given a blog title generate an outline for it
@@ -124,3 +131,48 @@ def run_async(coro):
result = loop.run_until_complete(coro) result = loop.run_until_complete(coro)
loop.close() loop.close()
return result return result
def get_blog_metadata_longform(longform_content):
    """Summarize long-form content through a Gemini context cache.

    The content is written to a file under ``CONTENT_SAVE_DIR``, uploaded
    with the Gemini File API, wrapped in a cached-content object, and a
    model built from that cache is asked to summarize it.

    Args:
        longform_content: The full long-form text to analyze.

    Returns:
        The ``text`` of the model's response to the summary prompt.

    Raises:
        TypeError: If the ``CONTENT_SAVE_DIR`` environment variable is unset
            (``os.path.join`` receives ``None``).
        KeyError: If the ``GEMINI_API_KEY`` environment variable is unset.
        ValueError: If the uploaded file ends up in the ``FAILED`` state.
    """
    # The file name (including its spelling) is kept as-is so any existing
    # files or tooling that rely on it keep working.
    filepath = os.path.join(os.getenv("CONTENT_SAVE_DIR"), "lognform_metadata_file")

    # Write mode overwrites the previous run's content. UTF-8 is explicit so
    # non-ASCII characters do not depend on the platform's default encoding.
    with open(filepath, "w", encoding="utf-8") as file:
        file.write(longform_content)
    logger.info(f"Long-form content saved to: {filepath}")

    genai.configure(api_key=os.environ['GEMINI_API_KEY'])

    # The file has no extension, so the MIME type cannot be guessed from its
    # name; state it explicitly.
    uploaded_file = genai.upload_file(path=filepath, mime_type="text/plain")

    # Poll until the File API has finished processing the upload.
    while uploaded_file.state.name == 'PROCESSING':
        logger.info('Waiting for the uploaded file to be processed.')
        time.sleep(2)
        # Refresh the handle using the uploaded file's own name.
        uploaded_file = genai.get_file(uploaded_file.name)

    if uploaded_file.state.name == 'FAILED':
        raise ValueError(f"File processing failed: {uploaded_file.name}")
    logger.info(f'File processing complete: {uploaded_file.uri}')

    # Create a cache with a 15 minute TTL.
    # NOTE(review): context caching is believed to enforce a minimum input
    # token count; short content may be rejected here - confirm against the
    # Gemini caching documentation.
    cache = caching.CachedContent.create(
        model='models/gemini-1.5-flash-001',
        display_name='Alwrity Longform content',  # used to identify the cache
        system_instruction=(
            'You are an expert file analyzer , and your job is to answer '
            'the user\'s query based on the file you have access to.'
        ),
        contents=[uploaded_file],
        ttl=datetime.timedelta(minutes=15),
    )

    # Construct a GenerativeModel which uses the created cache.
    model = genai.GenerativeModel.from_cached_content(cached_content=cache)

    # Query the model.
    response = model.generate_content([(
        'SUmmarize the given file '
        'in 10 lines '
        'list main points')])
    return response.text

View File

@@ -2,7 +2,7 @@
"Blog Content Characteristics": { "Blog Content Characteristics": {
"Blog Length": "2000", "Blog Length": "2000",
"Blog Tone": "Casual", "Blog Tone": "Casual",
"Blog Demographic": "Professional", "Blog Demographic": "Digital Marketing",
"Blog Type": "Informational", "Blog Type": "Informational",
"Blog Language": "English", "Blog Language": "English",
"Blog Output Format": "markdown" "Blog Output Format": "markdown"

View File

@@ -62,7 +62,7 @@ starting_prompt: |
First, silently review the content outline and title. Consider how to begin writing your content. Take your time. First, silently review the content outline and title. Consider how to begin writing your content. Take your time.
Start by writing the very beginning of the outline. You are not expected to finish the entire content now. Start by writing the very beginning of the outline. You are not expected to finish the entire content now.
Your writing should be detailed, only scratching the surface of the first bullet of your outline. Your writing should be detailed, only scratching the surface of the first bullet of your outline.
Try to write AT MINIMUM 1000 WORDS. Try to write AT MINIMUM 2000 WORDS.
{writing_guidelines} {writing_guidelines}
@@ -92,7 +92,7 @@ continuation_prompt: |
Your task is to continue writing from where you left off and cover the next part of the outline. Your task is to continue writing from where you left off and cover the next part of the outline.
You are not expected to finish the entire content now. You are not expected to finish the entire content now.
Aim to write at least 1000 words. Aim to write at least 800 words.
However, only once the entire content is completely finished, write IAMDONE. However, only once the entire content is completely finished, write IAMDONE.
{writing_guidelines} {writing_guidelines}