Improved longform, Image, prompts

This commit is contained in:
ajaysi
2024-07-01 19:21:28 +05:30
parent 97ece766c9
commit 94b938d31e
4 changed files with 95 additions and 45 deletions

View File

@@ -30,7 +30,7 @@ logger.add(sys.stdout,
from ..utils.read_main_config_params import read_return_config_section
from ..ai_web_researcher.gpt_online_researcher import do_metaphor_ai_research
from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search, do_tavily_ai_search
from ..blog_metadata.get_blog_metadata import blog_metadata
from ..blog_metadata.get_blog_metadata import get_blog_metadata_longform
from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
@@ -132,7 +132,7 @@ def long_form_generator(content_keywords):
genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
# Initialize the generative model
model = genai.GenerativeModel('gemini-1.5-flash', generation_config=generation_config)
model_flash = genai.GenerativeModel('gemini-1.5-flash', generation_config=generation_config)
model_pro = genai.GenerativeModel('gemini-pro', generation_config=generation_config)
# Do SERP web research for given keywords to generate title and outline.
@@ -148,7 +148,7 @@ def long_form_generator(content_keywords):
return
try:
content_outline = generate_with_retry(model_pro, content_outline.format(
content_outline = generate_with_retry(model_flash, content_outline.format(
content_title=content_title,
web_research_result=web_research_result)).text
logger.info(f"The content Outline is: {content_outline}\n\n")
@@ -187,9 +187,9 @@ def long_form_generator(content_keywords):
logger.info(f"Starting to write on the outline introduction.")
draft = starting_draft
continuation = generate_with_retry(model_pro, continuation_prompt.format(
content_title=content_title,
content_outline=content_outline,
content_text=draft,
content_title=content_title,
content_outline=content_outline,
content_text=draft,
web_research_result=web_research_result,
writing_guidelines=writing_guidelines)).text
except Exception as err:
@@ -211,7 +211,7 @@ def long_form_generator(content_keywords):
Content Outline:\n
'{content_outline}'
"""
search_words = generate_with_retry(model, search_terms).text
search_words = generate_with_retry(model_flash, search_terms).text
status.update(label=f"Search terms from written draft: {search_words}")
while 'IAMDONE' not in continuation:
@@ -220,50 +220,48 @@ def long_form_generator(content_keywords):
# Strip quotes from each element
str_list = [s.strip('\'"') for s in str_list]
for search_term in str_list:
web_research_result, m_titles, t_titles = do_tavily_ai_search(search_term, max_results=5)
try:
continuation = generate_with_retry(model_pro, continuation_prompt.format(
content_title=content_title,
content_outline=content_outline,
content_text=draft,
web_research_result=web_research_result,
writing_guidelines=writing_guidelines)).text
draft += '\n\n' + continuation
logger.info(f"Writing in progress... Current draft length: {len(draft)} characters")
status.update(label=f"Writing in progress... Current draft length: {len(draft)} characters")
# At this point, the context is a little stale. We should do more web research on
# related queries as per the content outline, to augment the LLM context.
except Exception as err:
st.error(f"Failed to continually write the Essay: {err}")
logger.error(f"Failed to continually write the Essay: {err}")
return
# for search_term in str_list:
# web_research_result, m_titles, t_titles = do_tavily_ai_search(search_term, max_results=5)
# status.update(label=f"Search terms from written draft: {search_term}")
# for item in web_research_result.get("results"):
# title = item.get("title", "")
# snippet = item.get("content", "")
# table_data.append([title, snippet])
# web_research_result = table_data
try:
continuation = generate_with_retry(model_pro, continuation_prompt.format(
content_title=content_title,
content_outline=content_outline,
content_text=draft,
web_research_result=web_research_result,
writing_guidelines=writing_guidelines)).text
draft += '\n\n' + continuation
logger.info(f"Writing in progress... Current draft length: {len(draft)} characters")
status.update(label=f"Writing in progress... Current draft length: {len(draft)} characters")
# At this point, the context is a little stale. We should do more web research on
# related queries as per the content outline, to augment the LLM context.
except Exception as err:
st.error(f"Failed to continually write long-form content: {err}")
logger.error(f"Failed to continually write the Essay: {err}")
return
# Remove 'IAMDONE' and print the final story
final = draft.replace('IAMDONE', '').strip()
status.update(label="Success: Finished writing Long form content.")
# FIXME: The current implementation is suited for normal length content.
# In long content sending the whole content for each content metadata is expensive.
# blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(final,
# content_keywords, m_titles)
# # In long content sending the whole content for each content metadata is expensive.
# # https://ai.google.dev/gemini-api/docs/caching?lang=python
# #blog_title, blog_meta_desc, blog_tags, blog_categories = get_blog_metadata_longform(final)
# blog_categories = get_blog_metadata_longform(final)
# print("\n\n-----{blog_categories}------\n\n")
#
# status.update(label="Success: Finished with Title, Meta Description, Tags, categories")
# generated_image_filepath = None
# # TBD: Save the blog content as a .md file. Markdown or HTML ?
# save_blog_to_file(final, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath)
#
# blog_frontmatter = dedent(f"""
# \n---------------------------------------------------------------------
# title: {blog_title.strip()}\n
# categories: [{blog_categories.strip()}]\n
# tags: [{blog_tags.strip()}]\n
# Meta description: {blog_meta_desc.replace(":", "-").strip()}\n
# ---------------------------------------------------------------------\n
# """)
#
# logger.info(f"\n{blog_frontmatter}{final}\n\n")
# st.markdown(f"\n{blog_frontmatter}{final}\n\n")
logger.info(f"\n{final}\n\n")
logger.info(f"\n\n ################ Finished writing Blog for : {content_keywords} #################### \n")

View File

@@ -1,3 +1,6 @@
import os
import time
import datetime
import sys
import streamlit as st
from loguru import logger
@@ -9,9 +12,12 @@ logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
import google.generativeai as genai
from google.generativeai import caching
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
async def blog_metadata(blog_article):
""" Common function to get blog metadata """
logger.info(f"Generating Content MetaData\n")
@@ -47,6 +53,7 @@ async def blog_metadata(blog_article):
return blog_title, blog_meta_desc, blog_tags, blog_categories
def generate_blog_title(blog_article):
"""
Given a blog title generate an outline for it
@@ -124,3 +131,48 @@ def run_async(coro):
result = loop.run_until_complete(coro)
loop.close()
return result
def get_blog_metadata_longform(longform_content):
    """Summarize long-form content through a Gemini context cache.

    The content is written to a file under ``CONTENT_SAVE_DIR``, uploaded
    with the Gemini File API, wrapped in a cached-content object, and the
    cache-backed model is then asked to summarize it.

    Args:
        longform_content (str): Full text of the long-form article.

    Returns:
        str: Text of the model's response to the summarization prompt.

    Raises:
        TypeError: If the ``CONTENT_SAVE_DIR`` environment variable is unset
            (``os.path.join`` receives ``None``).
        KeyError: If the ``GEMINI_API_KEY`` environment variable is unset.
        ValueError: If the uploaded file ends up in the ``FAILED`` state.
    """
    # Persist the content so it can be handed to the File API. Mode "w"
    # overwrites any previous run; UTF-8 is explicit because generated text
    # routinely contains non-ASCII characters.
    filepath = os.path.join(os.getenv("CONTENT_SAVE_DIR"), "lognform_metadata_file")
    with open(filepath, "w", encoding="utf-8") as file:
        file.write(longform_content)
    logger.info(f"String saved successfully to: {filepath}")

    genai.configure(api_key=os.environ['GEMINI_API_KEY'])
    uploaded_file = genai.upload_file(path=filepath)

    # Poll until the upload leaves the PROCESSING state. The refreshed handle
    # must be fetched by the uploaded file's own name.
    while uploaded_file.state.name == 'PROCESSING':
        logger.info('Waiting for file to be processed.')
        time.sleep(2)
        uploaded_file = genai.get_file(uploaded_file.name)
    if uploaded_file.state.name == 'FAILED':
        raise ValueError(f"File processing failed for: {uploaded_file.name}")
    logger.info(f'File processing complete: {uploaded_file.uri}')

    # Create a cache with a 15 minute TTL.
    cache = caching.CachedContent.create(
        model='models/gemini-1.5-flash-001',
        display_name='Alwrity Longform content',  # used to identify the cache
        system_instruction=(
            'You are an expert file analyzer , and your job is to answer '
            'the user\'s query based on the file you have access to.'
        ),
        contents=[uploaded_file],
        ttl=datetime.timedelta(minutes=15),
    )

    # Construct a GenerativeModel which uses the created cache.
    model = genai.GenerativeModel.from_cached_content(cached_content=cache)

    # Query the model; the prompt text is kept exactly as originally written.
    response = model.generate_content([(
        'SUmmarize the given file '
        'in 10 lines '
        'list main points')])
    return response.text

View File

@@ -2,7 +2,7 @@
"Blog Content Characteristics": {
"Blog Length": "2000",
"Blog Tone": "Casual",
"Blog Demographic": "Professional",
"Blog Demographic": "Digital Marketing",
"Blog Type": "Informational",
"Blog Language": "English",
"Blog Output Format": "markdown"

View File

@@ -62,7 +62,7 @@ starting_prompt: |
First, silently review the content outline and title. Consider how to begin writing your content. Take your time.
Start by writing the very beginning of the outline. You are not expected to finish the entire content now.
Your writing should be detailed, only scratching the surface of the first bullet of your outline.
Try to write AT MINIMUM 1000 WORDS.
Try to write AT MINIMUM 2000 WORDS.
{writing_guidelines}
@@ -92,7 +92,7 @@ continuation_prompt: |
Your task is to continue writing from where you left off and cover the next part of the outline.
You are not expected to finish the entire content now.
Aim to write at least 1000 words.
Aim to write at least 800 words.
However, only once the entire content is completely finished, write IAMDONE.
{writing_guidelines}