diff --git a/lib/ai_writers/long_form_ai_writer.py b/lib/ai_writers/long_form_ai_writer.py index 8772984b..2f8c919e 100644 --- a/lib/ai_writers/long_form_ai_writer.py +++ b/lib/ai_writers/long_form_ai_writer.py @@ -30,7 +30,7 @@ logger.add(sys.stdout, from ..utils.read_main_config_params import read_return_config_section from ..ai_web_researcher.gpt_online_researcher import do_metaphor_ai_research from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search, do_tavily_ai_search -from ..blog_metadata.get_blog_metadata import blog_metadata +from ..blog_metadata.get_blog_metadata import get_blog_metadata_longform from ..blog_postprocessing.save_blog_to_file import save_blog_to_file @@ -132,7 +132,7 @@ def long_form_generator(content_keywords): genai.configure(api_key=os.getenv('GEMINI_API_KEY')) # Initialize the generative model - model = genai.GenerativeModel('gemini-1.5-flash', generation_config=generation_config) + model_flash = genai.GenerativeModel('gemini-1.5-flash', generation_config=generation_config) model_pro = genai.GenerativeModel('gemini-pro', generation_config=generation_config) # Do SERP web research for given keywords to generate title and outline. 
@@ -148,7 +148,7 @@ def long_form_generator(content_keywords): return try: - content_outline = generate_with_retry(model_pro, content_outline.format( + content_outline = generate_with_retry(model_flash, content_outline.format( content_title=content_title, web_research_result=web_research_result)).text logger.info(f"The content Outline is: {content_outline}\n\n") @@ -187,9 +187,9 @@ def long_form_generator(content_keywords): logger.info(f"Starting to write on the outline introduction.") draft = starting_draft continuation = generate_with_retry(model_pro, continuation_prompt.format( - content_title=content_title, - content_outline=content_outline, - content_text=draft, + content_title=content_title, + content_outline=content_outline, + content_text=draft, web_research_result=web_research_result, writing_guidelines=writing_guidelines)).text except Exception as err: @@ -211,7 +211,7 @@ def long_form_generator(content_keywords): Content Outline:\n '{content_outline}' """ - search_words = generate_with_retry(model, search_terms).text + search_words = generate_with_retry(model_flash, search_terms).text status.update(label=f"Search terms from written draft: {search_words}") while 'IAMDONE' not in continuation: @@ -220,50 +220,48 @@ def long_form_generator(content_keywords): # Strip quotes from each element str_list = [s.strip('\'"') for s in str_list] - for search_term in str_list: - web_research_result, m_titles, t_titles = do_tavily_ai_search(search_term, max_results=5) - try: - continuation = generate_with_retry(model_pro, continuation_prompt.format( - content_title=content_title, - content_outline=content_outline, - content_text=draft, - web_research_result=web_research_result, - writing_guidelines=writing_guidelines)).text - - draft += '\n\n' + continuation - logger.info(f"Writing in progress... Current draft length: {len(draft)} characters") - status.update(label=f"Writing in progress... 
Current draft length: {len(draft)} characters") - # At this point, the context is little stale. We should more web research on - # related queries as per the content outline, to augment the LLM context. - except Exception as err: - st.error(f"Failed to continually write the Essay: {err}") - logger.error(f"Failed to continually write the Essay: {err}") - return +# for search_term in str_list: +# web_research_result, m_titles, t_titles = do_tavily_ai_search(search_term, max_results=5) +# status.update(label=f"Search terms from written draft: {search_term}") +# for item in web_research_result.get("results"): +# title = item.get("title", "") +# snippet = item.get("content", "") +# table_data.append([title, snippet]) +# web_research_result = table_data + + try: + continuation = generate_with_retry(model_pro, continuation_prompt.format( + content_title=content_title, + content_outline=content_outline, + content_text=draft, + web_research_result=web_research_result, + writing_guidelines=writing_guidelines)).text + + draft += '\n\n' + continuation + logger.info(f"Writing in progress... Current draft length: {len(draft)} characters") + status.update(label=f"Writing in progress... Current draft length: {len(draft)} characters") + # At this point, the context is little stale. We should more web research on + # related queries as per the content outline, to augment the LLM context. + except Exception as err: + st.error(f"Failed to continually write long-form content: {err}") + logger.error(f"Failed to continually write the Essay: {err}") + return # Remove 'IAMDONE' and print the final story final = draft.replace('IAMDONE', '').strip() status.update(label="Success: Finished writing Long form content.") - # FIXME: The current implementation is suited for normal length content. - # In long content sending the whole content for each content metadata is expensive. 
-# blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(final, -# content_keywords, m_titles) +# # In long content sending the whole content for each content metadata is expensive. +# # https://ai.google.dev/gemini-api/docs/caching?lang=python +# #blog_title, blog_meta_desc, blog_tags, blog_categories = get_blog_metadata_longform(final) +# blog_categories = get_blog_metadata_longform(final) +# print("\n\n-----{blog_categories}------\n\n") +# # status.update(label="Success: Finished with Title, Meta Description, Tags, categories") # generated_image_filepath = None # # TBD: Save the blog content as a .md file. Markdown or HTML ? # save_blog_to_file(final, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath) -# -# blog_frontmatter = dedent(f""" -# \n--------------------------------------------------------------------- -# title: {blog_title.strip()}\n -# categories: [{blog_categories.strip()}]\n -# tags: [{blog_tags.strip()}]\n -# Meta description: {blog_meta_desc.replace(":", "-").strip()}\n -# ---------------------------------------------------------------------\n -# """) -# -# logger.info(f"\n{blog_frontmatter}{final}\n\n") -# st.markdown(f"\n{blog_frontmatter}{final}\n\n") + logger.info(f"\n{final}\n\n") logger.info(f"\n\n ################ Finished writing Blog for : {content_keywords} #################### \n") diff --git a/lib/blog_metadata/get_blog_metadata.py b/lib/blog_metadata/get_blog_metadata.py index 4e0b1e25..dd34585b 100644 --- a/lib/blog_metadata/get_blog_metadata.py +++ b/lib/blog_metadata/get_blog_metadata.py @@ -1,3 +1,6 @@ +import os +import time +import datetime import sys import streamlit as st from loguru import logger @@ -9,9 +12,12 @@ logger.add(sys.stdout, colorize=True, format="{level}|{file}:{line}:{function}| {message}" ) +import google.generativeai as genai +from google.generativeai import caching from ..gpt_providers.text_generation.main_text_generation import llm_text_gen + async def 
blog_metadata(blog_article): """ Common function to get blog metadata """ logger.info(f"Generating Content MetaData\n") @@ -47,6 +53,7 @@ async def blog_metadata(blog_article): return blog_title, blog_meta_desc, blog_tags, blog_categories + def generate_blog_title(blog_article): """ Given a blog title generate an outline for it @@ -124,3 +131,71 @@ def run_async(coro): result = loop.run_until_complete(coro) loop.close() return result
+
+
+def get_blog_metadata_longform(longform_content):
+    """Summarize long-form content through a Gemini context cache.
+
+    The content is written to a file under ``CONTENT_SAVE_DIR``, uploaded
+    through the Gemini Files API, wrapped in a cached-content object and
+    then queried with a fixed summarization prompt.
+
+    Args:
+        longform_content (str): Text to write to disk and upload.
+
+    Returns:
+        str: Text of the model response to the summarization prompt.
+
+    Raises:
+        KeyError: If ``GEMINI_API_KEY`` is missing from the environment.
+        TypeError: If ``CONTENT_SAVE_DIR`` is unset, because
+            ``os.path.join`` rejects ``None``.
+        ValueError: If the uploaded file ends up in the ``FAILED`` state.
+    """
+    # Open the file in write mode ("w") to overwrite existing content.
+    filepath = os.path.join(os.getenv("CONTENT_SAVE_DIR"), "lognform_metadata_file")
+    # Explicit UTF-8 keeps non-ASCII text independent of the platform default.
+    with open(filepath, "w", encoding="utf-8") as file:
+        file.write(longform_content)
+    print(f"String saved successfully to: {filepath}")
+
+    genai.configure(api_key=os.environ['GEMINI_API_KEY'])
+    uploaded_file = genai.upload_file(path=filepath)
+
+    # Poll until processing finishes, re-fetching the upload by its own name.
+    while uploaded_file.state.name == 'PROCESSING':
+        print('Waiting for file to be processed.')
+        time.sleep(2)
+        uploaded_file = genai.get_file(uploaded_file.name)
+
+    # A failed upload cannot back a cache, so stop with a clear error.
+    if uploaded_file.state.name == 'FAILED':
+        raise ValueError(f"Gemini file processing failed: {uploaded_file.name}")
+
+    print(f'File processing complete: {uploaded_file.uri}')
+
+    # Create a cache with a 15 minute TTL.
+    # NOTE(review): context caching presumably enforces a minimum input size;
+    # confirm short content is accepted before relying on this path.
+    cache = caching.CachedContent.create(
+        model='models/gemini-1.5-flash-001',
+        display_name='Alwrity Longform content',  # used to identify the cache
+        system_instruction=(
+            'You are an expert file analyzer , and your job is to answer '
+            'the user\'s query based on the file you have access to.'
+        ),
+        contents=[uploaded_file],
+        ttl=datetime.timedelta(minutes=15),
+    )
+
+    # Construct a GenerativeModel which uses the created cache.
+    model = genai.GenerativeModel.from_cached_content(cached_content=cache)
+
+    # Query the model
+    response = model.generate_content([(
+        'SUmmarize the given file '
+        'in 10 lines '
+        'list main points')])
+
+    #print(response.usage_metadata)
+    return response.text
diff --git a/lib/workspace/alwrity_config/main_config.json b/lib/workspace/alwrity_config/main_config.json index 9ad590df..337d9b53 100644 --- a/lib/workspace/alwrity_config/main_config.json +++ b/lib/workspace/alwrity_config/main_config.json @@ -2,7 +2,7 @@ "Blog Content Characteristics": { "Blog Length": "2000", "Blog Tone": "Casual", - "Blog Demographic": "Professional", + "Blog Demographic": "Digital Marketing", "Blog Type": "Informational", "Blog Language": "English", "Blog Output Format": "markdown" diff --git a/lib/workspace/alwrity_prompts/long_form_ai_writer.prompts b/lib/workspace/alwrity_prompts/long_form_ai_writer.prompts index 4a583581..77c81ca7 100644 --- a/lib/workspace/alwrity_prompts/long_form_ai_writer.prompts +++ b/lib/workspace/alwrity_prompts/long_form_ai_writer.prompts @@ -62,7 +62,7 @@ starting_prompt: | First, silently review the content outline and title. Consider how to begin writing your content. Take your time. Start by writing the very beginning of the outline. You are not expected to finish the entire content now. Your writing should be detailed, only scratching the surface of the first bullet of your outline. - Try to write AT MINIMUM 1000 WORDS. + Try to write AT MINIMUM 2000 WORDS. {writing_guidelines} @@ -92,7 +92,7 @@ continuation_prompt: | Your task is to continue writing from where you left off and cover the next part of the outline. You are not expected to finish the entire content now. - Aim to write at least 1000 words. + Aim to write at least 800 words. However, only once the entire content is completely finished, write IAMDONE. {writing_guidelines}