Improved longform, Image, prompts

2024-07-01 19:21:28 +05:30
parent 97ece766c9
commit 94b938d31e
4 changed files with 95 additions and 45 deletions
--- a/lib/ai_writers/long_form_ai_writer.py
+++ b/lib/ai_writers/long_form_ai_writer.py
@@ -30,7 +30,7 @@ logger.add(sys.stdout,
 from ..utils.read_main_config_params import read_return_config_section
 from ..ai_web_researcher.gpt_online_researcher import do_metaphor_ai_research
 from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search, do_tavily_ai_search
-from ..blog_metadata.get_blog_metadata import blog_metadata
+from ..blog_metadata.get_blog_metadata import get_blog_metadata_longform
 from ..blog_postprocessing.save_blog_to_file import save_blog_to_file


@@ -132,7 +132,7 @@ def long_form_generator(content_keywords):
        
        genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
        # Initialize the generative model
-        model = genai.GenerativeModel('gemini-1.5-flash', generation_config=generation_config)
+        model_flash = genai.GenerativeModel('gemini-1.5-flash', generation_config=generation_config)
        model_pro = genai.GenerativeModel('gemini-pro', generation_config=generation_config)
        
        # Do SERP web research for given keywords to generate title and outline.
@@ -148,7 +148,7 @@ def long_form_generator(content_keywords):
            return
        
        try:
-            content_outline = generate_with_retry(model_pro, content_outline.format(
+            content_outline = generate_with_retry(model_flash, content_outline.format(
                content_title=content_title, 
                web_research_result=web_research_result)).text
            logger.info(f"The content Outline is: {content_outline}\n\n")
@@ -187,9 +187,9 @@ def long_form_generator(content_keywords):
            logger.info(f"Starting to write on the outline introduction.")
            draft = starting_draft
            continuation = generate_with_retry(model_pro, continuation_prompt.format(
-                    content_title=content_title, 
-                    content_outline=content_outline, 
-                    content_text=draft, 
+                    content_title=content_title,
+                    content_outline=content_outline,
+                    content_text=draft,
                    web_research_result=web_research_result,
                    writing_guidelines=writing_guidelines)).text
        except Exception as err:
@@ -211,7 +211,7 @@ def long_form_generator(content_keywords):
            Content Outline:\n
            '{content_outline}'
            """
-        search_words = generate_with_retry(model, search_terms).text
+        search_words = generate_with_retry(model_flash, search_terms).text
        status.update(label=f"Search terms from written draft: {search_words}")
        
        while 'IAMDONE' not in continuation:
@@ -220,50 +220,48 @@ def long_form_generator(content_keywords):
            # Strip quotes from each element 
            str_list = [s.strip('\'"') for s in str_list]

-            for search_term in str_list:
-                web_research_result, m_titles, t_titles = do_tavily_ai_search(search_term, max_results=5)
-                try:
-                    continuation = generate_with_retry(model_pro, continuation_prompt.format(
-                        content_title=content_title,
-                        content_outline=content_outline, 
-                        content_text=draft, 
-                        web_research_result=web_research_result,
-                        writing_guidelines=writing_guidelines)).text
-    
-                    draft += '\n\n' + continuation
-                    logger.info(f"Writing in progress... Current draft length: {len(draft)} characters")
-                    status.update(label=f"Writing in progress... Current draft length: {len(draft)} characters")
-                    # At this point, the context is little stale. We should more web research on
-                    # related queries as per the content outline, to augment the LLM context.
-                except Exception as err:
-                    st.error(f"Failed to continually write the Essay: {err}")
-                    logger.error(f"Failed to continually write the Essay: {err}")
-                    return
+#            for search_term in str_list:
+#                web_research_result, m_titles, t_titles = do_tavily_ai_search(search_term, max_results=5)
+#                status.update(label=f"Search terms from written draft: {search_term}")
+#                for item in web_research_result.get("results"):
+#                    title = item.get("title", "")
+#                    snippet = item.get("content", "")
+#                    table_data.append([title, snippet])
+#                web_research_result = table_data
+
+            try:
+                continuation = generate_with_retry(model_pro, continuation_prompt.format(
+                            content_title=content_title,
+                            content_outline=content_outline, 
+                            content_text=draft, 
+                            web_research_result=web_research_result,
+                            writing_guidelines=writing_guidelines)).text
+        
+                draft += '\n\n' + continuation
+                logger.info(f"Writing in progress... Current draft length: {len(draft)} characters")
+                status.update(label=f"Writing in progress... Current draft length: {len(draft)} characters")
+                # At this point, the context is little stale. We should more web research on
+                # related queries as per the content outline, to augment the LLM context.
+            except Exception as err:
+                st.error(f"Failed to continually write long-form content: {err}")
+                logger.error(f"Failed to continually write the Essay: {err}")
+                return
        
        # Remove 'IAMDONE' and print the final story
        final = draft.replace('IAMDONE', '').strip()
        status.update(label="Success: Finished writing Long form content.")

-        # FIXME: The current implementation is suited for normal length content.
-        # In long content sending the whole content for each content metadata is expensive.
-#        blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(final,
-#                content_keywords, m_titles)
+#        # In long content sending the whole content for each content metadata is expensive.
+#        # https://ai.google.dev/gemini-api/docs/caching?lang=python
+#        #blog_title, blog_meta_desc, blog_tags, blog_categories = get_blog_metadata_longform(final)
+#        blog_categories = get_blog_metadata_longform(final)
+#        print("\n\n-----{blog_categories}------\n\n")
+#
 #        status.update(label="Success: Finished with Title, Meta Description, Tags, categories")
 #        generated_image_filepath = None
 #        # TBD: Save the blog content as a .md file. Markdown or HTML ?
 #        save_blog_to_file(final, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath)
-#    
-#        blog_frontmatter = dedent(f"""
-#        \n---------------------------------------------------------------------
-#        title: {blog_title.strip()}\n
-#        categories: [{blog_categories.strip()}]\n
-#        tags: [{blog_tags.strip()}]\n
-#        Meta description: {blog_meta_desc.replace(":", "-").strip()}\n
-#        ---------------------------------------------------------------------\n
-#        """)
-#
-#        logger.info(f"\n{blog_frontmatter}{final}\n\n")
-#        st.markdown(f"\n{blog_frontmatter}{final}\n\n")
+        
        logger.info(f"\n{final}\n\n")

        logger.info(f"\n\n ################ Finished writing Blog for : {content_keywords} #################### \n")