WIP - Streamlit UI, firecrawl - V0.5

2024-06-11 17:27:50 +05:30
parent f2fa8cfb47
commit ccbaa0e4fa
13 changed files with 442 additions and 211 deletions
--- a/lib/ai_writers/keywords_to_blog.py
+++ b/lib/ai_writers/keywords_to_blog.py
@@ -1,101 +0,0 @@
-import sys
-import os
-from textwrap import dedent
-from pathlib import Path
-from datetime import datetime
-
-from dotenv import load_dotenv
-load_dotenv(Path('../../.env'))
-from loguru import logger
-logger.remove()
-logger.add(sys.stdout,
-        colorize=True,
-        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
-    )
-
-from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search,\
-        do_tavily_ai_search, do_metaphor_ai_research, do_google_pytrends_analysis
-from .blog_from_google_serp import write_blog_google_serp, improve_blog_intro, blog_with_keywords, blog_with_research
-from ..ai_web_researcher.you_web_reseacher import get_rag_results, search_ydc_index
-from ..blog_metadata.get_blog_metadata import blog_metadata
-from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
-
-
-def write_blog_from_keywords(search_keywords, url=None):
-    """
-    This function will take a blog Topic to first generate sections for it
-    and then generate content for each section.
-    """
-    # Use to store the blog in a string, to save in a *.md file.
-    blog_markdown_str = ""
-    example_blog_titles = []
-    
-    logger.info(f"Researching and Writing Blog on keywords: {search_keywords}")
-    # Call on the got-researcher, tavily apis for this. Do google search for organic competition.
-    try:
-        google_search_result, g_titles = do_google_serp_search(search_keywords)
-        example_blog_titles.append(g_titles)
-        blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result)
-        tavily_search_result, t_titles, t_answer = do_tavily_ai_search(search_keywords)
-        # Hate the robotic introductions.
-        blog_markdown_str = improve_blog_intro(blog_markdown_str, t_answer)
-    except Exception as err:
-        logger.error(f"Failed in Google web research: {err}")
-    # logger.info/check the final blog content.
-    logger.info("\n######### Draft1: Finished Blog from Google web search: ###########\n\n")
-
-    # Do Tavily AI research to augument the above blog.
-    try:
-        #example_blog_titles.append(t_titles)
-        blog_markdown_str = blog_with_research(blog_markdown_str, tavily_search_result)
-        logger.info(f"######### Blog content after Tavily AI research: ######### \n\n{blog_markdown_str}\n\n")
-    except Exception as err:
-        logger.error(f"Failed to do Tavily AI research: {err}")
-    logger.info("######### Draft2: Blog content after Tavily AI research: #########\n\n")
-
-#    try:
-#        # Do Metaphor/Exa AI search.
-#        metaphor_search_result, m_titles = do_metaphor_ai_research(search_keywords)
-#        example_blog_titles.append(m_titles)
-#        blog_markdown_str = blog_with_research(blog_markdown_str, metaphor_search_result)
-#    except Exception as err:
-#        logger.error(f"Failed to do Metaphor AI search: {err}")
-#    logger.info("######### Draft3: Blog content after Tavily AI research: ######### \n\n")
-
-    # Do Google trends analysis and combine with latest blog.
-#    try:
-#        pytrends_search_result = do_google_pytrends_analysis(search_keywords)
-#        logger.info(f"Google Trends keywords to use in the blog: {pytrends_search_result}\n")
-#        blog_markdown_str = blog_with_keywords(blog_markdown_str, pytrends_search_result)
-#    except Exception as err:
-#        logger.error(f"Failed to do Google Trends Analysis:{err}")
-#    logger.info(f"########### Blog Content After Google Trends Analysis:######### \n {blog_markdown_str}\n\n")
-#    
-
-    blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str, 
-            search_keywords, example_blog_titles)
-
-    # fixme: Remove the hardcoding, need add another option OR in config ?
-    image_dir = os.path.join(os.getcwd(), "blog_images")
-    generated_image_name = f"generated_image_{datetime.now():%Y-%m-%d-%H-%M-%S}.png"
-    generated_image_filepath = os.path.join(image_dir, generated_image_name)
-    # Generate an image based on meta description
-    #logger.info(f"Calling Image generation with prompt: {blog_meta_desc}")
-    #main_img_path = generate_image(blog_meta_desc, image_dir, "dalle3")
-    if url:
-        try:
-            generated_image_filepath = screenshot_api(url, generated_image_filepath)
-        except Exception as err:
-            logger.error(f"Failed in taking compnay page screenshot: {err}")
-    # TBD: Save the blog content as a .md file. Markdown or HTML ?
-    save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, blog_tags, blog_categories, generated_image_filepath)
-
-    blog_frontmatter = dedent(f"""\n\n\n\
-                ---
-                title: {blog_title}
-                categories: [{blog_categories}]
-                tags: [{blog_tags}]
-                Meta description: {blog_meta_desc.replace(":", "-")}
-                ---\n\n""")
-    logger.info(f"{blog_frontmatter}{blog_markdown_str}")
-    logger.info(f"\n\n ################ Finished writing Blog for : {search_keywords} #################### \n")
--- a/lib/ai_writers/web_url_ai_writer.py
+++ b/lib/ai_writers/web_url_ai_writer.py
@@ -0,0 +1,113 @@
+import sys
+import os
+from textwrap import dedent
+from pathlib import Path
+from datetime import datetime
+import streamlit as st
+
+from dotenv import load_dotenv
+load_dotenv(Path('../../.env'))  # relative path: presumably resolved against the process CWD, not this file -- confirm
+from loguru import logger
+logger.remove()  # drop previously registered loguru handlers before adding the stdout sink below
+logger.add(sys.stdout,
+        colorize=True,
+        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
+    )
+
+from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search,\
+        do_tavily_ai_search, do_metaphor_ai_research, do_google_pytrends_analysis
+from ..ai_web_researcher.firecrawl_web_crawler import scrape_url
+from .blog_from_google_serp import write_blog_google_serp, blog_with_research
+from ..ai_web_researcher.you_web_reseacher import get_rag_results, search_ydc_index
+from ..blog_metadata.get_blog_metadata import blog_metadata
+from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
+from ..gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
+
+
+def blog_from_url(weburl):
+    """
+    Scrape ``weburl`` with ``scrape_url`` and print the scraped text (work in progress).
+    The function then calls ``exit(1)``, so the research/writing steps below never run yet.
+    """
+    # Accumulates the blog as a markdown string; passed to save_blog_to_file at the end.
+    blog_markdown_str = None
+    tavily_search_result = None
+    example_blog_titles = []
+
+    logger.info(f"Researching and Writing Blog on: {weburl}")
+    with st.status("Started Writing..", expanded=True) as status:
+        st.empty()
+        status.update(label=f"Researching and Writing Blog on: {weburl}")
+        scraped_text = scrape_url(weburl)
+        print(scraped_text)  # NOTE(review): debug dump of the scrape result
+        exit(1)  # NOTE(review): WIP debug stop (raises SystemExit) -- the code below is currently unreachable
+        # Google SERP search for organic competition, followed by a Tavily AI search.
+        try:
+            google_search_result, g_titles = do_google_serp_search(search_keywords)  # NOTE(review): search_keywords is never defined in this file; only weburl is in scope
+            status.update(label=f"🙎 Finished with Google web for Search: {search_keywords}")
+            example_blog_titles.append(g_titles)
+
+            status.update(label=f"🛀 Starting Tavily AI research: {search_keywords}")
+            tavily_search_result, t_titles, t_answer = do_tavily_ai_search(search_keywords)
+            status.update(label=f"🙆 Finished Google Search & Tavily AI Search on: {search_keywords}", 
+                    state="complete", expanded=False)
+
+        except Exception as err:
+            st.error(f"Failed in web research: {err}")
+            logger.error(f"Failed in web research: {err}")
+
+    with st.status("Started Writing blog from google search..", expanded=True) as status:
+        status.update(label="Researching and Writing Blog on keywords.")
+        # Write the first draft from the Google SERP results gathered above and render it.
+        try:
+            status.update(label=f"🛀 Writing blog from Google Search on: {search_keywords}")
+            blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result)  # NOTE(review): google_search_result is unbound if the research try-block above failed
+            st.markdown(blog_markdown_str)
+            status.update(label="🙎 Draft 1: Your Content from Google search result.", state="complete", expanded=False)
+        except Exception as err:
+            st.error(f"Failed in Google web research: {err}")
+            logger.error(f"Failed in Google web research: {err}")
+
+    # Log that the first-draft stage has been passed (logged even when the try above failed).
+    logger.info("######### Draft1: Finished Blog from Google web search: ###########")
+    
+    with st.status("Started Writing blog from Tavily Web search..", expanded=True) as status:
+        # Do Tavily AI research to augment the above blog.
+        try:
+            #example_blog_titles.append(t_titles)
+            if blog_markdown_str and tavily_search_result:
+                logger.info(f"\n\n######### Blog content after Tavily AI research: ######### \n\n")
+                blog_markdown_str = write_blog_google_serp(search_keywords, tavily_search_result)  # NOTE(review): overwrites draft 1 rather than merging; blog_with_research is imported but unused
+                status.update(label="Finished Writing Blog From Tavily Results:{blog_markdown_str}")  # NOTE(review): no f-prefix, so the placeholder is shown literally
+            else:
+                print("Not Writing with TAVILY..\n\n")
+        except Exception as err:
+            logger.error(f"Failed to do Tavily AI research: {err}")
+
+        status.update(label="🙎 Generating - Title, Meta Description, Tags, Categories for the content.")
+        try:
+            blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str)
+        except Exception as err:
+            st.error(f"Failed to get blog metadata: {err}")
+
+        try:
+            generated_image_filepath = generate_image(f"{blog_title} + ' ' + {blog_meta_desc}")  # NOTE(review): the "+ ' ' +" sits inside the f-string, so it is part of the prompt text
+        except Exception as err:
+            st.error(f"Failed in Image generation: {err}")
+
+        saved_blog_to_file = save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, 
+                            blog_tags, blog_categories, generated_image_filepath)  # NOTE(review): NameError here if either try above failed (names never bound)
+        status.update(label=f"Saved the content in this file: {saved_blog_to_file}")
+        blog_frontmatter = dedent(f"""
+        \n---------------------------------------------------------------------
+        title: {blog_title}\n
+        categories: [{blog_categories}]\n
+        tags: [{blog_tags}]\n
+        Meta description: {blog_meta_desc.replace(":", "-")}\n
+        ---------------------------------------------------------------------\n
+        """)
+        logger.info(f"\n\n --------- Finished writing Blog for : {search_keywords} -------------- \n")  # NOTE(review): search_keywords is undefined here as well
+        st.markdown(f"{blog_frontmatter}")
+        st.image(generated_image_filepath)
+        st.markdown(f"{blog_markdown_str}")
+        status.update(label=f"Finished, Review & Use your Original Content Below: {saved_blog_to_file}")