WIP - Streamlit UI, firecrawl - V0.5

2024-06-12 16:01:46 +05:30
parent ccbaa0e4fa
commit f2aa79264e
12 changed files with 201 additions and 261 deletions
--- a/lib/ai_writers/web_url_ai_writer.py
+++ b/lib/ai_writers/web_url_ai_writer.py
@@ -1,6 +1,8 @@
 import sys
 import os
+
 from textwrap import dedent
+import json
 from pathlib import Path
 from datetime import datetime
 import streamlit as st
@@ -14,14 +16,11 @@ logger.add(sys.stdout,
        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
    )

-from ..ai_web_researcher.gpt_online_researcher import do_google_serp_search,\
-        do_tavily_ai_search, do_metaphor_ai_research, do_google_pytrends_analysis
 from ..ai_web_researcher.firecrawl_web_crawler import scrape_url
-from .blog_from_google_serp import write_blog_google_serp, blog_with_research
-from ..ai_web_researcher.you_web_reseacher import get_rag_results, search_ydc_index
 from ..blog_metadata.get_blog_metadata import blog_metadata
 from ..blog_postprocessing.save_blog_to_file import save_blog_to_file
 from ..gpt_providers.text_to_image_generation.main_generate_image_from_prompt import generate_image
+from ..gpt_providers.text_generation.main_text_generation import llm_text_gen


 def blog_from_url(weburl):
@@ -38,62 +37,36 @@ def blog_from_url(weburl):
    with st.status("Started Writing..", expanded=True) as status:
        st.empty()
        status.update(label=f"Researching and Writing Blog on: {weburl}")
-        scraped_text = scrape_url(weburl)
-        print(scraped_text)
-        exit(1)
-        # Call on the got-researcher, tavily apis for this. Do google search for organic competition.
        try:
-            google_search_result, g_titles = do_google_serp_search(search_keywords)
-            status.update(label=f"🙎 Finished with Google web for Search: {search_keywords}")
-            example_blog_titles.append(g_titles)
-
-            status.update(label=f"🛀 Starting Tavily AI research: {search_keywords}")
-            tavily_search_result, t_titles, t_answer = do_tavily_ai_search(search_keywords)
-            status.update(label=f"🙆 Finished Google Search & Tavily AI Search on: {search_keywords}", 
-                    state="complete", expanded=False)
-
+            scraped_text = scrape_url(weburl)
+            logger.info(scraped_text)
        except Exception as err:
-            st.error(f"Failed in web research: {err}")
+            st.error(f"Failed to scrape web page from url-{weburl} - Error: {err}")
            logger.error(f"Failed in web research: {err}")
+            st.stop()
+        status.update(label="Successfully Scraped/Fetched url: {weburl}", expanded=False, state="complete")

-    with st.status("Started Writing blog from google search..", expanded=True) as status:
-        status.update(label="Researching and Writing Blog on keywords.")
-        # Call on the got-researcher, tavily apis for this. Do google search for organic competition.
-        try:
-            status.update(label=f"🛀 Writing blog from Google Search on: {search_keywords}")
-            blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result)
-            st.markdown(blog_markdown_str)
-            status.update(label="🙎 Draft 1: Your Content from Google search result.", state="complete", expanded=False)
-        except Exception as err:
-            st.error(f"Failed in Google web research: {err}")
-            logger.error(f"Failed in Google web research: {err}")
-
-    # logger.info/check the final blog content.
-    logger.info("######### Draft1: Finished Blog from Google web search: ###########")
-    
-    with st.status("Started Writing blog from Tavily Web search..", expanded=True) as status:
+    with st.status(f"Started Writing blog from {weburl}..", expanded=True) as status:
        # Do Tavily AI research to augument the above blog.
        try:
-            #example_blog_titles.append(t_titles)
-            if blog_markdown_str and tavily_search_result:
-                logger.info(f"\n\n######### Blog content after Tavily AI research: ######### \n\n")
-                blog_markdown_str = write_blog_google_serp(search_keywords, tavily_search_result)
-                status.update(label="Finished Writing Blog From Tavily Results:{blog_markdown_str}")
-            else:
-                print("Not Writing with TAVILY..\n\n")
+            blog_markdown_str = write_blog_from_weburl(scraped_text)
+            status.update(label="Finished Writing Blog From: {weburl}")
        except Exception as err:
-            logger.error(f"Failed to do Tavily AI research: {err}")
+            logger.error(f"Failed to write blog from: {weburl}")
+            st.error(f"Failed to write blog from: {weburl}")
+            st.stop()

-        status.update(label="🙎 Generating - Title, Meta Description, Tags, Categories for the content.")
        try:
+            status.update(label="🙎 Generating - Title, Meta Description, Tags, Categories for the content.")
            blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(blog_markdown_str)
        except Exception as err:
            st.error(f"Failed to get blog metadata: {err}")

        try:
+            status.update(label="🙎 Generating Image for the new blog.")
            generated_image_filepath = generate_image(f"{blog_title} + ' ' + {blog_meta_desc}")
        except Exception as err:
-            st.error(f"Failed in Image generation: {err}")
+            st.warning(f"Failed in Image generation: {err}")

        saved_blog_to_file = save_blog_to_file(blog_markdown_str, blog_title, blog_meta_desc, 
                            blog_tags, blog_categories, generated_image_filepath)
@@ -106,8 +79,45 @@ def blog_from_url(weburl):
        Meta description: {blog_meta_desc.replace(":", "-")}\n
        ---------------------------------------------------------------------\n
        """)
-        logger.info(f"\n\n --------- Finished writing Blog for : {search_keywords} -------------- \n")
+        logger.info(f"\n\n --------- Finished writing Blog for : {weburl} -------------- \n")
        st.markdown(f"{blog_frontmatter}")
        st.image(generated_image_filepath)
        st.markdown(f"{blog_markdown_str}")
        status.update(label=f"Finished, Review & Use your Original Content Below: {saved_blog_to_file}")
+        
+
+def write_blog_from_weburl(scraped_website):
+    """Write a blog post with FAQs from scraped web page content via the configured LLM.
+
+    Reads 'Blog Content Characteristics' from the JSON file named by ALWRITY_CONFIG,
+    returns llm_text_gen's response and raises RuntimeError on config or LLM failure."""
+    try:
+        with open(Path(os.environ["ALWRITY_CONFIG"]), 'r', encoding='utf-8') as file:
+            blog_characteristics = json.load(file)['Blog Content Characteristics']
+    except Exception as err:
+        logger.error(f"Error: Failed to read values from config: {err}")
+        # Raise, not exit(1): SystemExit is not an Exception and would skip the caller's handler.
+        raise RuntimeError(f"Failed to read values from config: {err}") from err
+
+    prompt = f"""
+        As expert Creative Content writer, I will provide you with scraped website content.
+        I want you to write a detailed {blog_characteristics['Blog Type']} blog post including 5 FAQs.
+        
+        Below are the guidelines to follow:
+        1). You must respond in {blog_characteristics['Blog Language']} language.
+        2). Tone and Brand Alignment: Adjust your tone, voice, personality for {blog_characteristics['Blog Tone']} audience.
+        3). Make sure your response content length is of {blog_characteristics['Blog Length']} words.
+        4). Base the FAQs on questions a reader would ask about the provided 'Website Content'.
+
+        I want the post to offer unique insights, relatable examples, and a fresh perspective on the topic.
+        \n\n
+        Website Content:
+        '''{scraped_website}'''
+        """
+    logger.info("Generating blog and FAQs from scraped website content.")
+
+    try:
+        return llm_text_gen(prompt)
+    except Exception as err:
+        logger.error(f"Failed to get response from LLM: {err}")
+        raise RuntimeError(f"Failed to get response from LLM: {err}") from err