Fixing Errors - WIP - Making improvements, content workflows

2024-09-13 19:41:48 +05:30
parent ca8618a6a4
commit 52753901f1
11 changed files with 159 additions and 26 deletions
--- a/lib/ai_seo_tools/TBD
+++ b/lib/ai_seo_tools/TBD
@@ -33,3 +33,36 @@ https://www.kaggle.com/code/eliasdabbas/advertools-seo-crawl-analysis-template

 https://www.semrush.com/blog/content-analysis-xml-sitemaps-python/

+
+Different configurations that influence your technical SEO, and how to optimize them to maximize your organic search visibility.
+
+ALwrity will cover:
+
+    HTTP status
+
+    URL structure
+
+    Website links
+
+    XML sitemaps
+
+    Robots.txt
+
+    Meta robots tag
+
+    Canonicalization
+
+    JavaScript usage
+
+    HTTPS usage
+
+    Mobile friendliness
+
+    Structured data
+
+    Core Web Vitals
+
+    Hreflang annotations
+
+
+
--- a/lib/ai_web_researcher/common_utils.py
+++ b/lib/ai_web_researcher/common_utils.py
@@ -4,7 +4,6 @@ import sys
 import re
 import json
 from pathlib import Path
-import streamlit as st
 from datetime import datetime, timedelta
 from pathlib import Path
 from loguru import logger
@@ -93,7 +92,6 @@ def save_in_file(table_content):
    try:
        # Save the content to the file
        with open(file_path, "a+", encoding="utf-8") as file:
-            st.write(table_content)
            file.write(table_content)
            file.write("\n" * 3)  # Add three newlines at the end
        logger.info(f"Search content saved to {file_path}")
--- a/lib/ai_web_researcher/google_serp_search.py
+++ b/lib/ai_web_researcher/google_serp_search.py
@@ -49,9 +49,9 @@ logger.add(
           )

 from .common_utils import save_in_file, cfg_search_param
-
-
 from tenacity import retry, stop_after_attempt, wait_random_exponential
+
+
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 def google_search(query):
    """
@@ -75,10 +75,12 @@ def google_search(query):
    try:
        logger.info("Trying Google search with Serper.dev: https://serper.dev/api-key")
        search_result = perform_serperdev_google_search(query)
-        process_search_results(search_result)
-        return(search_result)
+        if search_result:
+            process_search_results(search_result)
+            return(search_result)
    except Exception as err:
-        logger.error(f"Failed to do Google search with serper.dev: {err}")
+        logger.error(f"Failed Google search with serper.dev: {err}")
+        return None

 
 #    # Retry with BROWSERLESS API
--- a/lib/ai_web_researcher/gpt_online_researcher.py
+++ b/lib/ai_web_researcher/gpt_online_researcher.py
@@ -62,10 +62,12 @@ def do_google_serp_search(search_keywords):
    try:
        logger.info(f"Doing Google search for: {search_keywords}\n")
        g_results = google_search(search_keywords)
-        g_titles = extract_info(g_results, 'titles')
-        return(g_results, g_titles)
+        if g_results:
+            g_titles = extract_info(g_results, 'titles')
+            return(g_results, g_titles)
    except Exception as err:
-        logger.error(f"Failed to do Google Serpapi research: {err}")
+        logger.error(f"Failed to do Google SERP research: {err}")
+        return None
        # Not failing, as tavily would do same and then GPT-V to search.


--- a/lib/ai_web_researcher/tavily_ai_search.py
+++ b/lib/ai_web_researcher/tavily_ai_search.py
@@ -36,7 +36,7 @@ from tabulate import tabulate
 # Load environment variables from .env file
 load_dotenv(Path('../../.env'))
 from rich import print
-
+import streamlit as st
 # Configure logger
 logger.remove()
 logger.add(sys.stdout,
@@ -95,11 +95,37 @@ def get_tavilyai_results(keywords, max_results=5):
                    max_results=max_results)

        print_result_table(tavily_search_result)
+        streamlit_display_results(tavily_search_result)
        return(tavily_search_result)
    except Exception as err:
        logger.error(f"Failed to do Tavily Research: {err}")


+def streamlit_display_results(output_data):
+    """Display Tavily AI search results in Streamlit UI."""
+
+    # Prepare data for display
+    table_data = []
+    for item in output_data.get("results", []):
+        title = item.get("title", "")
+        snippet = item.get("content", "")
+        link = item.get("url", "")
+        table_data.append([title, snippet, link])
+
+    # Display the table in Streamlit
+    st.table(table_data)
+
+    # Display the 'answer' in Streamlit
+    answer = output_data.get("answer", "No answer available")
+    st.write(f"**The answer to your search query:** {answer}")
+
+    # Display follow-up questions if available
+    follow_up_questions = output_data.get("follow_up_questions", [])
+    if follow_up_questions:
+        st.write(f"**Follow-up questions for the query:** {output_data.get('query')}")
+        st.write(", ".join(follow_up_questions))
+
+
 def print_result_table(output_data):
    """ Pretty print the tavily AI search result. """
    # Prepare data for tabulate
--- a/lib/ai_writers/.keywords_to_blog_streamlit.py.swp
+++ b/lib/ai_writers/.keywords_to_blog_streamlit.py.swp
--- a/lib/ai_writers/blog_from_google_serp.py
+++ b/lib/ai_writers/blog_from_google_serp.py
@@ -17,7 +17,7 @@ def write_blog_google_serp(search_keyword, search_results):
    """Combine the given online research and GPT blog content"""
    prompt = f"""
        As expert Creative Content writer,
-        I want you to write blog post, that explores {search_keyword} and also include 5 FAQs.
+        I want you to write highly detailed blog post, that explores {search_keyword} and also include 5 FAQs.

        I want the post to offer unique insights, relatable examples, and a fresh perspective on the topic.
        Here are some Google search results to spark your creativity on {search_keyword}:
@@ -65,16 +65,15 @@ def improve_blog_intro(blog_content, blog_intro):
 def blog_with_keywords(blog, keywords):
    """Combine the given online research and gpt blog content"""
    prompt = f"""
+        You are Sarah, the Creative Content writer, writing up fresh ideas and crafts them with care. 
+        She makes complex topics easy to understand and writes in a friendly tone that connects with everyone.
+        She excels at simplifying complex topics and communicates with charisma, making technical jargon come alive for her audience.
+
        As an expert digital content writer, specializing in content optimization and SEO. 
        I will provide you with my 'blog content' and 'list of keywords' on the same topic.
        Your task is to write an original blog, utilizing given keywords and blog content.
        Your blog should be highly detailed and well formatted. 

-        You are Sarah, the Creative Content writer, writing up fresh ideas and crafts them with care. 
-        She makes complex topics easy to understand and writes in a friendly tone that connects with everyone.
-        She excels at simplifying complex topics and communicates with charisma, making technical jargon come alive for her audience.
-        
-
        Blog content: '{blog}'
        list of keywords: '{keywords}'
        """
--- a/lib/ai_writers/keywords_to_blog_streamlit.py
+++ b/lib/ai_writers/keywords_to_blog_streamlit.py
@@ -55,23 +55,30 @@ def write_blog_from_keywords(search_keywords, url=None):
    example_blog_titles = []

    logger.info(f"Researching and Writing Blog on keywords: {search_keywords}")
-    with st.status("Started Writing..", expanded=True) as status:
+    with st.status("Started Web Research..", expanded=True) as status:
        st.empty()
        status.update(label="Researching and Writing Blog on keywords.")
        # Call on the got-researcher, tavily apis for this. Do google search for organic competition.
        try:
            google_search_result, g_titles = do_google_serp_search(search_keywords)
-            status.update(label=f"🙎 Finished with Google web for Search: {search_keywords}")
-            example_blog_titles.append(g_titles)
+            if google_search_result:
+                status.update(label=f"🙎 Finished with Google web for Search: {search_keywords}")
+                example_blog_titles.append(g_titles)
+            else:
+                st.warning("Failed to Google SERP results.")
+        except Exception as err:
+            st.warning(f"Failed in Google web research: {err}")
+            logger.error(f"Failed in Google web research: {err}")

+        try:
            status.update(label=f"🛀 Starting Tavily AI research: {search_keywords}")
            tavily_search_result, t_titles, t_answer = do_tavily_ai_search(search_keywords)
            status.update(label=f"🙆 Finished Google Search & Tavily AI Search on: {search_keywords}",
                          state="complete", expanded=False)
-
        except Exception as err:
-            st.error(f"Failed in web research: {err}")
-            logger.error(f"Failed in web research: {err}")
+            st.warning(f"Failed in Tavily web research: {err}")
+            logger.error(f"Failed in Tavily web research: {err}")
+

    with st.status("Started Writing blog from google search..", expanded=True) as status:
        status.update(label="Researching and Writing Blog on keywords.")
@@ -82,6 +89,7 @@ def write_blog_from_keywords(search_keywords, url=None):
            st.markdown(blog_markdown_str)
            status.update(label="🙎 Draft 1: Your Content from Google search result.", state="complete", expanded=False)
        except Exception as err:
+            status.update(label="🙎 Failed Content from Google SERP.", state="error", expanded=False)
            st.error(f"Failed in Google web research: {err}")
            logger.error(f"Failed in Google web research: {err}")

@@ -92,11 +100,12 @@ def write_blog_from_keywords(search_keywords, url=None):
        # Do Tavily AI research to augment the above blog.
        try:
            # example_blog_titles.append(t_titles)
-            if blog_markdown_str and tavily_search_result:
+            if tavily_search_result:
                logger.info(f"\n\n######### Blog content after Tavily AI research: ######### \n\n")
                blog_markdown_str = write_blog_google_serp(search_keywords, tavily_search_result)
                status.update(label=f"Finished Writing Blog From Tavily Results:{blog_markdown_str}", expanded=True)
        except Exception as err:
+            status.update(label="🙎 Failed content from Tavily search.", state="error", expanded=False)
            logger.error(f"Failed to do Tavily AI research: {err}")

        status.update(label="🙎 Generating - Title, Meta Description, Tags, Categories for the content.", expanded=True)
--- a/lib/gpt_providers/text_generation/gemini_pro_text.py
+++ b/lib/gpt_providers/text_generation/gemini_pro_text.py
@@ -52,3 +52,47 @@ def gemini_text_response(prompt, temperature, top_p, n, max_tokens, system_promp
        return response.text
    except Exception as err:
        logger.error(f"Failed to get response from Gemini: {err}. Retrying.")
+
+
+#@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
+#def gemini_blog_metadata_json(blog_content):
+#    """ Common function to get response from gemini pro Text. """
+#    prompt =  f"I will provide you with the content of a blog post. Based on this content, you need to generate the following elements in JSON format:\n\n1. **Blog Title**: A compelling and relevant title that summarizes the blog content.\n2. **Meta Description**: A concise meta description (up to 160 characters) that captures the essence of the blog post and encourages clicks.\n3. **Tags**: A list of 5-10 relevant tags that represent the key topics covered in the blog post.\n4. **Categories**: A list of 1-3 appropriate categories that best describe the blog post's main themes.\n\nOutput your response in the following JSON format:\n\n```json\n{\n  \"type\": \"object\",\n  \"properties\": {\n    \"blog_title\": {\n      \"type\": \"string\"\n    },\n    \"meta_description\": {\n      \"type\": \"string\"\n    },\n    \"tags\": {\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"string\"\n      }\n    },\n    \"categories\": {\n      \"type\": \"array\",\n      \"items\": {\n        \"type\": \"string\"\n      }\n    }\n  }\n}\n\n. The Blog Content is given below: \n\n{blog_content}\n\n"
+#    
+#    try:
+#        genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
+#    except Exception as err:
+#        logger.error(f"Failed to configure Gemini: {err}")
+#
+#    # Create the model
+#    generation_config = {
+#        "temperature": 1,
+#        "top_p": 0.95,
+#        "top_k": 64,
+#        "max_output_tokens": 8192,
+#        "response_schema": content.Schema(
+#        type = content.Type.OBJECT,
+#            properties = {
+#                "response": content.Schema(
+#                    type = content.Type.STRING,
+#                    ),
+#            },
+#        ),
+#        "response_mime_type": "application/json",
+#    }
+#
+#    model = genai.GenerativeModel(
+#        model_name="gemini-1.5-flash",
+#        generation_config=generation_config,
+#        # safety_settings = Adjust safety settings
+#        # See https://ai.google.dev/gemini-api/docs/safety-settings
+#        )
+#
+#        try:
+#        # text_response = []
+#        response = model.generate_content(prompt)
+#        if response:
+#            logger.info(f"Number of Token in Prompt Sent: {model.count_tokens(prompt)}")
+#            return response.text
+#    except Exception as err:
+#        logger.error(f"Failed to get SEO METADATA from Gemini: {err}. Retrying.")
--- a/lib/gpt_providers/text_generation/main_text_generation.py
+++ b/lib/gpt_providers/text_generation/main_text_generation.py
@@ -33,8 +33,28 @@ def llm_text_gen(prompt):
            blog_output_format, blog_length = read_return_config_section('blog_characteristics')

        # Construct the system prompt with the sidebar config params.
-        system_instructions = read_return_config_section('system_prompt') 
+        #system_instructions = read_return_config_section('system_prompt')
+        system_instructions = f"""You are a highly skilled content writer with a knack for creating engaging and informative content. 
+            Your expertise spans various writing styles and formats.

+            Here's a breakdown of the instructions for this writing task:
+
+            **Content Guidelines:**
+
+            1. **Language:** Your response must be in **{blog_language}** language. 
+            2. **Tone and Brand Alignment:** Adjust your tone, voice, and personality to be appropriate for a **{blog_tone}** audience. 
+            3. **Content Length:**  Ensure your response is approximately **{blog_length}** words in length.
+            4. **Blog Type:**  The type of blog is **{blog_type}**. Write accordingly, adhering to the conventions and expectations of this type of content.
+            5. **Target Audience:** The demographic for this content is **{blog_demographic}**. Keep their interests and needs in mind.
+            6. **Output Format:** Your response should be in **{blog_output_format}** format. This could be Markdown, HTML, or a specific structured format, depending on the user's preference.
+
+            **Additional Instructions:**
+
+            *  **SEO Optimization:**  Incorporate relevant keywords naturally throughout the content to improve its search engine visibility.
+            * **Call to Action:** Include a call to action if appropriate for the blog type and target audience.
+            * **Factual Accuracy:**  Ensure your content is accurate and reliable. Back up any claims with credible sources.
+            * **Unique Voice and Style:** Inject your unique voice and writing style to make the content engaging and memorable. """
+        
        #gpt_provider = check_gpt_provider(gpt_provider)
        # Check if API key is provided for the given gpt_provider
        get_api_key(gpt_provider)
--- a/lib/workspace/alwrity_config/main_config.json
+++ b/lib/workspace/alwrity_config/main_config.json
@@ -5,7 +5,7 @@
        "Blog Demographic": "Professional",
        "Blog Type": "Informational",
        "Blog Language": "English",
-        "Blog Output Format": "markdown"
+        "Blog Output Format": "HTML"
    },
    "Blog Images Details": {
        "Image Generation Model": "stable-diffusion",