From 52753901f1071aac7b1b2aed1e7fe9241d722203 Mon Sep 17 00:00:00 2001 From: ajaysi Date: Fri, 13 Sep 2024 19:41:48 +0530 Subject: [PATCH] Fixing Errors - WIP - Making improvements, content workflows --- lib/ai_seo_tools/TBD | 33 +++++++++++++ lib/ai_web_researcher/common_utils.py | 2 - lib/ai_web_researcher/google_serp_search.py | 12 +++-- .../gpt_online_researcher.py | 8 ++-- lib/ai_web_researcher/tavily_ai_search.py | 28 ++++++++++- .../.keywords_to_blog_streamlit.py.swp | Bin 0 -> 16384 bytes lib/ai_writers/blog_from_google_serp.py | 11 ++--- lib/ai_writers/keywords_to_blog_streamlit.py | 23 ++++++--- .../text_generation/gemini_pro_text.py | 44 ++++++++++++++++++ .../text_generation/main_text_generation.py | 22 ++++++++- lib/workspace/alwrity_config/main_config.json | 2 +- 11 files changed, 159 insertions(+), 26 deletions(-) create mode 100644 lib/ai_writers/.keywords_to_blog_streamlit.py.swp diff --git a/lib/ai_seo_tools/TBD b/lib/ai_seo_tools/TBD index cbe6e3e2..0dc0a3a9 100644 --- a/lib/ai_seo_tools/TBD +++ b/lib/ai_seo_tools/TBD @@ -33,3 +33,36 @@ https://www.kaggle.com/code/eliasdabbas/advertools-seo-crawl-analysis-template https://www.semrush.com/blog/content-analysis-xml-sitemaps-python/ + +different configurations that influence your technical SEO and how to optimize them to maximize your organic search visibility. 
+ +ALwrity’ll cover: + + HTTP status + + URL structure + + Website links + + XML sitemaps + + Robots.txt + + Meta robots tag + + Canonicalization + + JavaScript usage + + HTTPS usage + + Mobile friendliness + + Structured data + + Core Web Vitals + + Hreflang annotations + + + diff --git a/lib/ai_web_researcher/common_utils.py b/lib/ai_web_researcher/common_utils.py index a2bef0e7..2bf44055 100644 --- a/lib/ai_web_researcher/common_utils.py +++ b/lib/ai_web_researcher/common_utils.py @@ -4,7 +4,6 @@ import sys import re import json from pathlib import Path -import streamlit as st from datetime import datetime, timedelta from pathlib import Path from loguru import logger @@ -93,7 +92,6 @@ def save_in_file(table_content): try: # Save the content to the file with open(file_path, "a+", encoding="utf-8") as file: - st.write(table_content) file.write(table_content) file.write("\n" * 3) # Add three newlines at the end logger.info(f"Search content saved to {file_path}") diff --git a/lib/ai_web_researcher/google_serp_search.py b/lib/ai_web_researcher/google_serp_search.py index 06799ac8..d834d735 100644 --- a/lib/ai_web_researcher/google_serp_search.py +++ b/lib/ai_web_researcher/google_serp_search.py @@ -49,9 +49,9 @@ logger.add( ) from .common_utils import save_in_file, cfg_search_param - - from tenacity import retry, stop_after_attempt, wait_random_exponential + + @retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) def google_search(query): """ @@ -75,10 +75,12 @@ def google_search(query): try: logger.info("Trying Google search with Serper.dev: https://serper.dev/api-key") search_result = perform_serperdev_google_search(query) - process_search_results(search_result) - return(search_result) + if search_result: + process_search_results(search_result) + return(search_result) except Exception as err: - logger.error(f"Failed to do Google search with serper.dev: {err}") + logger.error(f"Failed Google search with serper.dev: {err}") + return None # # 
Retry with BROWSERLESS API diff --git a/lib/ai_web_researcher/gpt_online_researcher.py b/lib/ai_web_researcher/gpt_online_researcher.py index fd54594a..c183d003 100644 --- a/lib/ai_web_researcher/gpt_online_researcher.py +++ b/lib/ai_web_researcher/gpt_online_researcher.py @@ -62,10 +62,12 @@ def do_google_serp_search(search_keywords): try: logger.info(f"Doing Google search for: {search_keywords}\n") g_results = google_search(search_keywords) - g_titles = extract_info(g_results, 'titles') - return(g_results, g_titles) + if g_results: + g_titles = extract_info(g_results, 'titles') + return(g_results, g_titles) except Exception as err: - logger.error(f"Failed to do Google Serpapi research: {err}") + logger.error(f"Failed to do Google SERP research: {err}") + return None # Not failing, as tavily would do same and then GPT-V to search. diff --git a/lib/ai_web_researcher/tavily_ai_search.py b/lib/ai_web_researcher/tavily_ai_search.py index 2bd5d647..35d9f2c9 100644 --- a/lib/ai_web_researcher/tavily_ai_search.py +++ b/lib/ai_web_researcher/tavily_ai_search.py @@ -36,7 +36,7 @@ from tabulate import tabulate # Load environment variables from .env file load_dotenv(Path('../../.env')) from rich import print - +import streamlit as st # Configure logger logger.remove() logger.add(sys.stdout, @@ -95,11 +95,37 @@ def get_tavilyai_results(keywords, max_results=5): max_results=max_results) print_result_table(tavily_search_result) + streamlit_display_results(tavily_search_result) return(tavily_search_result) except Exception as err: logger.error(f"Failed to do Tavily Research: {err}") +def streamlit_display_results(output_data): + """Display Tavily AI search results in Streamlit UI.""" + + # Prepare data for display + table_data = [] + for item in output_data.get("results", []): + title = item.get("title", "") + snippet = item.get("content", "") + link = item.get("url", "") + table_data.append([title, snippet, link]) + + # Display the table in Streamlit + st.table(table_data) + + 
# Display the 'answer' in Streamlit + answer = output_data.get("answer", "No answer available") + st.write(f"**The answer to your search query:** {answer}") + + # Display follow-up questions if available + follow_up_questions = output_data.get("follow_up_questions", []) + if follow_up_questions: + st.write(f"**Follow-up questions for the query:** {output_data.get('query')}") + st.write(", ".join(follow_up_questions)) + + def print_result_table(output_data): """ Pretty print the tavily AI search result. """ # Prepare data for tabulate diff --git a/lib/ai_writers/.keywords_to_blog_streamlit.py.swp b/lib/ai_writers/.keywords_to_blog_streamlit.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..9d8a1610d4267c66dba2d37ea380b3d3bc606708 GIT binary patch literal 16384 zcmeHOTZ|-C87_C(WkFUK0evy?*qtyvZchy>h%oMK!d_;X#D!&N)dWjb z`kN)nHdyj+bh~wOJ=Joag_p0;P^?eS%lK6D{de>h2 zeUUmYKD4@UN&2afi-F%<mc1WE~% z5-255N}!a$f1w1p=Qtk*UEiRDAJ6}7{{PSSJI>#LKLWo1eh54XNZ>N?2(Spa!0o`B z?{l2L051VQ1)c%E3e7aT9tK{!1#|+h0?z_Z1CIh90X_u${t(szt^rqpZQv;I`psAm z_!>|F?gD;!&~d&GJOJzm-aOzqF9P2N)_~iABfx&(`J1o~@CJ$_KLVZvR)L#=gTMjc zc@$Qfz&nud-vN^I*GbkhE|$BJl&Af~?Z+NZMJ?b>5iGYW|MHdQXD2 zWz=J5qo@-IwkEi2by*ZHvF${CZ`k)Lb}I9ZojsMqrO}TnJH>+lV_4c1tP`aRQs}Wl z)>+Co{b0y=>?f=pN%Zj(mz|E#QLU(Zs-{SGc<8rSE9%7}1%*-Qj#in*P33}FhlShF%*R9l<*ov`)@RrwQU(7R}U?LLm z61Kw1g+8%#M@Uyl8Od6^vcmlUEBGN>*OC~BCbI&9l+iCQS(*U^4!8`#)Qyi9TP+c% z?2P(=0Etd4k?j^OP`ro@v0Ndk2O<@DR#$kCh-}+woMw7btFC>8t@xpzbir&WtB=n7k$v{Z9%)F>!dgFZ{J)maBSq<156F3@5 zB}kvlc7f>JZjH^F-#K+A3LMPT1S6k!tqFS?zh_Ozshic-G#c&UnG5Gh01$p+Pm$dv z0p*xfEV0E&k@P!Qdb3;}$0GD<9kY4G&7kRKYSmMjR;k0AW1XzYnzB*58!MXMZyS44 z)Tbi#9c7V` zMiO7(+LU*IA?LU!K{RX-n)#+6>(1H7+@8nUu=d(%RVo#=3i&0B4p~QpLc-L-zlEs? 
zQ|0pnhr3NgD|2@1aFDFs57jL;@Znlhz9F!HT7Nx?;Uckc+m}h2O={-QPS#J=(nNwM z+DyG7sShMM`;ki@Hf(+FTahPfH1oXc3H2j0A#@wvmEiHhrV%(UCxKL(Ik9NH##OCACgbkC`JL;#KDmuRimT2gAfqa;Ia|DG?)=rN)j& zOm#L}D5MNg2=x}D^)tcLa7i09DVz;Z?{U11aF^K0X?=m?Z=ppdkrsw{obsd=V`Y# z=empZA8yRe&#Ug-WBBFEDD07$-{jIK^8hiZv!zF8%Zw&;jHYSJr#pAZDp(DKX|R1p zFSqMN#LNuulKMUPBfT9s5bC#XjX^HnnYWEZy>6hxJ&-)MEl6@ew_S-R!?5K?_GgsX zZ;+=$`Ts@ar;qK2%j%AOR$B zC-4Gt^UncqBM*NI_zmz3Ab}O&HRR<#0bT?i18xOg0*~JT5`g$U50qCafqz#5Fj|ur zk}~C$YZXsWRt_JaLa5BLXOYCL9#yu6k^@p%3SLblfb3j*dlj-GDAR7=5-=WvXs4MA zk#oTYJYWzFmd9!YRkfNn{P@Zdc1&44=|=s)V_gcK5mfpe1i=VRJjW7JW#_pH2-I*D zzEB`dflwbtpJExBSA-onh&qGY5o{pzgcQ<)&x|^kUF!OXvr`cSMQ3D}!{>MzB$M>? z>PUb#eKBBntBe8hgJh3NA9V46oid^RNdZw&9s`EGLKL%8$;uwsbeoPKJ=&fS z{+O!+$*zjHYBs#`Yy<}9N#;J|ff8Voe~fp^lHe*>K_o*(I$@H2s-k7S=HhDR%8h3= zx!mROa>y3!l}p-9rST-XtAuAtGM9~0~;mMtDy*3;SQ2*k!YrDxH|`6aY3nk+kZW z{E6^f=_X%kT%{=8Qiv!W@)#>K?x{F5i$%&(j4o+?Q)vigB}yAJ30T<{vImTwVA-)f z=@Y6NMml8~&6(*a^%$Seo)sms9d?w>;onha4v>}Ij?Twv0=kgftv-J4%o1Bip(BzM zuD1QHtl(Uz-dKLe&kmST%~amyrYPA>*9|S#srm{!O&sUS zerR;gBuJAQh&a~rQ;@>6vxo-@*Pdc$wLXxuSzzl_p{=uXn4g`7(n>$3@^Q?Z#MvofwGD~bUu39yIwwj-K5k-&IhVrd#6~mCW*qWugmjek` zoLklwEGF$8aWrWwZ*27BBTj9Q%we|iS6&utc%<>4KFYJLirO27&G{?^hSTWL`(om) i9ASHYAeRM=69+rfZW8uH^9`K)QMpPLy>t<5-uV|b98=@~ literal 0 HcmV?d00001 diff --git a/lib/ai_writers/blog_from_google_serp.py b/lib/ai_writers/blog_from_google_serp.py index df9249e3..806ce130 100644 --- a/lib/ai_writers/blog_from_google_serp.py +++ b/lib/ai_writers/blog_from_google_serp.py @@ -17,7 +17,7 @@ def write_blog_google_serp(search_keyword, search_results): """Combine the given online research and GPT blog content""" prompt = f""" As expert Creative Content writer, - I want you to write blog post, that explores {search_keyword} and also include 5 FAQs. + I want you to write highly detailed blog post, that explores {search_keyword} and also include 5 FAQs. I want the post to offer unique insights, relatable examples, and a fresh perspective on the topic. 
Here are some Google search results to spark your creativity on {search_keyword}: @@ -65,16 +65,15 @@ def improve_blog_intro(blog_content, blog_intro): def blog_with_keywords(blog, keywords): """Combine the given online research and gpt blog content""" prompt = f""" + You are Sarah, the Creative Content writer, writing up fresh ideas and crafts them with care. + She makes complex topics easy to understand and writes in a friendly tone that connects with everyone. + She excels at simplifying complex topics and communicates with charisma, making technical jargon come alive for her audience. + As an expert digital content writer, specializing in content optimization and SEO. I will provide you with my 'blog content' and 'list of keywords' on the same topic. Your task is to write an original blog, utilizing given keywords and blog content. Your blog should be highly detailed and well formatted. - You are Sarah, the Creative Content writer, writing up fresh ideas and crafts them with care. - She makes complex topics easy to understand and writes in a friendly tone that connects with everyone. - She excels at simplifying complex topics and communicates with charisma, making technical jargon come alive for her audience. - - Blog content: '{blog}' list of keywords: '{keywords}' """ diff --git a/lib/ai_writers/keywords_to_blog_streamlit.py b/lib/ai_writers/keywords_to_blog_streamlit.py index f938c6d5..9c9c4a35 100644 --- a/lib/ai_writers/keywords_to_blog_streamlit.py +++ b/lib/ai_writers/keywords_to_blog_streamlit.py @@ -55,23 +55,30 @@ def write_blog_from_keywords(search_keywords, url=None): example_blog_titles = [] logger.info(f"Researching and Writing Blog on keywords: {search_keywords}") - with st.status("Started Writing..", expanded=True) as status: + with st.status("Started Web Research..", expanded=True) as status: st.empty() status.update(label="Researching and Writing Blog on keywords.") # Call on the got-researcher, tavily apis for this. 
Do google search for organic competition. try: google_search_result, g_titles = do_google_serp_search(search_keywords) - status.update(label=f"πŸ™Ž Finished with Google web for Search: {search_keywords}") - example_blog_titles.append(g_titles) + if google_search_result: + status.update(label=f"πŸ™Ž Finished with Google web for Search: {search_keywords}") + example_blog_titles.append(g_titles) + else: + st.warning("Failed to Google SERP results.") + except Exception as err: + st.warning(f"Failed in Google web research: {err}") + logger.error(f"Failed in Google web research: {err}") + try: status.update(label=f"πŸ›€ Starting Tavily AI research: {search_keywords}") tavily_search_result, t_titles, t_answer = do_tavily_ai_search(search_keywords) status.update(label=f"πŸ™† Finished Google Search & Tavily AI Search on: {search_keywords}", state="complete", expanded=False) - except Exception as err: - st.error(f"Failed in web research: {err}") - logger.error(f"Failed in web research: {err}") + st.warning(f"Failed in Tavily web research: {err}") + logger.error(f"Failed in Tavily web research: {err}") + with st.status("Started Writing blog from google search..", expanded=True) as status: status.update(label="Researching and Writing Blog on keywords.") @@ -82,6 +89,7 @@ def write_blog_from_keywords(search_keywords, url=None): st.markdown(blog_markdown_str) status.update(label="πŸ™Ž Draft 1: Your Content from Google search result.", state="complete", expanded=False) except Exception as err: + status.update(label="πŸ™Ž Failed Content from Google SERP.", state="error", expanded=False) st.error(f"Failed in Google web research: {err}") logger.error(f"Failed in Google web research: {err}") @@ -92,11 +100,12 @@ def write_blog_from_keywords(search_keywords, url=None): # Do Tavily AI research to augment the above blog. 
try: # example_blog_titles.append(t_titles) - if blog_markdown_str and tavily_search_result: + if tavily_search_result: logger.info(f"\n\n######### Blog content after Tavily AI research: ######### \n\n") blog_markdown_str = write_blog_google_serp(search_keywords, tavily_search_result) status.update(label=f"Finished Writing Blog From Tavily Results:{blog_markdown_str}", expanded=True) except Exception as err: + status.update(label="πŸ™Ž Failed content from Tavily search.", state="error", expanded=False) logger.error(f"Failed to do Tavily AI research: {err}") status.update(label="πŸ™Ž Generating - Title, Meta Description, Tags, Categories for the content.", expanded=True) diff --git a/lib/gpt_providers/text_generation/gemini_pro_text.py b/lib/gpt_providers/text_generation/gemini_pro_text.py index 5a999dd3..e4118030 100644 --- a/lib/gpt_providers/text_generation/gemini_pro_text.py +++ b/lib/gpt_providers/text_generation/gemini_pro_text.py @@ -52,3 +52,47 @@ def gemini_text_response(prompt, temperature, top_p, n, max_tokens, system_promp return response.text except Exception as err: logger.error(f"Failed to get response from Gemini: {err}. Retrying.") + + +#@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6)) +#def gemini_blog_metadata_json(blog_content): +# """ Common functiont to get response from gemini pro Text. """ +# prompt = f"I will provide you with the content of a blog post. Based on this content, you need to generate the following elements in JSON format:\n\n1. **Blog Title**: A compelling and relevant title that summarizes the blog content.\n2. **Meta Description**: A concise meta description (up to 160 characters) that captures the essence of the blog post and encourages clicks.\n3. **Tags**: A list of 5-10 relevant tags that represent the key topics covered in the blog post.\n4. 
**Categories**: A list of 1-3 appropriate categories that best describe the blog post's main themes.\n\nOutput your response in the following JSON format:\n\n```json\n{\n \"type\": \"object\",\n \"properties\": {\n \"blog_title\": {\n \"type\": \"string\"\n },\n \"meta_description\": {\n \"type\": \"string\"\n },\n \"tags\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"string\"\n }\n },\n \"categories\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"string\"\n }\n }\n }\n}\n\n. The Blog Content is given below: \n\n{blog_content}\n\n" +# +# try: +# genai.configure(api_key=os.getenv('GEMINI_API_KEY')) +# except Exception as err: +# logger.error(f"Failed to configure Gemini: {err}") +# +# # Create the model +# generation_config = { +# "temperature": 1, +# "top_p": 0.95, +# "top_k": 64, +# "max_output_tokens": 8192, +# "response_schema": content.Schema( +# type = content.Type.OBJECT, +# properties = { +# "response": content.Schema( +# type = content.Type.STRING, +# ), +# }, +# ), +# "response_mime_type": "application/json", +# } +# +# model = genai.GenerativeModel( +# model_name="gemini-1.5-flash", +# generation_config=generation_config, +# # safety_settings = Adjust safety settings +# # See https://ai.google.dev/gemini-api/docs/safety-settings +# ) +# +# try: +# # text_response = [] +# response = model.generate_content(prompt) +# if response: +# logger.info(f"Number of Token in Prompt Sent: {model.count_tokens(prompt)}") +# return response.text +# except Exception as err: +# logger.error(f"Failed to get SEO METADATA from Gemini: {err}. 
Retrying.") diff --git a/lib/gpt_providers/text_generation/main_text_generation.py b/lib/gpt_providers/text_generation/main_text_generation.py index af692658..598a189a 100644 --- a/lib/gpt_providers/text_generation/main_text_generation.py +++ b/lib/gpt_providers/text_generation/main_text_generation.py @@ -33,8 +33,28 @@ def llm_text_gen(prompt): blog_output_format, blog_length = read_return_config_section('blog_characteristics') # Construct the system prompt with the sidebar config params. - system_instructions = read_return_config_section('system_prompt') + #system_instructions = read_return_config_section('system_prompt') + system_instructions = f"""You are a highly skilled content writer with a knack for creating engaging and informative content. + Your expertise spans various writing styles and formats. + Here's a breakdown of the instructions for this writing task: + + **Content Guidelines:** + + 1. **Language:** Your response must be in **{blog_language}** language. + 2. **Tone and Brand Alignment:** Adjust your tone, voice, and personality to be appropriate for a **{blog_tone}** audience. + 3. **Content Length:** Ensure your response is approximately **{blog_length}** words in length. + 4. **Blog Type:** The type of blog is **{blog_type}**. Write accordingly, adhering to the conventions and expectations of this type of content. + 5. **Target Audience:** The demographic for this content is **{blog_demographic}**. Keep their interests and needs in mind. + 6. **Output Format:** Your response should be in **{blog_output_format}** format. This could be Markdown, HTML, or a specific structured format, depending on the user's preference. + + **Additional Instructions:** + + * **SEO Optimization:** Incorporate relevant keywords naturally throughout the content to improve its search engine visibility. + * **Call to Action:** Include a call to action if appropriate for the blog type and target audience. 
+ * **Factual Accuracy:** Ensure your content is accurate and reliable. Back up any claims with credible sources. + * **Unique Voice and Style:** Inject your unique voice and writing style to make the content engaging and memorable. """ + #gpt_provider = check_gpt_provider(gpt_provider) # Check if API key is provided for the given gpt_provider get_api_key(gpt_provider) diff --git a/lib/workspace/alwrity_config/main_config.json b/lib/workspace/alwrity_config/main_config.json index 9ad590df..eb6e84aa 100644 --- a/lib/workspace/alwrity_config/main_config.json +++ b/lib/workspace/alwrity_config/main_config.json @@ -5,7 +5,7 @@ "Blog Demographic": "Professional", "Blog Type": "Informational", "Blog Language": "English", - "Blog Output Format": "markdown" + "Blog Output Format": "HTML" }, "Blog Images Details": { "Image Generation Model": "stable-diffusion",