Alwrity - Bug fixes

2024-04-12 17:36:37 +05:30
parent cf6516eeee
commit e3c3c03729
18 changed files with 81 additions and 81 deletions
--- a/lib/ai_web_researcher/arxiv_schlorly_research.py
+++ b/lib/ai_web_researcher/arxiv_schlorly_research.py
@@ -112,7 +112,7 @@ def get_arxiv_main_content(url):
            pdf_text = ''

            # Read the downloaded PDF
-            with open(pdf_filename, 'rb') as f:
+            with open(pdf_filename, 'rb') as f:
                pdf_reader = PyPDF2.PdfReader(f)

                for page in pdf_reader.pages:
@@ -168,7 +168,7 @@ def download_image(image_url, base_url, folder="images"):
        response.raise_for_status()

        image_name = image_url.split("/")[-1]
-        with open(os.path.join(folder, image_name), 'wb') as file:
+        with open(os.path.join(folder, image_name), 'wb') as file:
            file.write(response.content)
        return True

@@ -297,7 +297,7 @@ def read_written_ids(file_path):
    """
    written_ids = set()
    try:
-        with open(file_path, 'r') as file:
+        with open(file_path, 'r', encoding="utf-8") as file:
            for line in file:
                written_ids.add(line.strip())
    except FileNotFoundError:
@@ -320,12 +320,12 @@ def append_id_to_file(arxiv_id, output_file_path):
        if not os.path.exists(output_file_path):
            logger.info(f"File does not exist. Creating new file: {output_file_path}")
            # Create a new file and append the ID
-            with open(output_file_path, 'a') as outfile:
+            with open(output_file_path, 'a', encoding="utf-8") as outfile:
                outfile.write(arxiv_id + '\n')
        else:
            logger.info(f"Appending to existing file: {output_file_path}")
            # File exists, append the ID
-            with open(output_file_path, 'a') as outfile:
+            with open(output_file_path, 'a', encoding="utf-8") as outfile:
                outfile.write(arxiv_id + '\n')

    except Exception as e:
--- a/lib/ai_web_researcher/common_utils.py
+++ b/lib/ai_web_researcher/common_utils.py
@@ -93,7 +93,7 @@ def save_in_file(table_content):
    file_path = os.environ.get('SEARCH_SAVE_FILE')
    try:
        # Save the content to the file
-        with open(file_path, "a+") as file:
+        with open(file_path, "a+", encoding="utf-8") as file:
            file.write(table_content)
            file.write("\n" * 3)  # Add three newlines at the end
        logger.info(f"Search content saved to {file_path}")
--- a/lib/ai_web_researcher/google_trends_researcher.py
+++ b/lib/ai_web_researcher/google_trends_researcher.py
@@ -482,7 +482,7 @@ def save_in_file(table_content):
    file_path = os.environ.get('SEARCH_SAVE_FILE')
    try:
        # Save the content to the file
-        with open(file_path, "a+") as file:
+        with open(file_path, "a+", encoding="utf-8") as file:
            file.write(table_content)
            file.write("\n" * 3)  # Add three newlines at the end
        logger.info(f"Search content saved to {file_path}")
--- a/lib/ai_web_researcher/tavily_ai_search.py
+++ b/lib/ai_web_researcher/tavily_ai_search.py
@@ -160,7 +160,7 @@ def save_in_file(table_content):
    file_path = os.environ.get('SEARCH_SAVE_FILE')
    try:
        # Save the content to the file
-        with open(file_path, "a") as file:
+        with open(file_path, "a", encoding="utf-8") as file:
            file.write(table_content)
            file.write("\n" * 3)  # Add three newlines at the end
        logger.info(f"Search content saved to {file_path}")
--- a/lib/ai_writers/keywords_to_blog.py
+++ b/lib/ai_writers/keywords_to_blog.py
@@ -37,26 +37,28 @@ def write_blog_from_keywords(search_keywords, url=None):
    blog_markdown_str = ""
    example_blog_titles = []
    
-#    logger.info(f"Researching and Writing Blog on keywords: {search_keywords}")
-#    # Call on the got-researcher, tavily apis for this. Do google search for organic competition.
-#    try:
-#        google_search_result, g_titles = do_google_serp_search(search_keywords)
-#        example_blog_titles.append(g_titles)
-#        blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result)
-#    except Exception as err:
-#        logger.error(f"Failed in Google web research: {err}")
-#    # logger.info/check the final blog content.
-#    logger.info("\n######### Draft1: Finished Blog from Google web search: ###########\n\n")
+    logger.info(f"Researching and Writing Blog on keywords: {search_keywords}")
+    # Call on the got-researcher, tavily apis for this. Do google search for organic competition.
+    try:
+        google_search_result, g_titles = do_google_serp_search(search_keywords)
+        example_blog_titles.append(g_titles)
+        blog_markdown_str = write_blog_google_serp(search_keywords, google_search_result)
+    except Exception as err:
+        logger.error(f"Failed in Google web research: {err}")
+    # logger.info/check the final blog content.
+    logger.info("\n######### Draft1: Finished Blog from Google web search: ###########\n\n")
+

-#    # Do Tavily AI research to augument the above blog.
-#    try:
-#        tavily_search_result, t_titles = do_tavily_ai_search(search_keywords)
-#        example_blog_titles.append(t_titles)
-#        blog_markdown_str = blog_with_research(blog_markdown_str, tavily_search_result)
-#        logger.info(f"######### Blog content after Tavily AI research: ######### \n\n{blog_markdown_str}\n\n")
-#    except Exception as err:
-#        logger.error(f"Failed to do Tavily AI research: {err}")
-#    logger.info("######### Draft2: Blog content after Tavily AI research: #########\n\n")
+
+    # Do Tavily AI research to augment the above blog.
+    try:
+        tavily_search_result, t_titles = do_tavily_ai_search(search_keywords)
+        example_blog_titles.append(t_titles)
+        blog_markdown_str = blog_with_research(blog_markdown_str, tavily_search_result)
+        logger.info(f"######### Blog content after Tavily AI research: ######### \n\n{blog_markdown_str}\n\n")
+    except Exception as err:
+        logger.error(f"Failed to do Tavily AI research: {err}")
+    logger.info("######### Draft2: Blog content after Tavily AI research: #########\n\n")

    try:
        # Do Metaphor/Exa AI search.
--- a/lib/blog_postprocessing/save_blog_to_file.py
+++ b/lib/blog_postprocessing/save_blog_to_file.py
@@ -109,7 +109,7 @@ def save_blog_to_file(blog_content, blog_title, blog_meta_desc, blog_tags, blog_

        # Write to the file
        try:
-            with open(blog_output_path, "w") as f:
+            with open(blog_output_path, "w", encoding="utf-8") as f:
                f.write(blog_frontmatter)
                f.write(blog_content)
        except Exception as e:
--- a/lib/blog_postprocessing/save_image.py
+++ b/lib/blog_postprocessing/save_image.py
@@ -19,7 +19,7 @@ def save_generated_image(img_generation_response, image_dir):
    try:
        response = requests.get(generated_image_url, stream=True)
        response.raise_for_status()
-        with open(generated_image_filepath, "wb") as image_file:
+        with open(generated_image_filepath, "wb") as image_file:
            image_file.write(response.content)
    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to get generated image content: {e}")
--- a/lib/github_blogs/main_getting_started_blogs.py
+++ b/lib/github_blogs/main_getting_started_blogs.py
@@ -34,7 +34,7 @@ def blog_from_github(github_opts, flag):
    elif 'csv' in flag:
        try:
            gh_urls = []
-            with open(github_opts, 'r') as file:
+            with open(github_opts, 'r', encoding="utf-8") as file:
                # Read each line in the file
                for gh_url in file:
                    gh_urls.append(gh_url.strip())
--- a/lib/github_blogs/scrape_github_readme.py
+++ b/lib/github_blogs/scrape_github_readme.py
@@ -276,7 +276,7 @@ def check_if_already_written(github_url, file_path='papers_already_written_on.tx
        bool: True if an exact match is found, False otherwise.
    """
    try:
-        with open(file_path, 'r') as file:
+        with open(file_path, 'r', encoding="utf-8") as file:
            # Read each line in the file
            for line in file:
                # Check for an exact match
--- a/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
+++ b/lib/gpt_providers/audio_to_text_generation/stt_audio_blog.py
@@ -79,7 +79,7 @@ def speech_to_text(video_url, output_path='.'):
            logger.info("Transcribing using OpenAI's Whisper model.")
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
-                file=open(audio_file, "rb"),
+                file=open(audio_file, "rb"),
                response_format="text"
            )
            logger.info(f"\nYouTube video transcription:\n{yt.title}\n{transcript}\n")
@@ -133,7 +133,7 @@ def long_video(temp_file_name):
    for i, chunk in enumerate(chunks):
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_chunk_file:
            chunk.write_audiofile(audio_chunk_file.name, codec="mp3")
-            with open(audio_chunk_file.name, "rb") as audio_file:
+            with open(audio_chunk_file.name, "rb") as audio_file:
                # Transcribe each chunk using OpenAI's Whisper API
                app.logger.info(f"Transcribing chunk {i+1}/{len(chunks)}")
                transcript = openai.Audio.transcribe("whisper-1", audio_file)
--- a/lib/gpt_providers/image_generation/gen_variation_img.py
+++ b/lib/gpt_providers/image_generation/gen_variation_img.py
@@ -35,7 +35,7 @@ def gen_new_from_given_img(img_path, image_dir, num_img=1, img_size="1024x1024",

        client = OpenAI()
        variation_response = client.images.create_variation(
-            image=open(img_path, "rb"),
+            image=open(img_path, "rb"),
            n=num_img,
            size=img_size,
            response_format=response_format
--- a/lib/gpt_providers/image_to_text_gen/openai_vision_img_details.py
+++ b/lib/gpt_providers/image_to_text_gen/openai_vision_img_details.py
@@ -32,7 +32,7 @@ def analyze_and_extract_details_from_image(image_path):

        def encode_image(path):
            """ Encodes an image to a base64 string. """
-            with open(path, "rb") as image_file:
+            with open(path, "rb") as image_file:
                return base64.b64encode(image_file.read()).decode('utf-8')

        base64_image = encode_image(image_path)
--- a/lib/gpt_providers/text_generation/gemini_pro_text.py
+++ b/lib/gpt_providers/text_generation/gemini_pro_text.py
@@ -4,6 +4,7 @@ import sys
 from pathlib import Path

 import google.generativeai as genai
+from google.api_core import retry
 from dotenv import load_dotenv
 load_dotenv(Path('../../../.env'))
 from loguru import logger
@@ -13,16 +14,10 @@ logger.add(sys.stdout,
        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
    )

-from tenacity import (
-    retry,
-    stop_after_attempt,
-    wait_random_exponential,
-)  # for exponential backoff

-
-@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
 def gemini_text_response(prompt, temperature, top_p, n, max_tokens):
    """ Common functiont to get response from gemini pro Text. """
+    #FIXME: Include : https://github.com/google-gemini/cookbook/blob/main/quickstarts/rest/System_instructions_REST.ipynb
    try:
        genai.configure(api_key=os.getenv('GEMINI_API_KEY'))
    except Exception as err:
@@ -35,10 +30,13 @@ def gemini_text_response(prompt, temperature, top_p, n, max_tokens):
        "top_k": n,
        "max_output_tokens": max_tokens
    }
+    # FIXME: Expose model_name in main_config
    model = genai.GenerativeModel(model_name="gemini-1.0-pro", generation_config=generation_config)
    try:
-        response = model.generate_content(prompt, stream=True)
+        # text_response = []
+        response = model.generate_content(prompt, stream=True, request_options={'retry':retry.Retry()})
        for chunk in response:
+            # text_response.append(chunk.text)
            print(chunk.text)
        return response.text
    except Exception as err:
--- a/lib/image_to_text/gpt_vision_image_details.py
+++ b/lib/image_to_text/gpt_vision_image_details.py
@@ -22,7 +22,7 @@ def analyze_and_extract_details_from_image(image_path, api_key):
    """
    def encode_image(path):
        """ Encodes an image to a base64 string. """
-        with open(path, "rb") as image_file:
+        with open(path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    base64_image = encode_image(image_path)
--- a/lib/scholar_blogs/main_arxiv_to_blog.py
+++ b/lib/scholar_blogs/main_arxiv_to_blog.py
@@ -78,7 +78,7 @@ def blog_arxiv_url_list(file_path):
    """ Write blogs on all the arxiv links given in a file. """
    extracted_ids = []
    try:
-        with open(file_path, 'r') as file:
+        with open(file_path, 'r', encoding="utf-8") as file:
            for line in file:
                arxiv_id = extract_arxiv_ids_from_line(line)
                if arxiv_id:
--- a/lib/utils/take_url_screenshot.py
+++ b/lib/utils/take_url_screenshot.py
@@ -47,7 +47,7 @@ def screenshot_api(url, generated_image_filepath):
        image = client.take(options)

        # store the screenshot the example.png file
-        with open(generated_image_filepath, 'wb') as result_file:
+        with open(generated_image_filepath, 'wb') as result_file:
            shutil.copyfileobj(image, result_file)

        # Display the screenshot using Image.show
@@ -89,7 +89,7 @@ def take_screenshot(url, generated_image_filepath):
        screenshot = driver.get_screenshot_as_png()

        # Save the screenshot to a file
-        with open(generated_image_filepath, "wb") as f:
+        with open(generated_image_filepath, "wb") as f:
            f.write(screenshot)

        # Display the screenshot using Image.show
--- a/lib/utils/wordpress_blog_uploader.py
+++ b/lib/utils/wordpress_blog_uploader.py
@@ -249,7 +249,7 @@ def upload_media(url, username, password, media_path, alt_text, description, tit
        'Content-Disposition': 'attachment; filename={}'.format(os.path.basename(media_path))
    }

-    with open(media_path, 'rb') as media:
+    with open(media_path, 'rb') as media:
        media_name = os.path.basename(media_path)
        files = {'file': (media_name, media, mime_type)}