Files
ALwrity/lib/ai_web_researcher/common_utils.py
2024-04-12 18:46:42 +05:30

102 lines
4.2 KiB
Python

# Common utils for web_researcher
import os
import sys
import re
import configparser
from datetime import datetime, timedelta
from pathlib import Path
from loguru import logger
# Configure loguru for this module: remove() with no argument drops the
# handlers registered so far, then a single stdout sink is added.
logger.remove()
# Colorized output laid out as "LEVEL|file:line:function| message".
logger.add(sys.stdout,
colorize=True,
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
# One http(s) URL: scheme followed by anything except whitespace or a comma,
# so dots, ports, paths and hyphens are accepted (``\w+`` rejected them).
_INCLUDE_URL_RE = re.compile(r"^https?://[^\s,]+$")

# Supported ``time_range`` labels mapped to how many days back to search.
_TIME_RANGE_DAYS = {
    "past day": 1,
    "past week": 7,
    "past month": 30,
    "past year": 365,
}


def _parse_include_domains(raw_value):
    """Return a list of URLs parsed from a comma-separated string, else None."""
    if not raw_value:
        # Option missing or empty: no domain restriction.
        return None
    urls = [item.strip() for item in raw_value.split(",")]
    if all(_INCLUDE_URL_RE.match(url) for url in urls):
        return urls
    # Any malformed entry invalidates the whole setting.
    return None


def _start_published_date(time_range):
    """Return the 'YYYY-MM-DD' start date for a time_range label, else None."""
    days_back = _TIME_RANGE_DAYS.get(time_range)
    if days_back is None:
        if time_range and time_range != "anytime":
            logger.warning(f"Unknown time_range '{time_range}', not limiting by date.")
        return None
    return (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d")


def cfg_search_param(flag):
    """
    Read web research settings from the ``main_config`` file.

    The file is looked up two directories above this module and the
    ``[web_research]`` section is read. What is returned depends on ``flag``.

    Args:
        flag (str): Search provider selector. It is tested with ``in``, so any
            value containing 'serperdev', 'tavily' or 'exa' selects that
            provider (checked in that order).

    Returns:
        'serperdev': tuple ``(geo_location, search_language, num_results)``.
        'tavily': list of include URLs, or None when the ``include_domains``
            option is missing, empty or malformed.
        'exa': tuple ``(include_urls, start_published_date, num_results,
            similar_url)`` where ``start_published_date`` is a 'YYYY-MM-DD'
            string or None (for "anytime", an empty or an unknown range).
        Any other flag: None.
        On a config error (missing section, invalid integer) the tuple
        ``({}, None, None, None)`` is returned regardless of ``flag``.
    """
    try:
        file_path = Path(__file__).resolve().parents[2] / "main_config"
        logger.info(f"Reading search config params from {file_path}")
        config = configparser.ConfigParser()
        # ConfigParser.read() silently skips files it cannot open, so a
        # missing file surfaces below as a KeyError on the section lookup.
        config.read(file_path, encoding="utf-8")
        web_research_section = config["web_research"]

        if 'serperdev' in flag:
            geo_location = web_research_section.get("geo_location")
            search_language = web_research_section.get("search_language")
            num_results = web_research_section.getint("num_results")
            return geo_location, search_language, num_results

        if 'tavily' in flag:
            return _parse_include_domains(web_research_section.get("include_domains"))

        if 'exa' in flag:
            include_urls = _parse_include_domains(web_research_section.get("include_domains"))
            num_results = web_research_section.getint("num_results")
            similar_url = web_research_section.get("similar_url")
            # Callers receive the computed start date in the time_range slot.
            time_range = _start_published_date(web_research_section.get("time_range"))
            return include_urls, time_range, num_results, similar_url

        logger.warning(f"Unknown search provider flag: {flag}")
        return None
    except FileNotFoundError:
        logger.error(f"Error: Config file '{file_path}' not found.")
        return {}, None, None, None
    except KeyError as e:
        logger.error(f"Error: Missing section or option in config file: {e}")
        return {}, None, None, None
    except ValueError as e:
        logger.error(f"Error: Invalid value in config file: {e}")
        return {}, None, None, None
def save_in_file(table_content):
    """
    Append search analysis text to the file named by ``SEARCH_SAVE_FILE``.

    The content is followed by three newlines so successive entries are
    visually separated. This is best-effort: failures are logged and never
    raised to the caller.

    Args:
        table_content (str): Text to append to the file.

    Returns:
        None
    """
    file_path = os.environ.get('SEARCH_SAVE_FILE')
    if not file_path:
        # Without this guard open(None) fails with an unrelated-looking TypeError.
        logger.error("Error occurred while writing to the file: "
                     "SEARCH_SAVE_FILE environment variable is not set.")
        return

    try:
        # Append mode keeps earlier results; the file is created if absent.
        with open(file_path, "a", encoding="utf-8") as file:
            file.write(table_content)
            file.write("\n" * 3)  # Add three newlines at the end
    except Exception as e:
        # Deliberately broad: saving must not interrupt the research flow.
        logger.error(f"Error occurred while writing to the file: {e}")
    else:
        logger.info(f"Search content saved to {file_path}")