# Common utils for web_researcher
import os
import sys
import re
import json
from datetime import datetime, timedelta
from pathlib import Path

import streamlit as st
from loguru import logger

logger.remove()
logger.add(sys.stdout,
           colorize=True,
           format="{level}|{file}:{line}:{function}| {message}"
           )

# One or more comma-separated http(s) URLs,
# e.g. "https://example.com, http://docs.example.org/page".
_URL_LIST_PATTERN = re.compile(r"^(https?://[^\s,]+)(,\s*https?://[^\s,]+)*$")

# Look-back window, in days, for each supported "Time Range" config value.
_TIME_RANGE_DAYS = {
    "past day": 1,
    "past week": 7,
    "past month": 30,
    "past year": 365,
}


def _parse_include_urls(raw):
    """Return the URLs in a comma-separated string as a list, or None if invalid."""
    # The config key may be absent (None) or hold a non-string value; treat
    # both as "no domain restriction" instead of crashing inside re.match.
    if not isinstance(raw, str):
        return None
    raw = raw.strip()
    if not _URL_LIST_PATTERN.match(raw):
        return None
    return [url.strip() for url in raw.split(',')]


def _start_published_date(time_range):
    """Return the 'YYYY-MM-DD' start date for a "Time Range" value, or None for no limit."""
    days = _TIME_RANGE_DAYS.get(time_range) if isinstance(time_range, str) else None
    if days is None:
        # "anytime", an empty value and unrecognised values all mean "no
        # date filter"; only the unrecognised case is worth a warning.
        if time_range and time_range != "anytime":
            logger.warning(f"Unrecognised time range '{time_range}'; no date filter applied.")
        return None
    return (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')


def cfg_search_param(flag):
    """
    Read search-engine settings from the JSON config file named by ALWRITY_CONFIG.

    The values are taken from the "Search Engine Parameters" section of the
    file. Which values are returned depends on the substring found in `flag`
    (checked in this order):

    - 'serperdev': (geo_location, search_language, num_results)
    - 'tavily':    include_urls (list of URLs, or None)
    - 'exa':       (include_urls, start_published_date, num_results, similar_url)

    "Include Domains" must be a comma-separated string of http(s) URLs;
    anything else yields include_urls = None. start_published_date is a
    'YYYY-MM-DD' string derived from "Time Range", or None for "anytime",
    an empty value, or an unrecognised value.

    Args:
        flag (str): Selects which configuration values to return.

    Returns:
        various: See the list above. None when `flag` matches no known
        provider. On a missing file, a missing section or invalid JSON the
        tuple ({}, None, None, None) is returned regardless of `flag`.
    """
    try:
        file_path = Path(os.environ.get("ALWRITY_CONFIG", ""))
        if not file_path.is_file():
            raise FileNotFoundError(f"Configuration file not found: {file_path}")
        logger.info(f"Reading search config params from {file_path}")

        with open(file_path, 'r', encoding='utf-8') as file:
            config = json.load(file)
        web_research_section = config["Search Engine Parameters"]

        if 'serperdev' in flag:
            geo_location = web_research_section.get("Geographic Location")
            search_language = web_research_section.get("Search Language")
            num_results = web_research_section.get("Number of Results")
            return geo_location, search_language, num_results

        elif 'tavily' in flag:
            return _parse_include_urls(web_research_section.get("Include Domains"))

        elif 'exa' in flag:
            include_urls = _parse_include_urls(web_research_section.get("Include Domains"))
            num_results = web_research_section.get("Number of Results")
            similar_url = web_research_section.get("Similar URL")
            time_range = _start_published_date(web_research_section.get("Time Range"))
            return include_urls, time_range, num_results, similar_url

        logger.warning(f"Unknown search config flag: {flag}")
        return None

    # NOTE(review): the error return is a 4-tuple for every flag, which does
    # not match the 'serperdev' (3 values) or 'tavily' (single value) shapes.
    # Kept as-is because callers are not visible from this file.
    except FileNotFoundError:
        logger.error(f"Error: Config file '{file_path}' not found.")
        return {}, None, None, None
    except KeyError as e:
        logger.error(f"Error: Missing section or option in config file: {e}")
        return {}, None, None, None
    except ValueError as e:
        # json.JSONDecodeError is a ValueError, so malformed JSON lands here.
        logger.error(f"Error: Invalid value in config file: {e}")
        return {}, None, None, None


def save_in_file(table_content):
    """
    Append search analysis to the file named by SEARCH_SAVE_FILE and render it in Streamlit.

    Args:
        table_content (str): Text to display via st.write and append to the file.

    Returns:
        str | None: The path written to, or None if SEARCH_SAVE_FILE is
        unset or writing failed (the failure is logged, not raised).
    """
    file_path = os.environ.get('SEARCH_SAVE_FILE')
    if not file_path:
        # Without this guard open(None) fails with an unhelpful TypeError.
        logger.error("Error occurred while writing to the file: SEARCH_SAVE_FILE is not set.")
        return None

    try:
        with open(file_path, "a+", encoding="utf-8") as file:
            st.write(table_content)
            file.write(table_content)
            file.write("\n" * 3)  # Separate entries with three newlines
        logger.info(f"Search content saved to {file_path}")
        return file_path
    except Exception as e:
        # Best-effort save: report the problem and let the caller carry on.
        logger.error(f"Error occurred while writing to the file: {e}")
        return None