ALwrity Version 0.5.1 (Fastapi + React)

2025-08-06 16:29:49 +05:30
parent dbf761c31f
commit 2579c12ba4
331 changed files with 0 additions and 22 deletions
--- a/ToBeMigrated/ai_web_researcher/firecrawl_web_crawler.py
+++ b/ToBeMigrated/ai_web_researcher/firecrawl_web_crawler.py
@@ -0,0 +1,96 @@
+import os
+from pathlib import Path
+from firecrawl import FirecrawlApp
+import logging
+from dotenv import load_dotenv
+
+# Load environment variables from the .env file; this relative path is resolved against the current working directory.
+load_dotenv(Path('../../.env'))
+
+# Configure the root logger at INFO level with a "timestamp - level - message" line format.
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+
+def initialize_client() -> FirecrawlApp:
+    """
+    Create a Firecrawl client keyed by the FIRECRAWL_API_KEY environment variable.
+
+    Returns:
+        FirecrawlApp: A newly constructed client (the key passed is None when the variable is unset).
+    """
+    return FirecrawlApp(api_key=os.environ.get("FIRECRAWL_API_KEY"))
+
+def scrape_website(website_url: str, depth: int = 1, max_pages: int = 10) -> dict:
+    """
+    Scrape a website starting from the given URL.
+
+    Best-effort: every failure, including client construction, is logged and reported as None.
+
+    Args:
+        website_url (str): The URL of the website to scrape.
+        depth (int, optional): The depth of crawling. Default is 1.
+        max_pages (int, optional): The maximum number of pages to scrape. Default is 10.
+
+    Returns:
+        dict: The value returned by the client's crawl_url call, or None if an error occurred.
+    """
+    # NOTE(review): the whole request is passed as one dict; firecrawl-py's documented crawl_url takes the URL string first — confirm against the installed SDK.
+    crawl_request = {'url': website_url, 'depth': depth, 'max_pages': max_pages}
+    try:
+        # Build the client inside the try so a construction failure is logged and yields None, as documented.
+        client = initialize_client()
+        return client.crawl_url(crawl_request)
+    except KeyError as e:
+        logging.error(f"Missing key in data: {e}")
+    except ValueError as e:
+        logging.error(f"Value error: {e}")
+    except Exception as e:
+        logging.error(f"Error scraping website: {e}")
+    return None
+
+def scrape_url(url: str) -> dict:
+    """
+    Scrape a specific URL.
+
+    Args:
+        url (str): The URL to scrape.
+
+    Returns:
+        dict: The client's scrape_url result, or None if any error (including client construction) was logged.
+    """
+    try:
+        # Build the client inside the try so a construction failure is logged and yields None, as documented.
+        client = initialize_client()
+        return client.scrape_url(url)
+    except KeyError as e:
+        logging.error(f"Missing key in data: {e}")
+    except ValueError as e:
+        logging.error(f"Value error: {e}")
+    except Exception as e:
+        logging.error(f"Error scraping URL: {e}")
+    return None
+
+def extract_data(url: str, schema: dict) -> dict:
+    """
+    Extract structured data from a URL using the provided schema.
+
+    Args:
+        url (str): The URL to extract data from.
+        schema (dict): The schema to use for data extraction.
+
+    Returns:
+        dict: The client's extract result, or None if any error (including client construction) was logged.
+    """
+    # NOTE(review): the URL and schema are passed together as one dict; the call shape that
+    # firecrawl-py's extract expects is not visible here — confirm against the installed SDK.
+    extract_request = {'url': url, 'schema': schema}
+    try:
+        # Build the client inside the try so a construction failure is logged and yields None, as documented.
+        client = initialize_client()
+        return client.extract(extract_request)
+    except KeyError as e:
+        logging.error(f"Missing key in data: {e}")
+    except ValueError as e:
+        logging.error(f"Value error: {e}")
+    except Exception as e:
+        logging.error(f"Error extracting data: {e}")
+    return None