feat: implement contact information extraction using Firecrawl's LLM Extract feature

2024-09-17 11:58:01 +05:30
parent 8930f3d2b2
commit 3e9d641ac5
1 changed file with 29 additions and 4 deletions
--- a/lib/ai_marketing_tools/ai_backlinking.py
+++ b/lib/ai_marketing_tools/ai_backlinking.py
@@ -203,15 +203,40 @@ def search_for_urls(query):
    return []


-def extract_contact_info(scraped_data):
+from lib.ai_web_researcher.firecrawl_web_crawler import extract_data
+
+def extract_contact_info(url):
    """
-    Placeholder function to extract contact information from scraped data.
+    Extract email addresses and contact-form URLs from a website using Firecrawl's LLM Extract feature.
    
    Args:
-        scraped_data (dict): The data scraped from a website.
+        url (str): The URL of the website to extract contact information from.
    
    Returns:
        dict: Extracted contact information.
    """
-    # This function needs to be implemented
+    schema = {
+        "type": "object",
+        "properties": {
+            "emails": {
+                "type": "array",
+                "items": {
+                    "type": "string",
+                    "format": "email"
+                }
+            },
+            "contact_forms": {
+                "type": "array",
+                "items": {
+                    "type": "string",
+                    "format": "uri"
+                }
+            }
+        },
+        "required": ["emails", "contact_forms"]
+    }
+    
+    result = extract_data(url, schema)
+    if result and 'extract' in result:
+        return result['extract']
    return {}