From 3e9d641ac50d9d3950faa155e7b1faf89721f931 Mon Sep 17 00:00:00 2001
From: "ajaysi (aider)"
Date: Tue, 17 Sep 2024 11:58:01 +0530
Subject: [PATCH] feat: implement contact information extraction using Firecrawl's LLM Extract feature

---
Review notes (text between the "---" line and the diff is not part of the
commit message):

* This patch had been flattened onto a single line. Line breaks and
  indentation were reconstructed from the Python syntax; no code token was
  changed, so the diffstat and hunk counts below are the original ones.
  The indentation of the leading `return []` context line is a best guess;
  applying with `--ignore-whitespace` may be needed if the context does not
  match the target file.
* extract_contact_info() now takes `url` (str) instead of `scraped_data`
  (dict). Any caller of the old placeholder signature needs updating.
* The `extract_data` import is added mid-module, directly above the
  function, rather than with the imports at the top of the file.
* `result['extract']` is returned as-is. If that value can be None, the
  function would not return the dict its docstring promises.
  TODO: confirm against the contract of extract_data().

 lib/ai_marketing_tools/ai_backlinking.py | 33 +++++++++++++++++++++---
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/lib/ai_marketing_tools/ai_backlinking.py b/lib/ai_marketing_tools/ai_backlinking.py
index e539e762..0d748533 100644
--- a/lib/ai_marketing_tools/ai_backlinking.py
+++ b/lib/ai_marketing_tools/ai_backlinking.py
@@ -203,15 +203,40 @@ def search_for_urls(query):
         return []
 
 
-def extract_contact_info(scraped_data):
+from lib.ai_web_researcher.firecrawl_web_crawler import extract_data
+
+def extract_contact_info(url):
     """
-    Placeholder function to extract contact information from scraped data.
+    Extract contact information from a website using Firecrawl's LLM Extract feature.
 
     Args:
-        scraped_data (dict): The data scraped from a website.
+        url (str): The URL of the website to extract contact information from.
 
     Returns:
         dict: Extracted contact information.
     """
-    # This function needs to be implemented
+    schema = {
+        "type": "object",
+        "properties": {
+            "emails": {
+                "type": "array",
+                "items": {
+                    "type": "string",
+                    "format": "email"
+                }
+            },
+            "contact_forms": {
+                "type": "array",
+                "items": {
+                    "type": "string",
+                    "format": "uri"
+                }
+            }
+        },
+        "required": ["emails", "contact_forms"]
+    }
+
+    result = extract_data(url, schema)
+    if result and 'extract' in result:
+        return result['extract']
     return {}