feat: implement contact information extraction using Firecrawl's LLM Extract feature

This commit is contained in:
ajaysi (aider)
2024-09-17 11:58:01 +05:30
parent 8930f3d2b2
commit 3e9d641ac5

View File

@@ -203,15 +203,40 @@ def search_for_urls(query):
return [] return []
def extract_contact_info(scraped_data): from lib.ai_web_researcher.firecrawl_web_crawler import extract_data
def extract_contact_info(url: str) -> dict:
    """
    Extract contact information from a website using Firecrawl's LLM Extract feature.

    The URL and a JSON schema requesting ``emails`` and ``contact_forms`` are
    passed to ``extract_data``; the ``'extract'`` entry of its result is
    returned.

    Args:
        url (str): The URL of the website to extract contact information from.

    Returns:
        dict: Extracted contact information. An empty dict is returned when
            ``url`` is empty, or when the ``extract_data`` result is missing,
            is not a dict, or carries no dict-valued ``'extract'`` entry.
    """
    # Nothing to look up, so skip the extraction call entirely.
    if not url:
        return {}

    # JSON schema describing the fields requested from the extractor.
    schema = {
        "type": "object",
        "properties": {
            "emails": {
                "type": "array",
                "items": {
                    "type": "string",
                    "format": "email",
                },
            },
            "contact_forms": {
                "type": "array",
                "items": {
                    "type": "string",
                    "format": "uri",
                },
            },
        },
        "required": ["emails", "contact_forms"],
    }

    result = extract_data(url, schema)

    # Check the shape explicitly: on a truthy str, `'extract' in result` would
    # be a substring test and `result['extract']` would raise TypeError.
    if not isinstance(result, dict):
        return {}

    # Honour the documented dict return even if the entry is None or another
    # non-dict value.
    extracted = result.get('extract')
    return extracted if isinstance(extracted, dict) else {}