Files
ALwrity/lib/online_research_agent.py
2023-12-21 21:21:09 +05:30

202 lines
5.7 KiB
Python

import os
import requests
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from bs4 import BeautifulSoup
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv
import json
from autogen import config_list_from_json
from autogen.agentchat.contrib.gpt_assistant_agent import GPTAssistantAgent
from autogen import UserProxyAgent
import autogen
load_dotenv()
brwoserless_api_key = os.getenv("BROWSERLESS_API_KEY")
serper_api_key = os.getenv("SERP_API_KEY")
airtable_api_key = os.getenv("AIRTABLE_API_KEY")
config_list = config_list_from_json("OAI_CONFIG_LIST")
# ------------------ Create functions ------------------ #
# Function for google search
def google_search(search_keyword):
url = "https://google.serper.dev/search"
payload = json.dumps({
"q": search_keyword
})
headers = {
'X-API-KEY': serper_api_key,
'Content-Type': 'application/json'
}
response = requests.request("POST", url, headers=headers, data=payload)
print("RESPONSE:", response.text)
return response.text
# Function for scraping
def summary(objective, content):
llm = ChatOpenAI(temperature = 0, model = "gpt-3.5-turbo-16k-0613")
text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size = 10000, chunk_overlap=500)
docs = text_splitter.create_documents([content])
map_prompt = """
Write a summary of the following text for {objective}:
"{text}"
SUMMARY:
"""
map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text", "objective"])
summary_chain = load_summarize_chain(
llm=llm,
chain_type='map_reduce',
map_prompt = map_prompt_template,
combine_prompt = map_prompt_template,
verbose = False
)
output = summary_chain.run(input_documents=docs, objective=objective)
return output
def web_scraping(objective: str, url: str):
#scrape website, and also will summarize the content based on objective if the content is too large
#objective is the original objective & task that user give to the agent, url is the url of the website to be scraped
print("Scraping website...")
# Define the headers for the request
headers = {
'Cache-Control': 'no-cache',
'Content-Type': 'application/json',
}
# Define the data to be sent in the request
data = {
"url": url
}
# Convert Python object to JSON string
data_json = json.dumps(data)
# Send the POST request
response = requests.post(f"https://chrome.browserless.io/content?token={brwoserless_api_key}", headers=headers, data=data_json)
# Check the response status code
if response.status_code == 200:
soup = BeautifulSoup(response.content, "html.parser")
text = soup.get_text()
print("CONTENTTTTTT:", text)
if len(text) > 10000:
output = summary(objective,text)
return output
else:
return text
else:
print(f"HTTP request failed with status code {response.status_code}")
# Function for get airtable records
def get_airtable_records(base_id, table_id):
url = f"https://api.airtable.com/v0/{base_id}/{table_id}"
headers = {
'Authorization': f'Bearer {airtable_api_key}',
}
response = requests.request("GET", url, headers=headers)
data = response.json()
print(data)
return data
# Function for update airtable records
def update_single_airtable_record(base_id, table_id, id, fields):
url = f"https://api.airtable.com/v0/{base_id}/{table_id}"
headers = {
'Authorization': f'Bearer {airtable_api_key}',
"Content-Type": "application/json"
}
data = {
"records": [{
"id": id,
"fields": fields
}]
}
response = requests.patch(url, headers=headers, data=json.dumps(data))
data = response.json()
return data
# ------------------ Create agent ------------------ #
# Create user proxy agent
user_proxy = UserProxyAgent(name="user_proxy",
is_termination_msg=lambda msg: "TERMINATE" in msg["content"],
human_input_mode="ALWAYS",
max_consecutive_auto_reply=1
)
# Create researcher agent
researcher = GPTAssistantAgent(
name = "researcher",
llm_config = {
"config_list": config_list,
"assistant_id": "asst_qyvioid5My8K3SdFClaEnwmB"
}
)
researcher.register_function(
function_map={
"web_scraping": web_scraping,
"google_search": google_search
}
)
# Create research manager agent
research_manager = GPTAssistantAgent(
name="research_manager",
llm_config = {
"config_list": config_list,
"assistant_id": "asst_C1Ta5XmmEcYD6vnOSVflnwG9"
}
)
# Create director agent
director = GPTAssistantAgent(
name = "director",
llm_config = {
"config_list": config_list,
"assistant_id": "asst_zVBJGch5mOyCYl9H1J3L9Ime",
}
)
director.register_function(
function_map={
"get_airtable_records": get_airtable_records,
"update_single_airtable_record": update_single_airtable_record
}
)
# Create group chat
groupchat = autogen.GroupChat(agents=[user_proxy, researcher, research_manager, director], messages=[], max_round=15)
group_chat_manager = autogen.GroupChatManager(groupchat=groupchat, llm_config={"config_list": config_list})
# ------------------ start conversation ------------------ #
message = """
Research the funding stage/amount & pricing for each company in the list: https://airtable.com/appj0J4gFpvLrQWjI/tblF4OmG6oLjYtgZl/viwmFx2ttAVrJm0E3?blocks=hide
"""
user_proxy.initiate_chat(group_chat_manager, message=message)