WIP - Under maintenance - Web research working.
This commit is contained in:
@@ -1 +0,0 @@
|
||||
true
|
||||
@@ -103,3 +103,5 @@ Focus is getting the prompts right. Shit in, shit out, irrespective of dollars a
|
||||
Pydantically speaking, due to the experimental nature of prompting, it's getting expensive soon enough. Gemini is free for now.
|
||||
3). Missing frontend: A smart backend will enable a good frontend. The backend is still WIP, so the frontend is coming soon.
|
||||
4). Getting AI agents to 'brainstorm' blog ideas seems more pressing. CrewAI seems more straightforward than AutoGen.
|
||||
5). Too many APIs floating around: The implementation uses tools that don't depend on API keys and scrape instead.
|
||||
Duh, scraping won't scale; that is where GPT-vision-based scraping will come in handy.
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
https://arxiv.org/abs/2201.11990
|
||||
229
blogen.py
Normal file
229
blogen.py
Normal file
@@ -0,0 +1,229 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
import typer
|
||||
from PyInquirer import prompt
|
||||
from rich import print
|
||||
from rich.text import Text
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('.env'))
|
||||
|
||||
app = typer.Typer()
|
||||
|
||||
from lib.ai_web_researcher.gpt_online_researcher import gpt_web_researcher
|
||||
|
||||
|
||||
|
||||
def prompt_for_time_range():
    """Ask the user which time window the web search should be limited to.

    Clears the terminal, prints a short hint and then shows a single-choice
    list prompt.

    Returns:
        The choice string picked by the user (for example "past week").
    """
    clear_command = "clear" if os.name == "posix" else "cls"
    os.system(clear_command)
    print("\n🙋 If you researching keywords that are recent than use accordingly, Default is Anytime.\n")

    time_range_question = {
        'type': 'list',
        'name': 'time_range',
        'message': '👋 Select Search result time range:',
        'choices': ["past day", "past week", "past month", "past year", "anytime"],
    }
    selection = prompt([time_range_question])
    return selection['time_range']
|
||||
|
||||
def write_blog_options():
    """Show the blog-type menu and return the entry the user picked.

    Returns:
        One of 'Keywords', 'Audio YouTube', 'GitHub', 'Scholar' or 'Quit'.
    """
    blog_type_question = {
        'type': 'list',
        'name': 'blog_type',
        'message': '📝 Choose a blog type:',
        'choices': ['Keywords', 'Audio YouTube', 'GitHub', 'Scholar', 'Quit'],
    }
    selection = prompt([blog_type_question])
    return selection['blog_type']
|
||||
|
||||
|
||||
@app.command()
def start_interactive_mode():
    """Show the top-level menu and run the action the user selects.

    The terminal is cleared, a short banner is printed, and the user picks
    one of the menu entries. 'Quit' prints a message and raises
    ``typer.Exit``; every other entry calls the matching handler function.
    """
    os.system("clear" if os.name == "posix" else "cls")

    banner = Text()
    banner.append("_______________________________________________________________________")
    banner.append("\n⚠️ Alert! 💥❓💥\n", style="bold red")
    banner.append("If you know what to write, choose 'Write Blog'\n", style="bold blue")
    banner.append("If unsure, lets 'do web research' to write on\n", style="bold red")
    banner.append("_______________________________________________________________________\n")
    print(banner)

    menu = {
        'type': 'list',
        'name': 'mode',
        'message': 'Choose an option:',
        'choices': ['Write Blog', 'Do Web Research', 'Competitor Analysis', 'FAQ Generator', 'Quit'],
    }
    mode = prompt([menu])['mode']

    if mode == 'Quit':
        typer.echo("Exiting, Fuck Off!")
        raise typer.Exit()

    if mode == 'News Analysis':
        # Not offered in the menu above, so this branch is currently never taken.
        print("1. Get tavily News.\n2. Get metaphor news.\n3. Get from NewsApi\n4. Get YOU.com News.")
        return

    # Ideas for 'Competitor Analysis':
    # https://github.com/com-puter-tips/SEO-Analysis
    # https://github.com/sundios/SEO-Lighthouse-Multiple-URLs
    # https://github.com/Gingerbreadfork/Cutlery
    # Metaphor similar search
    handlers = {
        'Write Blog': write_blog,
        'Do Web Research': do_web_research,
        'FAQ Generator': faq_generator,
        'Competitor Analysis': competitor_analysis,
    }
    handler = handlers.get(mode)
    if handler is not None:
        handler()
|
||||
|
||||
|
||||
def get_api_key(api_key: str, api_description: str):
    """Ask the user for a missing API key and persist it.

    The value is appended to the ``.env`` file in the current working
    directory and also exported into ``os.environ`` so that the running
    process can use it immediately.

    Args:
        api_key (str): The name of the API key variable.
        api_description (str): The description of the API key.
    """
    user_input = typer.prompt(f"{api_description} is missing. Please enter {api_key} API Key:")

    env_path = Path(".env")
    # If the existing file does not end with a newline, the new entry would be
    # glued onto the last line and corrupt both variables.
    separator = ""
    if env_path.exists():
        existing = env_path.read_bytes()
        if existing and not existing.endswith(b"\n"):
            separator = "\n"

    with open(env_path, "a") as env_file:
        env_file.write(f"{separator}{api_key}={user_input}\n")

    # Writing the file alone does not change the environment of this process.
    os.environ[api_key] = user_input
    print(f"✅ {api_description} API Key added to .env file.")
|
||||
|
||||
|
||||
|
||||
def check_environment_variables():
    """Check that the API keys needed for web research are available.

    Every missing key is reported together with the link where it can be
    obtained, and the user is then prompted for each one via ``get_api_key``.
    Afterwards ``.env`` is reloaded so freshly entered keys are visible.

    Returns:
        bool: True when all required keys are present (either from the start
        or after the user supplied them), False otherwise.
    """
    print("\n\n🙋♂️ 🙋♂️ Before doing web research, ensure the following API keys are available:")
    print("Blogen uses Basic, Semantic, Neural web search using above APIs for contextual blog generation.\n")

    api_keys = {
        "METAPHOR_API_KEY": "Metaphor AI Key (Get it here: [link=https://dashboard.exa.ai/login]Metaphor API[/link])",
        "TAVILY_API_KEY": "Tavily AI Key (Get it here: [link=https://tavily.com/#api]Tavily API[/link])",
        "SERPER_API_KEY": "Serper API Key (Get it here: [link=https://serper.dev/signup]SerperDev API[/link])",
    }

    missing_keys = []
    with typer.progressbar(api_keys.items(), label="Checking API keys", length=len(api_keys)) as progress:
        for key, description in progress:
            if os.getenv(key) is None:
                # The description carries the real sign-up link; the variable
                # name itself is not a URL and must not be used as link target.
                print(f"[bold red]✖ 🚫 {key} is missing:[/bold red] {description}")
                missing_keys.append((key, description))

    if not missing_keys:
        return True

    print("\nMost are Free APIs and really worth your while signing up for them.")
    print(":pile_of_poo::pile_of_poo::pile_of_poo: GO GET THEM, on above urls. [bold red]")
    print("Note: They offer free/limited api calls, so we use most of them to have a lot of free api calls.")
    print("\n[bold red]TBD: Provide option to use user defined search engines.\n")
    for key, description in missing_keys:
        get_api_key(key, description)

    # Pick up the keys that were just appended to .env, then report whether
    # the caller can proceed.
    load_dotenv(Path('.env'), override=True)
    return all(os.getenv(key) is not None for key, _ in missing_keys)
|
||||
|
||||
def faq_generator():
    """Placeholder for the FAQ generator; does nothing and returns None."""
    return None
|
||||
|
||||
|
||||
def competitor_analysis():
    """Placeholder for competitor analysis; does nothing and returns None."""
    return None
|
||||
|
||||
|
||||
def write_blog():
    """Run the 'Write Blog' sub-menu.

    Asks for the blog type, then prompts for the matching input (keywords,
    YouTube URL, GitHub reference or research keywords) and echoes what would
    be written. 'Quit' prints a message and raises ``typer.Exit``.
    """
    blog_type = write_blog_options()

    if blog_type == 'Quit':
        typer.echo("Exiting, Fuck Off!")
        raise typer.Exit()

    # blog type -> (question shown to the user, prefix of the echoed line)
    flows = {
        'Keywords': ("Enter keywords for blog generation:", "Write blog based on keywords: "),
        'Audio YouTube': ("Enter YouTube URL for audio blog generation:", "Write audio blog based on YouTube URL: "),
        'GitHub': ("Enter GitHub URL, CSV file, or topic:", "Write blog based on GitHub: "),
        'Scholar': ("Enter research papers keywords:", "Write blog based on scholar: "),
    }
    flow = flows.get(blog_type)
    if flow is None:
        return

    question, echo_prefix = flow
    answer = typer.prompt(question)
    print(f"{echo_prefix}{answer}")
|
||||
|
||||
|
||||
def do_web_research():
    """Collect research parameters interactively and start the web research.

    Nothing happens unless ``check_environment_variables`` returns a truthy
    value. Otherwise the user is asked for search keywords (at least three
    words), a time range, an optional "similar" URL and optional domains to
    confine the search to; these are passed to ``gpt_web_researcher``. Any
    exception raised by the researcher is printed, not propagated.
    """
    if not check_environment_variables():
        return

    clear_command = "clear" if os.name == "posix" else "cls"

    while True:
        print("________________________________________________________________")
        search_keywords = typer.prompt("👋 Enter keywords for web research:")
        if not search_keywords or len(search_keywords.split()) < 3:
            print("🚫 Search keywords should be at least three words long. Please try again.")
            continue
        break

    # Display available choices
    # print("Choose from the following options:")
    # search_keyword_choices = ["choice1", "choice2", "choice3"]
    # for i, choice in enumerate(search_keyword_choices, start=1):
    #     print(f"{i}. '{choice}'")
    #
    # choice_index = typer.prompt("Enter the NUMBER to choose which keywords to use:")
    #
    # try:
    #     choice_index = int(choice_index)
    #     if 1 <= choice_index <= len(search_keyword_choices):
    #         search_keywords = search_keyword_choices[choice_index - 1]
    #         break
    #     else:
    #         print("🚫 Invalid choice. Please try again.")
    # except ValueError:
    #     print("🚫 Invalid input. Please enter a valid number.")

    print("________________________________________________________________")
    time_range = prompt_for_time_range()

    os.system(clear_command)
    print("\n________________________________________________________________")
    print("\n🙋 Include a [green]URL[/green] to get [bold]similar/semantic[/bold]. For example, competitor's url.")
    print("📡 Usecases: Competitor Analysis Tool. Discover similar companies, startups and technologies.\n")
    similar_url = typer.prompt("👋 Enter a similar search URL (press Enter to continue):", default="")

    os.system(clear_command)
    print("\n________________________________________________________________")
    print("\n🙋 If you wish to [bold]confine search[/bold] to certain domains like wikipedia etc.\n")
    include_urls = typer.prompt("👋 Enter comma-separated URLs to include in web research (press Enter if none):", default="")

    try:
        print(f"🚀🚀 [bold green]Starting web research on given keywords: {search_keywords}..")
        #print(f"Web Research: Time Range - {time_range}, Search Keywords - {search_keywords}, Include URLs - {include_urls}")
        web_research_result = gpt_web_researcher(
            search_keywords,
            time_range=time_range,
            include_domains=include_urls,
            similar_url=similar_url,
        )
    except Exception as err:
        print(f"\n💥🤯 [bold red]ERROR 🤯 : Failed to do web research: {err}\n")
|
||||
|
||||
|
||||
# Run the Typer application only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    app()
|
||||
@@ -258,6 +258,15 @@ def arxiv_bibtex(arxiv_id):
|
||||
#search = GoogleSearch(params)
|
||||
#results = search.get_dict()
|
||||
|
||||
#from llmsherpa.readers import LayoutPDFReader
|
||||
|
||||
#llmsherpa_api_url = "https://readers.llmsherpa.com/api/document/developer/parseDocument?renderFormat=all"
|
||||
#pdf_url = "https://arxiv.org/pdf/1910.13461.pdf" # also allowed is a file path e.g. /home/downloads/xyz.pdf
|
||||
#pdf_reader = LayoutPDFReader(llmsherpa_api_url)
|
||||
#doc = pdf_reader.read_pdf(pdf_url)
|
||||
|
||||
|
||||
|
||||
|
||||
def extract_arxiv_ids_from_line(line):
|
||||
"""
|
||||
302
lib/ai_web_researcher/google_serp_search.py
Normal file
302
lib/ai_web_researcher/google_serp_search.py
Normal file
@@ -0,0 +1,302 @@
|
||||
"""
|
||||
This Python script performs Google searches using various services such as SerpApi, Serper.dev, and more. It displays the search results, including organic results, People Also Ask, and Related Searches, in formatted tables. The script also utilizes GPT to generate titles and FAQs for the Google search results.
|
||||
|
||||
Features:
|
||||
- Utilizes SerpApi, Serper.dev, and other services for Google searches.
|
||||
- Displays organic search results, including position, title, link, and snippet.
|
||||
- Presents People Also Ask questions and snippets in a formatted table.
|
||||
- Includes Related Searches in the combined table with People Also Ask.
|
||||
- Configures logging with Loguru for informative messages.
|
||||
- Uses Rich and Tabulate for visually appealing and formatted tables.
|
||||
|
||||
Usage:
|
||||
- Ensure the necessary API keys are set in the .env file.
|
||||
- Run the script to perform a Google search with the specified query.
|
||||
- View the displayed tables with organic results, People Also Ask, and Related Searches.
|
||||
- Additional information, such as generated titles and FAQs using GPT, is presented.
|
||||
|
||||
Modifications:
|
||||
- Update the environment variables in the .env file with the required API keys.
|
||||
- Customize the search parameters, such as location and language, in the functions as needed.
|
||||
- Adjust logging configurations, table formatting, and other aspects based on preferences.
|
||||
|
||||
To-Do (TBD):
|
||||
- Consider adding further enhancements or customization based on specific use cases.
|
||||
|
||||
Note: This script depends on external libraries such as SerpApi, Loguru, Rich, and Tabulate. Install them using 'pip install serpapi loguru rich tabulate' if not already installed.
|
||||
"""
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
import pandas as pd
|
||||
import json
|
||||
import requests
|
||||
from clint.textui import progress
|
||||
#from serpapi import GoogleSearch
|
||||
from loguru import logger
|
||||
from tabulate import tabulate
|
||||
|
||||
# Configure logger
|
||||
logger.remove()
from dotenv import load_dotenv
# Load environment variables from the .env file two directories above this
# module. The path is anchored on this file's location rather than on the
# current working directory, so it resolves the same way no matter where the
# program is started from.
load_dotenv(Path(__file__).resolve().parents[2] / '.env')
# Log to stdout as "LEVEL|file:line:function| message".
logger.add(
    sys.stdout,
    colorize=True,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
)
|
||||
|
||||
from .gpt_titles_faq import gpt_titles_faqs_google_search
|
||||
|
||||
#from tenacity import retry, stop_after_attempt, wait_random_exponential
|
||||
#@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
|
||||
|
||||
|
||||
#FIXME: Accept language, country and time frame to search for.
|
||||
def google_search(query):
    """
    Perform a Google search for the given query.

    SerpApi is tried first (its result is currently not used), then the
    search is done with Serper.dev and the result is displayed through
    ``process_search_results``.

    Args:
        query (str): The search query.

    Returns:
        dict | None: The Serper.dev response, or None when that search
        failed or returned no usable response.
    """
    try:
        perform_serpapi_google_search(query)
        logger.info(f"FIXME: Google serapi: {query}")
        #return process_search_results(search_result)
    except Exception as err:
        logger.error(f"ERROR: Check Here: https://serpapi.com/. Your requests may be over. {err}")

    # Retry with serper.dev
    # Bound up front so the final return cannot hit an unbound local when the
    # Serper call raises (for example because the API key is missing).
    search_result = None
    try:
        logger.info("Trying Google search with Serper.dev: https://serper.dev/api-key")
        search_result = perform_serperdev_google_search(query)
        # A failed request yields None, which cannot be processed further.
        if search_result is not None:
            process_search_results(search_result)
    except Exception as err:
        logger.error(f"Failed to do Google search with serper.dev: {err}")

    return search_result
|
||||
|
||||
# # Retry with BROWSERLESS API
|
||||
# try:
|
||||
# search_result = perform_browserless_google_search(query)
|
||||
# #return process_search_results(search_result, flag)
|
||||
# except Exception as err:
|
||||
# logger.error("FIXME: Failed to do Google search with BROWSERLESS API.")
|
||||
# logger.debug("FIXME: Trying with dataforSEO API.")
|
||||
#
|
||||
# # Retry with dataforSEO API
|
||||
# try:
|
||||
# logger.info("Perform SERP with Data for SEO.")
|
||||
# #search_result = perform_dataforseo_google_search(query)
|
||||
# #return process_search_results(search_result, flag)
|
||||
# except Exception as err:
|
||||
# logger.error("FIXME: Failed to do Google search with dataforSEO API.")
|
||||
# logger.debug("All retries failed. Giving up.")
|
||||
# raise
|
||||
|
||||
|
||||
|
||||
def perform_serpapi_google_search(query, location="in"):
    """
    Perform a Google search using the SerpApi service.

    The API key is read from the ``SERPAPI_KEY`` environment variable.
    Errors are logged and not raised.

    Args:
        query (str): The search query.
        location (str, optional): The location for the search (default is "in").

    Returns:
        dict | None: A dictionary containing the search results, or None
        when the key is missing or the search failed.
    """
    try:
        # The key was previously checked but never assigned, so the request
        # below referenced an undefined name.
        api_key = os.getenv("SERPAPI_KEY")
        if not api_key:
            raise ValueError("SERPAPI_KEY key is required for SerpApi")

        # NOTE(review): the `from serpapi import GoogleSearch` import appears
        # to be commented out at the top of this file; while that is the case
        # this call fails and is logged by the generic handler below.
        search = GoogleSearch({
            "q": query,
            "location": location,
            "api_key": api_key,
        })
        # Get search results as a dictionary
        return search.get_dict()

    except ValueError as ve:
        # Handle missing API key error
        logger.info(f"SERPAPI ValueError: {ve}")
    except Exception as e:
        # Handle other exceptions
        logger.info(f"SERPAPI An error occurred: {e}")
    return None
|
||||
|
||||
|
||||
def perform_serperdev_google_search(query):
    """
    Perform a Google search using the Serper API.

    Args:
        query (str): The search query.

    Returns:
        dict | None: The JSON response from the Serper API, or None when the
        API answered with a non-200 status.

    Raises:
        ValueError: If ``SERPER_API_KEY`` is not set.
        requests.exceptions.RequestException: On connection errors or when
            the request times out.
    """
    logger.info("Doing serper.dev google search.")
    # Get the Serper API key from environment variables
    serper_api_key = os.getenv('SERPER_API_KEY')
    if not serper_api_key:
        raise ValueError("SERPER_API_KEY is missing. Set it in the .env file.")

    # Serper API endpoint URL
    url = "https://google.serper.dev/search"

    # FIXME: Expose options to end user. Request payload
    payload = json.dumps({
        "q": query,
        "gl": "in",
        "hl": "en",
        "num": 5,
        "autocorrect": True,
        "page": 1,
        "type": "search",
        "engine": "google"
    })

    # Request headers with API key
    headers = {
        'X-API-KEY': serper_api_key,
        'Content-Type': 'application/json'
    }

    # Send a POST request to the Serper API with progress bar
    with progress.Bar(label="Searching", expected_size=100) as bar:
        # Without a timeout an unresponsive server would block forever.
        response = requests.post(url, headers=headers, data=payload, stream=True, timeout=30)

        if response.status_code == 200:
            # Parse and return the JSON response
            return response.json()

        # Log the failure and signal it to the caller with None
        logger.error(f"Error: {response.status_code}, {response.text}")
        return None
|
||||
|
||||
|
||||
|
||||
def perform_browserless_google_search():
    """Placeholder for a Browserless-based search; returns None."""
    return None
|
||||
|
||||
def perform_dataforseo_google_search():
    """Placeholder for a DataForSEO-based search; returns None."""
    return None
|
||||
|
||||
|
||||
|
||||
def process_search_results(search_results):
    """
    Display the search results as tables and append them to the save file.

    Two tables are printed: the organic results, and "People Also Ask"
    combined with the related searches. Both tables are then handed to
    ``save_in_file``.

    Args:
        search_results (dict): The search results JSON.

    Returns:
        dict: The unchanged ``search_results`` that was passed in.
    """
    from itertools import zip_longest

    logger.info(f"Google Search Parameters: {search_results.get('searchParameters', {})}")
    print(search_results)

    # Displaying Organic Results. A response without an "organic" section
    # yields an empty table instead of a KeyError.
    organic_data = [
        [
            result.get("position", ""),
            result.get("title", ""),
            result.get("link", ""),
            result.get("snippet", ""),
        ]
        for result in search_results.get("organic", [])
    ]
    organic_headers = ["Rank", "Title", "Link", "Snippet"]
    organic_table = tabulate(organic_data,
                             headers=organic_headers,
                             tablefmt="fancy_grid",
                             colalign=["center", "left", "left", "left"],
                             maxcolwidths=[5, 25, 35, 50])

    # Print the tables
    print("\n\n📢❗🚨 Google search Organic Results:")
    print(organic_table)

    # Displaying People Also Ask and Related Searches combined
    try:
        people_also_ask_data = [
            [question.get("title", ""), question.get("snippet", ""), question.get("link", "")]
            for question in search_results.get("peopleAlsoAsk", [])
        ]
    except (AttributeError, TypeError) as people_also_ask_err:
        logger.error(f"Error processing 'peopleAlsoAsk': {people_also_ask_err}")
        people_also_ask_data = []

    related_searches_data = [
        [related.get("query", "")]
        for related in search_results.get("relatedSearches", [])
    ]
    related_searches_headers = ["Related Search"]

    if people_also_ask_data:
        # Add Related Searches as a column to People Also Ask. Rows are padded
        # on whichever side is shorter so no related search is dropped.
        empty_question = ["", "", ""]
        combined_data = [
            (question_row if question_row is not None else empty_question)
            + [related_row[0] if related_row is not None else ""]
            for question_row, related_row in zip_longest(people_also_ask_data, related_searches_data)
        ]
        combined_headers = ["Question", "Snippet", "Link", "Related Search"]
        combined_table = tabulate(
            combined_data,
            headers=combined_headers,
            tablefmt="fancy_grid",
            colalign=["left", "left", "left", "left"],
            maxcolwidths=[20, 50, 20, 30]
        )
    else:
        combined_table = tabulate(
            related_searches_data,
            headers=related_searches_headers,
            tablefmt="fancy_grid",
            colalign=["left"],
            maxcolwidths=[60]
        )

    print("\n\n📢❗🚨 People Also Ask & Related Searches:")
    print(combined_table)

    # Save both tables to a file
    try:
        save_in_file(organic_table)
        save_in_file(combined_table)
    except Exception as save_results_err:
        logger.error(f"Failed to save search results: {save_results_err}")
    return search_results
|
||||
|
||||
|
||||
def save_in_file(table_content):
    """Append search analysis text to the file named by SEARCH_SAVE_FILE.

    Three newlines are written after the content. Failures are logged and
    not raised.

    Args:
        table_content (str): The text to append.
    """
    file_path = os.environ.get('SEARCH_SAVE_FILE')
    if not file_path:
        # Report the real cause instead of failing inside open().
        logger.error("Error occurred while writing to the file: SEARCH_SAVE_FILE is not set")
        return

    try:
        # The content may contain non-ASCII characters (emoji, table borders),
        # so the encoding must not depend on the platform default.
        with open(file_path, "a", encoding="utf-8") as file:
            file.write(table_content)
            file.write("\n" * 3)  # Add three newlines at the end
        logger.info(f"Search content saved to {file_path}")
    except (OSError, TypeError) as e:
        logger.error(f"Error occurred while writing to the file: {e}")
|
||||
530
lib/ai_web_researcher/google_trends_researcher.py
Normal file
530
lib/ai_web_researcher/google_trends_researcher.py
Normal file
@@ -0,0 +1,530 @@
|
||||
"""
|
||||
This Python script analyzes Google search keywords by fetching auto-suggestions, performing keyword clustering, and visualizing Google Trends data. It uses various libraries such as pytrends, requests_html, tqdm, and more.
|
||||
|
||||
Features:
|
||||
- Fetches auto-suggestions for a given search keyword from Google.
|
||||
- Performs keyword clustering using K-means algorithm based on TF-IDF vectors.
|
||||
- Visualizes Google Trends data, including interest over time and interest by region.
|
||||
- Retrieves related queries and topics for a set of search keywords.
|
||||
- Utilizes visualization libraries such as Matplotlib, Plotly, and Rich for displaying results.
|
||||
- Incorporates logging for error handling and informative messages.
|
||||
|
||||
Usage:
|
||||
- Provide a search term or a list of search terms for analysis.
|
||||
- Run the script to fetch auto-suggestions, perform clustering, and visualize Google Trends data.
|
||||
- Explore the displayed results, including top keywords in each cluster and related topics.
|
||||
|
||||
Modifications:
|
||||
- Customize the search terms in the 'do_google_trends_analysis' function.
|
||||
- Adjust the number of clusters for keyword clustering and other parameters as needed.
|
||||
- Explore further visualizations and analyses based on the generated data.
|
||||
|
||||
Note: Ensure that the required libraries are installed using 'pip install pytrends requests_html tqdm tabulate plotly rich'.
|
||||
"""
|
||||
|
||||
import requests
|
||||
import numpy as np
|
||||
import sys
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.cluster import KMeans
|
||||
import matplotlib.pyplot as plt
|
||||
from sklearn.metrics import silhouette_score, silhouette_samples
|
||||
from rich.console import Console
|
||||
from rich.progress import Progress
|
||||
import urllib
|
||||
import json
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
import plotly.express as px
|
||||
import plotly.io as pio
|
||||
import logging
|
||||
from requests_html import HTML, HTMLSession
|
||||
from urllib.parse import quote_plus
|
||||
from tqdm import tqdm
|
||||
from tabulate import tabulate
|
||||
from pytrends.request import TrendReq
|
||||
import wordcloud
|
||||
# Standard-library logging at INFO level; several functions below call
# logging.error(...) directly.
logging.basicConfig(level=logging.INFO)
from loguru import logger

# Configure logger
# Remove loguru's handlers and log to stdout as
# "LEVEL|file:line:function| message".
logger.remove()
logger.add(sys.stdout,
        colorize=True,
        format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
    )
|
||||
|
||||
|
||||
def fetch_google_trends_interest_overtime(keyword):
    """Fetch and plot Google Trends interest over time for one keyword.

    The request uses the last year ('today 1-y') and geo 'US'. The result is
    shown as a Matplotlib line chart.

    Args:
        keyword (str): The search term to analyse.

    Returns:
        pd.DataFrame: The interest-over-time data with the index reset, or
        an empty DataFrame when anything fails.
    """
    try:
        trends_client = TrendReq(hl='en-US', tz=360)
        trends_client.build_payload([keyword], timeframe='today 1-y', geo='US')

        # 1. Interest Over Time
        interest = trends_client.interest_over_time().reset_index()

        # Visualization using Matplotlib
        plt.figure(figsize=(10, 6))
        plt.plot(interest['date'], interest[keyword], label=keyword)
        plt.title(f'Interest Over Time for "{keyword}"')
        plt.xlabel('Date')
        plt.ylabel('Interest')
        plt.legend()
        plt.show()
    except Exception as e:
        logging.error(f"Error in fetch_google_trends_data: {e}")
        return pd.DataFrame()
    return interest
|
||||
|
||||
|
||||
def plot_interest_by_region(kw_list):
    """Print and plot the ten regions with the highest interest.

    Regions are ranked by the first keyword; every keyword is drawn in the
    bar chart. Errors are printed and not raised.

    Args:
        kw_list (list[str] | str): Keywords to query. A single string is
            treated as one keyword.

    Returns:
        None
    """
    try:
        from pytrends.request import TrendReq
        import matplotlib.pyplot as plt

        keywords = [kw_list] if isinstance(kw_list, str) else list(kw_list)
        trends = TrendReq()
        trends.build_payload(kw_list=keywords)
        label = ' '.join(keywords)

        # Sort by the first keyword's column. The space-joined label is only
        # a display string and is not a column once several keywords are used.
        data = trends.interest_by_region()
        data = data.sort_values(by=keywords[0], ascending=False)
        print("\n📢❗🚨 ")
        print(f"Top 10 regions with highest interest for keyword: {label}")
        data = data.head(10)  # Top 10
        print(data)
        data.reset_index().plot(x="geoName", y=keywords,
                                figsize=(20, 15), kind="bar")
        plt.style.use('fivethirtyeight')
        plt.show()
        # FIXME: Send this image to vision GPT for analysis.

    except Exception as e:
        print(f"Error plotting interest by region: {e}")
    return None
|
||||
|
||||
|
||||
|
||||
|
||||
def get_related_queries_and_save_csv(keywords, hl='en-US', tz=360, cat=0, timeframe='today 12-m'):
    """
    Get related queries for the given search keywords and display them.

    Only the result for the first keyword is used. The top and rising
    queries are printed side by side and passed to ``save_in_file``; the CSV
    export itself is currently commented out.

    Args:
        keywords (list): List of search keywords.
        hl (str): Language parameter, default is 'en-US'.
        tz (int): Timezone parameter, default is 360.
        cat (int): Category parameter, default is 0.
        timeframe (str): Timeframe parameter, default is 'today 12-m'.

    Returns:
        The result of ``top + rising`` for the first keyword, or None when
        an error occurred (the error is printed).
    """
    try:
        # Build model
        pytrends = TrendReq(hl=hl, tz=tz)
        pytrends.build_payload(kw_list=keywords, cat=cat, timeframe=timeframe)

        # Get related queries
        data = pytrends.related_queries()

        # Extract data from the result
        first_keyword_result = list(data.values())[0]
        top_queries = first_keyword_result['top']
        rising_queries = first_keyword_result['rising']
        # NOTE(review): if these are DataFrames, `+` adds them element-wise
        # rather than concatenating rows - confirm what callers expect.
        top_rising_queries = top_queries + rising_queries

        # Convert lists to DataFrames
        df_top_queries = pd.DataFrame(top_queries)
        df_rising_queries = pd.DataFrame(rising_queries)

        # Rename columns to avoid duplicates
        df_top_queries.columns = ['Top query', 'value']
        df_rising_queries.columns = ['Rising query', 'value']

        # Combine side by side
        all_queries_df = pd.concat([df_top_queries, df_rising_queries], axis=1)
        #all_queries_df.to_csv('related_queries.csv', index=False)

        # Display additional information with emojis and bold formatting
        print("\n📢❗🚨 ")
        print("\n\033[1m🔝 Top\033[0m: The most popular search queries. Scoring is on a relative scale where a value of 100 is the most commonly searched query, 50 is a query searched half as often, and a value of 0 is a query searched for less than 1% as often as the most popular query.\n")
        print("\n\033[1m🚀 Rising\033[0m: Queries with the biggest increase in search frequency since the last time period. Results marked 'Breakout' had a tremendous increase, probably because these queries are new and had few (if any) prior searches.\n")
        # Display the DataFrame using tabulate
        print(tabulate(all_queries_df, headers='keys', tablefmt='fancy_grid'))

        # Save the combined table to a file
        # NOTE(review): save_in_file is not defined or imported in the visible
        # part of this file - verify it is in scope.
        try:
            save_in_file(all_queries_df)
        except Exception as save_results_err:
            logger.error(f"Failed to save search results: {save_results_err}")
        return top_rising_queries

    except Exception as e:
        print(f"get_related_queries_and_save_csv: ERROR: An error occurred: {e}")
        return None
|
||||
|
||||
|
||||
def get_related_topics_and_save_csv(search_keywords):
    """
    Get related topics for the given search keywords and display them.

    Only the result for the first keyword is used. The CSV export itself is
    currently commented out.

    Args:
        search_keywords (list): List of search keywords.

    Returns:
        pd.DataFrame: Top and rising topics side by side, or an empty
        DataFrame when an error occurred (the error is printed).
    """
    def _prefixed(frame, prefix):
        # Prefix every column except 'topic_title'.
        return [col if col == 'topic_title' else prefix + col for col in frame.columns]

    try:
        # Build model and payload
        trends_client = TrendReq(hl='en-US', tz=360)
        trends_client.build_payload(search_keywords, cat=0, timeframe='today 12-m')

        # Get related topics for the first keyword
        first_result = list(trends_client.related_topics().values())[0]
        top_df = pd.DataFrame(first_result['top'])
        rising_df = pd.DataFrame(first_result['rising'])

        # FIXME:Exclude specified columns
        hidden_columns = ['hasData', 'value', 'topic_mid', 'link']
        top_df = top_df.drop(columns=hidden_columns, errors='ignore')
        rising_df = rising_df.drop(columns=hidden_columns, errors='ignore')

        # Rename columns to avoid duplicates and provide meaningful names
        top_df.columns = _prefixed(top_df, 'Top- ')
        rising_df.columns = _prefixed(rising_df, 'Rising- ')

        all_topics_df = pd.concat([top_df, rising_df], axis=1)
        #all_topics_df.to_csv('related_topics.csv', index=False)

        print(f"\n\n 📢❗🚨 Rising and Trending Keywords for {search_keywords}\n")
        print("\033[1m🔝 Top\033[0m: The most popular search topics.")
        print("\033[1m🚀 Rising\033[0m: Topics experiencing a significant increase in search frequency since the last time period. Topics marked :pile_of_poop:'Breakout' had a tremendous surge, likely because they are new and had few prior searches.")
        # Let pandas print every row of the frame
        pd.set_option('display.max_rows', all_topics_df.shape[0]+1)
        print(all_topics_df.head(10))
        #print(tabulate(all_topics_df, headers='keys', tablefmt='fancy_grid'))
    except Exception as e:
        print(f"ERROR: An error occurred: {e}")
        return pd.DataFrame()
    return all_topics_df
|
||||
|
||||
|
||||
def get_source(url):
    """Fetch ``url`` with a fresh ``HTMLSession`` and return the response.

    Args:
        url (str): Address to request.

    Returns:
        The response object when the request succeeds with a non-error
        status; ``None`` when a ``requests`` exception is raised (the
        failure is logged).
    """
    try:
        page = HTMLSession().get(url)
        # Raise for error statuses so they are handled by the except below.
        page.raise_for_status()
    except requests.exceptions.RequestException as request_err:
        logging.error(f"Error during HTTP request: {request_err}")
        return None
    else:
        return page
|
||||
|
||||
|
||||
|
||||
def get_results(query):
    """Ask Google's autocomplete endpoint for ``query`` and decode the JSON.

    Args:
        query (str): Search phrase; it is URL-encoded before being sent.

    Returns:
        The parsed JSON payload, or ``None`` when ``get_source`` gives back a
        falsy response, the request fails, or the body is not valid JSON.
    """
    try:
        encoded_query = urllib.parse.quote_plus(query)
        endpoint = f"https://suggestqueries.google.com/complete/search?output=chrome&hl=en&q={encoded_query}"
        reply = get_source(endpoint)
        # get_source returns None on failure; any falsy reply is treated alike.
        if not reply:
            return None
        reply.raise_for_status()
        return json.loads(reply.text)
    except json.JSONDecodeError as decode_err:
        logging.error(f"Error decoding JSON response: {decode_err}")
        return None
    except requests.exceptions.RequestException as request_err:
        logging.error(f"Error during HTTP request: {request_err}")
        return None
|
||||
|
||||
|
||||
|
||||
def format_results(results):
    """Pair each suggested term with its relevance score.

    Args:
        results: Decoded autocomplete payload. Index 1 is read as the list of
            suggested terms and ``results[4]['google:suggestrelevance']`` as
            the list of scores, matched by position.

    Returns:
        list[dict]: One ``{'term': ..., 'relevance': ...}`` dict per term, or
        an empty list when an expected key or index is missing (logged).
    """
    try:
        terms = results[1]
        # The score lookup happens per term, so a short score list raises
        # IndexError and the whole batch is discarded.
        return [
            {'term': term, 'relevance': results[4]['google:suggestrelevance'][position]}
            for position, term in enumerate(terms)
        ]
    except (KeyError, IndexError) as parse_err:
        logging.error(f"Error parsing search results: {parse_err}")
        return []
|
||||
|
||||
|
||||
|
||||
def get_expanded_term_suffixes():
    """Return the single-letter suffixes ``'a'`` to ``'z'`` as a new list."""
    return list('abcdefghijklmnopqrstuvwxyz')
|
||||
|
||||
|
||||
|
||||
def get_expanded_term_prefixes():
    """Return the question/comparison prefixes placed in front of a query.

    Returns:
        list[str]: A new list on every call.
    """
    # For shopping, review type blogs.
    #return ['discount *', 'pricing *', 'cheap', 'best price *', 'lowest price', 'best value', 'sale', 'affordable', 'promo', 'budget''what *', 'where *', 'how to *', 'why *', 'buy*', 'how much*','best *', 'worse *', 'rent*', 'sale*', 'offer*','vs*','or*']
    question_prefixes = ('what *', 'where *', 'how to *', 'why *')
    comparison_prefixes = ('best *', 'vs*', 'or*')
    return [*question_prefixes, *comparison_prefixes]
|
||||
|
||||
|
||||
|
||||
def get_expanded_terms(query):
    """Build the list of query variants to send to autocomplete.

    Args:
        query (str): Root search phrase.

    Returns:
        list[str]: ``query`` itself, then ``"<prefix> <query>"`` for every
        prefix, then ``"<query> <suffix>"`` for every suffix. An empty list is
        returned if anything raises (the error is logged).
    """
    try:
        prefixed = [f"{prefix} {query}" for prefix in get_expanded_term_prefixes()]
        suffixed = [f"{query} {suffix}" for suffix in get_expanded_term_suffixes()]
        return [query, *prefixed, *suffixed]
    except Exception as e:
        logging.error(f"Error in get_expanded_terms: {e}")
        return []
|
||||
|
||||
|
||||
|
||||
def get_expanded_suggestions(query):
    """Fetch autocomplete suggestions for every expanded variant of ``query``.

    Args:
        query (str): Root search phrase.

    Returns:
        list[dict]: All formatted suggestions, ordered by ``'relevance'``
        from highest to lowest (missing relevance counts as 0). An empty list
        is returned if anything raises (the error is logged).
    """
    try:
        collected = []
        progress = tqdm(get_expanded_terms(query), desc="📢❗🚨 Fetching Google AutoSuggestions", unit="term")
        for candidate in progress:
            raw = get_results(candidate)
            if not raw:
                continue
            collected.extend(format_results(raw))

        # Stable sort: entries with equal relevance keep their fetch order.
        collected.sort(key=lambda item: item.get('relevance', 0), reverse=True)
        return collected
    except Exception as e:
        logging.error(f"Error in get_expanded_suggestions: {e}")
        return []
|
||||
|
||||
|
||||
|
||||
def get_suggestions_for_keyword(search_term):
    """Collect autocomplete suggestions for ``search_term`` as a DataFrame.

    Args:
        search_term (str): Root keyword to expand and look up.

    Returns:
        pd.DataFrame: Columns ``Keywords`` and ``Relevance`` with duplicate
        keywords removed. When nothing is found or an error occurs, an empty
        frame with the same two columns is returned (previously ``None``), so
        callers can always index by column name.
    """
    result_columns = ['Keywords', 'Relevance']
    try:
        expanded_results = get_expanded_suggestions(search_term)
        if not expanded_results:
            # An empty list would build a column-less frame and the column
            # rename below would raise; hand back a well-formed empty frame.
            logging.warning(f"get_suggestions_for_keyword: No suggestions found for '{search_term}'")
            return pd.DataFrame(columns=result_columns)

        # Select the two keys explicitly so the rename cannot mis-assign.
        expanded_results_df = pd.DataFrame(expanded_results, columns=['term', 'relevance'])
        expanded_results_df.columns = result_columns
        #expanded_results_df.to_csv('results.csv', index=False)
        # Global display option: lets a later print show every row.
        pd.set_option('display.max_rows', expanded_results_df.shape[0] + 1)
        expanded_results_df.drop_duplicates('Keywords', inplace=True)

        return expanded_results_df
    except Exception as e:
        logging.error(f"get_suggestions_for_keyword: Error in main: {e}")
        return pd.DataFrame(columns=result_columns)
|
||||
|
||||
|
||||
|
||||
def perform_keyword_clustering(expanded_results_df, num_clusters=5):
    """Cluster keywords with TF-IDF vectors and K-means.

    The input frame is modified in place: ``Keywords`` is lower-cased and a
    ``cluster_label`` column is added.

    Args:
        expanded_results_df (pd.DataFrame): Frame with a ``Keywords`` column.
        num_clusters (int): Desired number of clusters; reduced to the number
            of keywords when there are fewer keywords than clusters.

    Returns:
        pd.DataFrame: The same frame with ``cluster_label`` added, or an empty
        DataFrame if vectorisation or clustering fails (the error is logged).
    """
    try:
        # Preprocessing: Convert the keywords to lowercase
        expanded_results_df['Keywords'] = expanded_results_df['Keywords'].str.lower()

        # Vectorization: fit a TF-IDF vectorizer to the keywords
        vectorizer = TfidfVectorizer()
        tfidf_vectors = vectorizer.fit_transform(expanded_results_df['Keywords'])

        # K-means cannot produce more clusters than there are samples.
        n_samples = tfidf_vectors.shape[0]
        n_clusters = max(1, min(int(num_clusters), n_samples))
        if n_clusters != num_clusters:
            logging.warning(
                f"perform_keyword_clustering: using {n_clusters} clusters for {n_samples} keywords"
            )

        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        cluster_labels = kmeans.fit_predict(tfidf_vectors)

        # Add cluster labels to the DataFrame
        expanded_results_df['cluster_label'] = cluster_labels

        # The silhouette score is only defined for 2..n_samples-1 distinct
        # labels; skip it otherwise rather than discard the clustering.
        n_labels = len(set(cluster_labels))
        if 1 < n_labels < n_samples:
            silhouette_avg = silhouette_score(tfidf_vectors, cluster_labels)
            print(f"Silhouette Score: {silhouette_avg}")
        else:
            logging.warning("perform_keyword_clustering: silhouette score skipped (too few clusters or samples)")

        # Visualize cluster quality using a silhouette plot
        #visualize_silhouette(tfidf_vectors, cluster_labels)

        return expanded_results_df
    except Exception as e:
        logging.error(f"Error in perform_keyword_clustering: {e}")
        return pd.DataFrame()
|
||||
|
||||
|
||||
|
||||
def visualize_silhouette(X, labels):
    """Draw a silhouette plot for a clustering and print the average score.

    Args:
        X: Feature matrix that was clustered (``X.shape[0]`` is used as the
            sample count).
        labels: Cluster label per sample; any sequence is accepted and
            converted to an ndarray so boolean masking works.

    Returns:
        None. Errors are logged rather than raised.
    """
    try:
        # A plain list would make ``labels == i`` a single bool, not a mask.
        labels = np.asarray(labels)
        cluster_ids = np.unique(labels)

        silhouette_avg = silhouette_score(X, labels)
        print(f"Silhouette Score: {silhouette_avg}")

        # Single axes holding the silhouette plot
        _, ax1 = plt.subplots(1, 1, figsize=(8, 6))
        ax1.set_xlim([-0.1, 1])
        # Leave a 10-unit gap between the bands of consecutive clusters.
        ax1.set_ylim([0, X.shape[0] + (len(cluster_ids) + 1) * 10])

        # Compute the silhouette scores for each sample
        sample_silhouette_values = silhouette_samples(X, labels)

        y_lower = 10
        for i in cluster_ids:
            # Sorted silhouette scores of the samples in cluster i
            ith_cluster_silhouette_values = np.sort(sample_silhouette_values[labels == i])

            size_cluster_i = ith_cluster_silhouette_values.shape[0]
            y_upper = y_lower + size_cluster_i

            color = plt.cm.nipy_spectral(float(i) / len(cluster_ids))
            ax1.fill_betweenx(np.arange(y_lower, y_upper),
                              0, ith_cluster_silhouette_values,
                              facecolor=color, edgecolor=color, alpha=0.7)

            # Label the silhouette plots with their cluster numbers at the middle
            ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

            # Compute the new y_lower for the next plot
            y_lower = y_upper + 10

        ax1.set_title("Silhouette plot for KMeans clustering")
        ax1.set_xlabel("Silhouette coefficient values")
        ax1.set_ylabel("Cluster label")

        # The vertical line for the average silhouette score of all the values
        ax1.axvline(x=silhouette_avg, color="red", linestyle="--")

        plt.show()
    except Exception as e:
        logging.error(f"Error in visualize_silhouette: {e}")
|
||||
|
||||
|
||||
|
||||
def print_and_return_top_keywords(expanded_results_df, num_clusters=5):
    """Display, save and return the top keywords of each cluster.

    Args:
        expanded_results_df (pd.DataFrame): Frame with ``Relevance`` and
            ``cluster_label`` columns.
        num_clusters (int or str): Labels ``0 .. num_clusters-1`` are
            inspected, or every label present when ``'all'`` is given.

    Returns:
        pd.DataFrame: Up to five rows per cluster, highest ``Relevance`` first.
    """
    if num_clusters == 'all':
        unique_clusters = expanded_results_df['cluster_label'].unique()
    else:
        unique_clusters = range(int(num_clusters))

    # Collect the per-cluster slices and concatenate once.
    per_cluster = [
        expanded_results_df[expanded_results_df['cluster_label'] == cluster_id]
        .sort_values(by='Relevance', ascending=False)
        .head(5)
        for cluster_id in unique_clusters
    ]
    top_keywords_df = pd.concat(per_cluster) if per_cluster else pd.DataFrame()

    print("\n📢❗🚨 GTop Keywords for All Clusters:")
    table = tabulate(top_keywords_df, headers='keys', tablefmt='fancy_grid')
    # Save the rendered table text. The DataFrame itself cannot be passed to
    # file.write(), so saving it used to fail every time.
    try:
        save_in_file(table)
    except Exception as save_results_err:
        logger.error(f"Failed to save search results: {save_results_err}")
    print(table)
    return top_keywords_df
|
||||
|
||||
|
||||
def generate_wordcloud(keywords):
    """Generate and display a word cloud from a list of keywords.

    Args:
        keywords (list): Keyword strings; they are joined with spaces before
            being handed to ``WordCloud``.

    Returns:
        None. Nothing is drawn when the joined text is empty.
    """
    # Convert the list of keywords to a string
    text = ' '.join(keywords)
    if not text.strip():
        logging.warning("generate_wordcloud: no keywords given, skipping word cloud")
        return

    # Generate word cloud
    wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)

    # figsize is in inches, not pixels: (600, 200) asked for a gigantic
    # canvas. 8x4 keeps the 2:1 aspect of the 800x400 cloud image.
    plt.figure(figsize=(8, 4))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
|
||||
|
||||
|
||||
|
||||
def save_in_file(table_content):
    """Write search analysis text to the file named by ``SEARCH_SAVE_FILE``.

    Args:
        table_content: Content to save; converted with ``str()`` so a
            non-string object (for example a DataFrame) is still written.

    Returns:
        None. Problems are logged, never raised.
    """
    file_path = os.environ.get('SEARCH_SAVE_FILE')
    if not file_path:
        logger.error("SEARCH_SAVE_FILE is not set; search content was not saved.")
        return

    try:
        # The target directory may not exist yet.
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # utf-8 so emoji and box-drawing characters can be written on any platform.
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(str(table_content))
            file.write("\n" * 3)  # Add three newlines at the end
        logger.info(f"Search content saved to {file_path}")
    except Exception as e:
        logger.error(f"Error occurred while writing to the file: {e}")
|
||||
|
||||
|
||||
def _clean_keywords(values):
    """Return the non-missing entries of ``values`` as stripped, non-empty strings."""
    cleaned = []
    for value in values:
        # concat(axis=1) pads the shorter topic column with NaN; skip those
        # instead of turning them into the keyword 'nan'.
        if value is None or (not isinstance(value, str) and pd.isna(value)):
            continue
        text = str(value).strip()
        if text:
            cleaned.append(text)
    return cleaned


def do_google_trends_analysis(search_term):
    """Gather target keywords for ``search_term`` from autosuggest and trends.

    Autocomplete suggestions are clustered and the top keywords per cluster
    are combined with the related topic titles returned by
    ``get_related_topics_and_save_csv``. The combined keywords are printed as
    a table and shown as a word cloud.

    Args:
        search_term (str): Root keyword to analyse.

    Returns:
        list[str]: Combined keywords, or an empty list if the analysis fails
        (the error is logged).
    """
    search_terms = [f"{search_term}"]
    suggestion_keywords = []
    try:
        # FIXME: Lets work with a single root keyword.
        for asearch_term in search_terms:
            suggestions_df = get_suggestions_for_keyword(asearch_term)
            result_df = perform_keyword_clustering(suggestions_df)
            if 'cluster_label' not in result_df.columns:
                # Clustering failed; still continue with related topics.
                logging.warning(f"No clustered suggestions available for '{asearch_term}'")
                continue
            # Display top keywords in each cluster
            top_keywords = print_and_return_top_keywords(result_df)
            suggestion_keywords.extend(_clean_keywords(top_keywords['Keywords'].tolist()))

        # FIXME: Get result from vision GPT. Fetch and visualize Google Trends data
        # trends_data = fetch_google_trends_interest_overtime("llamaindex")
        # FIXME: Plot Interest Over time.
        # result_df = plot_interest_by_region(search_term)

        related_df = get_related_topics_and_save_csv(search_terms)
        topic_keywords = []
        if 'topic_title' in related_df.columns:
            # With both a top and a rising 'topic_title' column each row is a
            # list; with a single column each row is a scalar.
            for row in related_df['topic_title'].values.tolist():
                cells = row if isinstance(row, (list, tuple)) else [row]
                topic_keywords.extend(_clean_keywords(cells))

        print(f"\nRising and Top keywords: {', '.join(topic_keywords)}")
        print(f"\n\n📢❗🚨 Important keywords to target: {', '.join(suggestion_keywords)}\n\n")

        # Keep the keywords as lists. The earlier join/append/split round trip
        # glued two keywords together and split titles that contain commas.
        all_the_keywords = suggestion_keywords + topic_keywords

        # Lay the keywords out in rows of chunk_size, padding the last row so
        # the frame always has exactly chunk_size columns.
        chunk_size = 4
        chunks = []
        for start in range(0, len(all_the_keywords), chunk_size):
            chunk = all_the_keywords[start:start + chunk_size]
            chunks.append(chunk + [''] * (chunk_size - len(chunk)))
        combined_df = pd.DataFrame(chunks, columns=[f'K📢eyword Col{i + 1}' for i in range(chunk_size)])

        # Print the table
        print(tabulate(combined_df, headers='keys', tablefmt='fancy_grid'))

        if all_the_keywords:
            try:
                generate_wordcloud(all_the_keywords)
            except Exception as cloud_err:
                # The picture is optional; keep the keywords.
                logging.error(f"Failed to render keyword word cloud: {cloud_err}")
        return all_the_keywords
    except Exception as e:
        logging.error(f"Error in do_google_trends_analysis: {e}")
        return []
|
||||
49
lib/ai_web_researcher/gpt_blog_sections.py
Normal file
49
lib/ai_web_researcher/gpt_blog_sections.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import sys
|
||||
import json
|
||||
|
||||
from ..gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from ..gpt_providers.gemini_pro_text import gemini_text_response
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
# FIXME: Provide num_blogs, num_faqs as inputs.
|
||||
def get_blog_sections_from_websearch(search_keyword, search_results, gpt_providers="gemini"):
    """Ask an LLM for a blog title and five sub titles based on search results.

    Args:
        search_keyword (str): Keyword that was researched.
        search_results: Search result data embedded verbatim in the prompt.
        gpt_providers (str): Provider selector; checked for ``'gemini'``
            first, then ``'openai'``.

    Returns:
        For gemini, the reply parsed with ``json.loads`` (the prompt requests
        a ``'blogSections'`` key). For openai, whatever ``openai_chatgpt``
        returns, unparsed. ``None`` when the selector names neither provider.

    Raises:
        Exception: Provider and JSON parsing errors are logged and re-raised.
    """
    prompt = f"""
        As a SEO expert and content writer, I will provide you with a search keyword and its google search result.
        Your task is to write a blog title and 5 blog sub titles, from the given google search result.
        The subtitles should be less than 40 characters and click worthy.
        Do not explain, describe your response. Respond in json format, always name the key as 'blogSections'.

        Web Research Keyword: "{search_keyword}"
        Google search Result: "{search_results}"
        """

    if 'gemini' in gpt_providers:
        try:
            response = gemini_text_response(prompt)
            # A fenced reply is unwrapped by dropping its first and last lines.
            if '```' in response and '\n' in response:
                response = response.strip().split('\n')
                response = '\n'.join(response[1:-1])
            response = json.loads(response)
            return response
        except Exception as err:
            # Do not touch `response` here: it is unbound when the call itself
            # failed, and otherwise a str/parsed object with no
            # `prompt_feedback`, so reading it masked the real error.
            logger.error(f"Failed to get response from gemini: {err}")
            raise err
    elif 'openai' in gpt_providers:
        try:
            logger.info("Calling OpenAI LLM.")
            response = openai_chatgpt(prompt)
            return response
        except Exception as err:
            logger.error(f"Failed to get response from Openai: {err}")
            raise err
    else:
        logger.error(f"Unsupported gpt provider: {gpt_providers}")
        return None
|
||||
41
lib/ai_web_researcher/gpt_competitor_analysis.py
Normal file
41
lib/ai_web_researcher/gpt_competitor_analysis.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import sys
|
||||
|
||||
from ..gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from ..gpt_providers.gemini_pro_text import gemini_text_response
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
def summarize_competitor_content(research_content, gpt_providers="openai"):
    """Summarize gathered company/competitor details with the chosen LLM.

    Args:
        research_content (str): Text collected about the company.
        gpt_providers (str): Provider selector; checked for ``'gemini'``
            first, then ``'openai'``.

    Returns:
        Whatever the selected provider helper returns, or ``None`` when the
        selector names neither provider.

    Raises:
        Exception: Any provider error is logged and re-raised.
    """
    if 'gemini' in gpt_providers:
        report_prompt = f"""You are a helpful assistant writing a research report about a company. I will provide you with company details.
        Summarize the given company details into multiple paragraphs.
        Be extremely concise, professional, and factual as possible.
        The first paragraph should be an introduction and summary of the company.
        The second paragraph should include pros and cons of the company.
        The third paragraph should be on their pricing model.
        Include a conclusion, summarizing your research about the given company details.
        Company details: '{research_content}'"""
        try:
            return gemini_text_response(report_prompt)
        except Exception as gemini_err:
            logger.error(f"Failed to get response from gemini: {gemini_err}")
            raise gemini_err

    if 'openai' in gpt_providers:
        # The OpenAI path sends only the raw content, with no report instructions.
        content_prompt = f""" Web page content: {research_content} """
        try:
            logger.info("Calling OpenAI LLM.")
            return openai_chatgpt(content_prompt)
        except Exception as openai_err:
            logger.error(f"failed to get response from Openai: {openai_err}")
            raise openai_err

    return None
|
||||
185
lib/ai_web_researcher/gpt_online_researcher.py
Normal file
185
lib/ai_web_researcher/gpt_online_researcher.py
Normal file
@@ -0,0 +1,185 @@
|
||||
################################################################
|
||||
#
|
||||
#
|
||||
#
|
||||
##############################################################
|
||||
|
||||
import os
|
||||
import json
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from typing import List, NamedTuple
|
||||
from loguru import logger
|
||||
from datetime import datetime
|
||||
|
||||
from ..gpt_providers.gemini_pro_text import gemini_text_response
|
||||
from .tavily_ai_search import get_tavilyai_results
|
||||
from .metaphor_basic_neural_web_search import metaphor_find_similar, metaphor_search_articles
|
||||
from .google_serp_search import google_search
|
||||
from .google_trends_researcher import do_google_trends_analysis
|
||||
from .gpt_blog_sections import get_blog_sections_from_websearch
|
||||
from .web_research_report import write_web_research_report
|
||||
|
||||
|
||||
# Configure logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
def gpt_web_researcher(search_keywords, time_range=None, include_domains=None, similar_url=None):
    """Run the web research pipeline for ``search_keywords``.

    Only the Google Trends analysis step is active; the Google SERP, Tavily
    and Metaphor searches are kept below as commented-out work in progress.
    ``SEARCH_SAVE_FILE`` is set in the environment to a timestamped path under
    ``<cwd>/workspace/web_research_reports``.

    Args:
        search_keywords (str): Keywords to research.
        time_range (str, optional): Search time range (used by the disabled
            Metaphor step).
        include_domains (list, optional): Domains to restrict searches to; a
            missing or empty value becomes a new empty list.
        similar_url (str, optional): URL for similar-content search (used by
            the disabled Metaphor step).

    Returns:
        None.
    """
    print(f"Web Research:Time Range - {time_range},Search Keywords - {search_keywords},Include URLs - {include_domains}")
    if not include_domains:
        include_domains = list()
    # TBD: Keeping the results directory as fixed, for now.
    os.environ["SEARCH_SAVE_FILE"] = os.path.join(os.getcwd(), "workspace", "web_research_reports",
            search_keywords.replace(" ", "_") + "_" + datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))

    # Collect all blog titles featuring in search results. This *may help in generating blog titles
    # closest to competing ones. All search blog titles, given keyword and keywords from analysis, give
    # llm a good context for the task of generating blog titles.
    blog_titles = []
    # Get a list of FAQs from search results.
    blog_faqs = None
    google_result = None
    tavily_result = None
    report = None
    # Bound up front so the print below cannot hit an unbound name when the
    # trends analysis raises.
    important_keywords = None
#    try:
#        logger.info(f"Doing Google search for: {search_keywords}\n")
#        google_result = google_search(search_keywords)
#        blog_titles.append(extract_info(google_result, "titles"))
#    except Exception as err:
#        logger.error(f"Failed to do Google Serpapi research: {err}")
#        # Not failing, as tavily would do same and then GPT-V to search.
#
#    try:
#        # FIXME: Include the follow-up questions as blog FAQs.
#        logger.info(f"Doing Tavily AI search for: {search_keywords}")
#        tavily_result = get_tavilyai_results(search_keywords, include_domains)
#        blog_titles.append(tavily_extract_information(tavily_result, "titles"))
#    except Exception as err:
#        logger.error(f"Failed to do Tavily AI Search: {err}")

#    try:
#        logger.info(f"Start Semantic/Neural web search with Metahpor: {search_keywords}")
#        response_articles = metaphor_search_articles(
#                search_keywords,
#                include_domains=include_domains,
#                time_range=time_range,
#                similar_url=similar_url)
#        blog_titles.append(metaphor_extract_titles_or_text(response_articles, return_titles=True))
#    except Exception as err:
#        logger.error(f"Failed to do Metaphor search: {err}")
#    print(blog_titles)

    try:
        logger.info(f"Do Google Trends analysis for given keywords: {search_keywords}")
        important_keywords = do_google_trends_analysis(search_keywords)
    except Exception as err:
        logger.error(f"Failed to do google trends analysis: {err}")
    print(important_keywords)
    # Now that we have search results from given keywords. Generate blog title and subtopics suggestions.
    # 1. Return a list of related keywords along with search volumes.
    # 2. New blog titles to write on(niche, top) and blog sections.
    # 3. Competitors list, similar urls if given.
|
||||
|
||||
|
||||
class Result(NamedTuple):
    """One search result record, built from a dict via ``Result(**data)``.

    All nine fields are required; none has a default.
    """

    url: str
    id: str
    # Read by metaphor_extract_titles_or_text when titles are requested.
    title: str
    score: float
    published_date: str
    author: str
    # Read by metaphor_extract_titles_or_text when text is requested.
    text: str
    highlights: List[str]
    highlight_scores: List[float]
|
||||
|
||||
|
||||
def metaphor_extract_titles_or_text(json_data, return_titles=True):
    """Pull the titles or the texts out of a list of result dicts.

    Args:
        json_data (list): Dicts whose keys match the ``Result`` fields; each
            one is converted with ``Result(**entry)`` before extraction.
        return_titles (bool): ``True`` for titles, ``False`` for text.

    Returns:
        list: The ``title`` or ``text`` of every entry, in input order.
    """
    # Convert every entry first so a malformed one fails before extraction.
    parsed = [Result(**entry) for entry in json_data]
    wanted_field = "title" if return_titles else "text"
    return [getattr(item, wanted_field) for item in parsed]
|
||||
|
||||
|
||||
def extract_info(json_data, info_type):
    """Extract titles, 'people also ask' questions or related searches.

    Args:
        json_data (dict): Search result data.
        info_type (str): ``'titles'``, ``'peopleAlsoAsk'`` or
            ``'relatedSearches'``.

    Returns:
        list or None: The requested values (``None`` for entries missing the
        field), or ``None`` after printing a message when ``info_type`` is
        not one of the three names.
    """
    # (info_type, section of json_data, field read from each entry)
    lookup = (
        ("titles", "organic", "title"),
        ("peopleAlsoAsk", "peopleAlsoAsk", "question"),
        ("relatedSearches", "relatedSearches", "query"),
    )
    for type_name, section, field in lookup:
        if info_type == type_name:
            return [entry.get(field) for entry in json_data.get(section, [])]

    print("Invalid info_type. Please use 'titles', 'peopleAlsoAsk', or 'relatedSearches'.")
    return None
|
||||
|
||||
|
||||
def tavily_extract_information(json_data, keyword):
    """Extract one kind of information from a Tavily search result.

    Args:
        json_data (dict): Result data with ``results``, ``answer`` and
            ``follow_up_questions`` entries (only the one needed is read).
        keyword (str): ``'title'`` (``'titles'`` is accepted as an alias, to
            match ``extract_info``), ``'content'``, ``'answer'`` or
            ``'follow-query'``.

    Returns:
        list or str: The extracted values, or the string
        ``"Invalid keyword: <keyword>"`` for an unknown keyword.
    """
    if keyword in ('title', 'titles'):
        return [result['title'] for result in json_data['results']]
    elif keyword == 'content':
        return [result['content'] for result in json_data['results']]
    elif keyword == 'answer':
        return json_data['answer']
    elif keyword == 'follow-query':
        return json_data['follow_up_questions']
    else:
        return f"Invalid keyword: {keyword}"
|
||||
|
||||
|
||||
def compete_organic_results(query, report, organic_results):
    """Ask Gemini to rewrite a blog so it competes with top organic results.

    Args:
        query (str): Topic of the blog.
        report (str): Current blog content.
        organic_results: Organic search results embedded in the prompt.

    Returns:
        Whatever ``gemini_text_response`` returns for the rewrite prompt.
    """
    rewrite_prompt = f""" As an SEO expert and copywriter, I will provide you with my blog content on topic '{query}', and
        Top google search results.
        Your task is to rewrite the given blog to make it compete against top position results.
        Make sure, the new blog has high probability of ranking highest against given organic search result competitors.
        Modify the given blog content following best SEO practises.
        Make sure the blog is original, unique and highly readable.
        Remember, Maintain and adopt the formatting, structure, style and tone of the provided blog content.
        Include relevant emojis in your final blog for visual appeal. Use it sparingly.
        Your response should be well-structured, objective, and critically acclaimed blog article based on provided texts.

        Remember, your goal is to create a detailed blog article that will compete against given organic result competitors.
        Do not provide explanations, suggestions for your response, reply only with your final response.
        Take your time in crafting your content, do not rush to give the response.
        Blog Content: '{report}'\n
        Organic Search result: '{organic_results}'
        """
    return gemini_text_response(rewrite_prompt)
|
||||
38
lib/ai_web_researcher/gpt_summarize_web_content.py
Normal file
38
lib/ai_web_researcher/gpt_summarize_web_content.py
Normal file
@@ -0,0 +1,38 @@
|
||||
import sys
|
||||
|
||||
from ..gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from ..gpt_providers.gemini_pro_text import gemini_text_response
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
def summarize_web_content(page_content, gpt_providers="openai"):
    """Summarize the content of a web page with the chosen LLM.

    Args:
        page_content (str): Text of the web page.
        gpt_providers (str): Provider selector; checked for ``'gemini'``
            first, then ``'openai'``.

    Returns:
        Whatever the selected provider helper returns, or ``None`` when the
        selector names neither provider.

    Raises:
        Exception: Any provider error is logged and re-raised.
    """
    if 'gemini' in gpt_providers:
        summary_prompt = f"""You are a helpful assistant that briefly summarizes the content of a webpage.
        Summarize the given web page content below.
        Web page content: '{page_content}'"""
        try:
            return gemini_text_response(summary_prompt)
        except Exception as gemini_err:
            logger.error(f"Failed to get response from gemini: {gemini_err}")
            raise gemini_err

    if 'openai' in gpt_providers:
        # The OpenAI path sends only the raw content, with no instruction text.
        content_prompt = f"""
        Web page content: {page_content}
        """
        try:
            logger.info("Calling OpenAI LLM.")
            return openai_chatgpt(content_prompt)
        except Exception as openai_err:
            logger.error(f"failed to get response from Openai: {openai_err}")
            raise openai_err

    return None
|
||||
53
lib/ai_web_researcher/gpt_titles_faq.py
Normal file
53
lib/ai_web_researcher/gpt_titles_faq.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import sys
|
||||
import json
|
||||
|
||||
from ..gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from ..gpt_providers.gemini_pro_text import gemini_text_response
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
# FIXME: Provide num_blogs, num_faqs as inputs.
|
||||
def gpt_titles_faqs_google_search(search_keyword, search_results, gpt_providers="openai"):
    """Ask an LLM for one blog title and ten FAQs based on search results.

    Args:
        search_keyword (str): Keyword that was researched.
        search_results: Search result data embedded verbatim in the prompt.
        gpt_providers (str): Provider selector; checked for ``'gemini'``
            first, then ``'openai'``.

    Returns:
        For gemini, the reply parsed with ``json.loads`` (the prompt requests
        ``'blogTitles'`` and ``'FAQs'`` keys). For openai, whatever
        ``openai_chatgpt`` returns, unparsed. ``None`` when the selector names
        neither provider.

    Raises:
        Exception: Provider and JSON parsing errors are logged and re-raised.
    """
    prompt = f"""
        As a SEO expert and content writer, I will provide you with my web research keyword and its google search result in json format.
        Your task is to write 1 blog title and 10 FAQs.

        1). Your blog title should compete against all the provided search results.
        2). Your FAQ should be based on 'People also ask' and 'Related Queries' from given result.
        Always include answers for each FAQ, use your knowledge and confirm with snippets given in search result.
        3). Respond in json data with 'blogTitles' and 'FAQs' as json keys. Do not explain, describe your response.
        4). Follow best practises of SEO.

        Web Research Keyword: "{search_keyword}"
        Google search Result: "{search_results}"
        """
    logger.info("Generating blog title and FAQs from web search result.")

    if 'gemini' in gpt_providers:
        try:
            raw_reply = gemini_text_response(prompt)
            print(f"\n\n\n RESPONSE: {raw_reply}\n\n\n")
            is_fenced = '```' in raw_reply and '\n' in raw_reply
            if is_fenced:
                # Drop the first and last lines of a fenced reply.
                reply_lines = raw_reply.strip().split('\n')
                raw_reply = '\n'.join(reply_lines[1:-1])
            return json.loads(raw_reply)
        except Exception as gemini_err:
            logger.error(f"Failed to get response from gemini: {gemini_err}")
            raise gemini_err
    elif 'openai' in gpt_providers:
        try:
            logger.info("Calling OpenAI LLM.")
            return openai_chatgpt(prompt)
        except Exception as openai_err:
            logger.error(f"Failed to get response from Openai: {openai_err}")
            raise openai_err
|
||||
223
lib/ai_web_researcher/metaphor_basic_neural_web_search.py
Normal file
223
lib/ai_web_researcher/metaphor_basic_neural_web_search.py
Normal file
@@ -0,0 +1,223 @@
|
||||
import os
|
||||
import sys
|
||||
import pandas as pd
|
||||
from io import StringIO
|
||||
from pathlib import Path
|
||||
|
||||
from metaphor_python import Metaphor
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
from loguru import logger
|
||||
from tqdm import tqdm
|
||||
from tabulate import tabulate
|
||||
from collections import namedtuple
|
||||
import textwrap
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../../.env'))
|
||||
|
||||
from exa_py import Exa
|
||||
|
||||
from tenacity import (retry, stop_after_attempt, wait_random_exponential,)# for exponential backoff
|
||||
from .gpt_summarize_web_content import summarize_web_content
|
||||
from .gpt_competitor_analysis import summarize_competitor_content
|
||||
|
||||
|
||||
def get_metaphor_client():
    """Create the search client from the ``METAPHOR_API_KEY`` environment variable.

    The retry decorator was removed: the missing-key ``ValueError`` is
    deterministic, so retrying only added backoff sleeps and then surfaced it
    as a ``RetryError``.

    Returns:
        Exa: A client built with the configured API key.

    Raises:
        ValueError: If ``METAPHOR_API_KEY`` is unset or empty.
    """
    METAPHOR_API_KEY = os.environ.get('METAPHOR_API_KEY')
    if not METAPHOR_API_KEY:
        raise ValueError("METAPHOR_API_KEY environment variable not set!")
    return Exa(METAPHOR_API_KEY)
|
||||
|
||||
|
||||
def metaphor_rag_search():
    """Placeholder for researching blog sections.

    For now this only obtains a client through ``get_metaphor_client``; no
    search is performed and ``None`` is returned.
    """
    _client = get_metaphor_client()
    return None
|
||||
|
||||
|
||||
|
||||
def metaphor_find_similar(similar_url):
    """Find pages similar to ``similar_url`` and summarise each competitor.

    For every similar result, a keyword search on the competitor URL is run,
    the text of those results is concatenated, and the Gemini summary is
    stored on the competitor's ``text`` attribute.

    Args:
        similar_url (str): The URL to find similar content for.

    Returns:
        The response of the similar-content search, whose ``results`` carry
        the summaries. Previously the last per-competitor search response was
        returned because the variable was overwritten in the loop.

    Raises:
        Exception: Re-raised when the similar-content search itself fails.
    """
    metaphor = get_metaphor_client()
    try:
        logger.info(f"Doing similar web search for url: {similar_url}")
        search_response = metaphor.find_similar_and_contents(
            similar_url,
            highlights=True,
            num_results=10)
    except Exception as e:
        logger.error(f"Metaphor: Error in finding similar content: {e}")
        raise

    competitors = search_response.results
    for acompetitor in tqdm(competitors, desc="Processing Competitors", unit="competitor"):
        try:
            # Separate name so the similar-search response is not overwritten.
            competitor_response = metaphor.search_and_contents(
                acompetitor.url,
                type="keyword",
                num_results=5
            )
        except Exception as err:
            logger.error(f"Failed to do metaphor keyword/url research: {err}")
            # Skip this competitor rather than summarise stale results.
            continue

        # Add a progress bar for the inner loop
        page_texts = []
        for r in tqdm(competitor_response.results, desc=f"{acompetitor.url}", unit="research"):
            # Guard against results without text (assumed possible; TODO confirm).
            if r.text:
                page_texts.append(r.text)
        all_contents = "".join(page_texts)

        try:
            acompetitor.text = summarize_competitor_content(all_contents, "gemini")
        except Exception as err:
            logger.error(f"Failed to summarize competitor content: {err}")

    print_search_result(competitors)
    return search_response
|
||||
|
||||
|
||||
|
||||
def metaphor_search_articles(query,
                             num_results=5,
                             use_autoprompt=True,
                             include_domains=None,
                             time_range=None,
                             similar_url=None):
    """Search for articles using the Metaphor API and summarise each hit.

    Args:
        query (str): The search query.
        num_results (int): Number of results to retrieve.
        use_autoprompt (bool): Whether to use autoprompt.
        include_domains (list): Domains to restrict the search to.
            ``None`` (the default) is sent as an empty list.
        time_range (str): One of "past day", "past week", "past month",
            "past year"; any other value applies no publication-date filter.
        similar_url (str): When given, a similar-content search is also run
            for this URL; its result is not returned.

    Returns:
        The search results, each with ``text`` replaced by its summary.

    Raises:
        Exception: Any failure is logged and re-raised.
    """
    metaphor = get_metaphor_client()
    # A fresh list per call instead of one mutable default shared by all calls.
    if include_domains is None:
        include_domains = []
    lookback_days = {
        "past day": 1,
        "past week": 7,
        "past month": 30,
        "past year": 365,
    }
    try:
        days = lookback_days.get(time_range)
        if days is None:
            start_published_date = None
        else:
            start_published_date = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d')

        logger.info(f"Metaphor web search with Date: {start_published_date} and Query: {query}")
        try:
            search_response = metaphor.search_and_contents(
                query,
                include_domains=include_domains,
                use_autoprompt=use_autoprompt,
                start_published_date=start_published_date,
                num_results=num_results
            )
        except Exception as err:
            logger.error(f"Failed in metaphor.search_and_contents: {err}")
            # Without a response there is nothing to summarise below.
            raise

        # From each webpage, get a summary of the web page.
        contents_response = search_response.results
        for content in tqdm(contents_response, desc="Reading Web URL content:", unit="content"):
            content.text = summarize_web_content(content.text, "gemini")

        print_search_result(contents_response)

        if similar_url:
            logger.info(f"Doing similar/semantic search for URL: {similar_url}")
            metaphor_find_similar(similar_url)
        return contents_response

    except Exception as e:
        logger.error(f"Error in Metaphor searching articles: {e}")
        raise
|
||||
|
||||
|
||||
def print_search_result(contents_response):
    """Print search results as a table and append the table to the save file.

    Args:
        contents_response: Iterable of results exposing ``url``, ``title``,
            ``published_date`` and ``text`` attributes.
    """
    # Tabulate the data
    table_headers = ["URL", "Title", "Published Date", "Summary"]
    table_data = [
        (result.url, result.title, result.published_date, result.text)
        for result in contents_response
    ]

    table = tabulate(table_data,
                     headers=table_headers,
                     tablefmt="fancy_grid",
                     colalign=["left", "left", "left", "left"],
                     maxcolwidths=[20, 20, 10, 60])
    print(table)
    # Saving is best effort: a failure is logged and printing still succeeded.
    try:
        save_in_file(table)
    except Exception as save_results_err:
        logger.error(f"Failed to save search results: {save_results_err}")
|
||||
|
||||
|
||||
def save_in_file(table_content):
    """Append search analysis text to the file named by ``SEARCH_SAVE_FILE``.

    Best effort: every failure is logged and none is raised.

    Args:
        table_content (str): Text to append, followed by three newlines.
    """
    file_path = os.environ.get('SEARCH_SAVE_FILE')
    if not file_path:
        logger.error("Error occurred while writing to the file: "
                     "SEARCH_SAVE_FILE environment variable not set")
        return
    try:
        # Callers pass fancy_grid tables, so write UTF-8 rather than relying
        # on the platform default encoding.
        with open(file_path, "a", encoding="utf-8") as file:
            file.write(table_content)
            file.write("\n" * 3)  # Add three newlines at the end
        logger.info(f"Search content saved to {file_path}")
    except Exception as e:
        logger.error(f"Error occurred while writing to the file: {e}")
|
||||
|
||||
|
||||
def metaphor_scholar_search(query, include_domains=None, time_range="anytime"):
    """Search for papers using the Metaphor API.

    Args:
        query (str): The search query.
        include_domains (list): List of domains to include.
        time_range (str): "day", "week", "month" or "year", optionally
            prefixed with "past " (e.g. "past week"); any other value
            applies no publication-date filter.

    Returns:
        The response of the Metaphor ``search`` call, or ``None`` when the
        search fails (the error is logged).
    """
    client = get_metaphor_client()
    lookbacks = {
        "day": timedelta(days=1),
        "week": timedelta(weeks=1),
        "month": timedelta(weeks=4),
        "year": timedelta(days=365),
    }
    try:
        # Accept the "past ..." spelling used by the article search as well.
        range_key = str(time_range)
        if range_key.startswith("past "):
            range_key = range_key[len("past "):]
        lookback = lookbacks.get(range_key)
        if lookback is None:
            start_published_date = None
        else:
            start_published_date = (datetime.utcnow() - lookback).strftime('%Y-%m-%dT%H:%M:%SZ')

        return client.search(query,
                             include_domains=include_domains,
                             start_published_date=start_published_date,
                             use_autoprompt=True)
    except Exception as e:
        logger.error(f"Error in searching papers: {e}")
        return None
|
||||
156
lib/ai_web_researcher/tavily_ai_search.py
Normal file
156
lib/ai_web_researcher/tavily_ai_search.py
Normal file
@@ -0,0 +1,156 @@
|
||||
"""
|
||||
This Python script uses the Tavily AI service to perform advanced searches based on specified keywords and options. It retrieves Tavily AI search results, pretty-prints them using Rich and Tabulate, and provides additional information such as the answer to the search query and follow-up questions.
|
||||
|
||||
Features:
|
||||
- Utilizes the Tavily AI service for advanced searches.
|
||||
- Retrieves API keys from the environment variables loaded from a .env file.
|
||||
- Configures logging with Loguru for informative messages.
|
||||
- Implements a retry mechanism using Tenacity to handle transient failures during Tavily searches.
|
||||
- Displays search results, including titles, snippets, and links, in a visually appealing table using Tabulate and Rich.
|
||||
|
||||
Usage:
|
||||
- Ensure the necessary API keys are set in the .env file.
|
||||
- Run the script to perform a Tavily AI search with specified keywords and options.
|
||||
- The search results, including titles, snippets, and links, are displayed in a formatted table.
|
||||
- Additional information, such as the answer to the search query and follow-up questions, is presented in separate tables.
|
||||
|
||||
Modifications:
|
||||
- To modify the script, update the environment variables in the .env file with the required API keys.
|
||||
- Adjust the search parameters, such as keywords and search depth, in the `get_tavilyai_results` function as needed.
|
||||
- Customize logging configurations and table formatting according to preferences.
|
||||
|
||||
To-Do (TBD):
|
||||
- Consider adding further enhancements or customization based on specific use cases.
|
||||
|
||||
Note: This script depends on external libraries such as Tavily, Rich, Tabulate, Loguru, and Tenacity. Install them using 'pip install tavily rich tabulate loguru tenacity' if not already installed.
|
||||
"""
|
||||
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
import sys
|
||||
from dotenv import load_dotenv
|
||||
from loguru import logger
|
||||
from tavily import TavilyClient
|
||||
from rich import print
|
||||
from tabulate import tabulate
|
||||
# Load environment variables from .env file
|
||||
load_dotenv(Path('../../.env'))
|
||||
from rich import print
|
||||
|
||||
# Configure logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
from tenacity import retry, stop_after_attempt, wait_random_exponential
|
||||
|
||||
from .gpt_titles_faq import gpt_titles_faqs_google_search
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def _tavily_search_with_retry(client, keywords, search_depth, include_urls):
    """Run one Tavily search; errors propagate so tenacity can retry them."""
    if include_urls:
        return client.search(keywords, search_depth, include_answer=True, include_domains=include_urls)
    return client.search(keywords, search_depth, include_answer=True)


def get_tavilyai_results(keywords, include_urls, search_depth="advanced"):
    """Get Tavily AI search results based on specified keywords and options.

    Only the search call itself is retried. A missing API key fails
    immediately instead of being retried with backoff.

    Args:
        keywords (str): Keywords for Tavily AI search.
        include_urls: Domains to restrict the search to; falsy for no
            restriction.
        search_depth (str, optional): Search depth option (default is "advanced").

    Returns:
        dict: Tavily AI search results, or ``None`` when the search (after
        retries) or the result printing fails.

    Raises:
        ValueError: If ``TAVILY_API_KEY`` is not set.
    """
    logger.info(f"Running Tavily search on: {keywords}")

    # Retrieve API key
    api_key = os.getenv('TAVILY_API_KEY')
    if not api_key:
        raise ValueError("TAVILY_API_KEY environment variable is not set.")

    # Initialize Tavily client
    try:
        client = TavilyClient(api_key=api_key)
    except Exception as err:
        logger.error(f"Failed to create Tavily client. Check TAVILY_API_KEY: {err}")
        sys.exit(1)

    try:
        tavily_search_result = _tavily_search_with_retry(client, keywords, search_depth, include_urls)
        print_result_table(tavily_search_result)
        return tavily_search_result
    except Exception as err:
        logger.error(f"Failed to do Tavily Research: {err}")
        return None
|
||||
|
||||
|
||||
def print_result_table(output_data):
    """Pretty print a Tavily AI search result and save each table.

    Three tables are printed and appended to the save file in order: the
    result list, the answer, and the follow-up questions.

    Args:
        output_data (dict): Tavily response; the keys read are "results",
            "query", "answer" and "follow_up_questions".
    """
    def show_and_save(table):
        # Saving is best effort; a failure must not stop the next table.
        print(table)
        try:
            save_in_file(table)
        except Exception as save_results_err:
            logger.error(f"Failed to save search results: {save_results_err}")

    # A missing "results" key is treated like an empty result list.
    table_data = [
        [item.get("title", ""), item.get("content", ""), item.get("url", "")]
        for item in output_data.get("results") or []
    ]
    show_and_save(tabulate(table_data,
                           headers=["Title", "Snippet", "Link"],
                           tablefmt="fancy_grid",
                           colalign=["left", "left", "left"],
                           maxcolwidths=[30, 60, 30]))

    # Display the 'answer' in a table
    show_and_save(tabulate([[output_data.get("answer")]],
                           headers=[f"The answer to search query: {output_data.get('query')}"],
                           tablefmt="fancy_grid",
                           maxcolwidths=[80]))

    # Display the 'follow_up_questions' in a table
    show_and_save(tabulate([[output_data.get("follow_up_questions")]],
                           headers=[f"Search Engine follow up questions for query: {output_data.get('query')}"],
                           tablefmt="fancy_grid",
                           maxcolwidths=[80]))
|
||||
|
||||
|
||||
def save_in_file(table_content):
    """Append search analysis text to the file named by ``SEARCH_SAVE_FILE``.

    Best effort: every failure is logged and none is raised.

    Args:
        table_content (str): Text to append, followed by three newlines.
    """
    file_path = os.environ.get('SEARCH_SAVE_FILE')
    if not file_path:
        logger.error("Error occurred while writing to the file: "
                     "SEARCH_SAVE_FILE environment variable not set")
        return
    try:
        # Callers pass fancy_grid tables, so write UTF-8 rather than relying
        # on the platform default encoding.
        with open(file_path, "a", encoding="utf-8") as file:
            file.write(table_content)
            file.write("\n" * 3)  # Add three newlines at the end
        logger.info(f"Search content saved to {file_path}")
    except Exception as e:
        logger.error(f"Error occurred while writing to the file: {e}")
|
||||
23
lib/ai_web_researcher/web_research_report.py
Normal file
23
lib/ai_web_researcher/web_research_report.py
Normal file
@@ -0,0 +1,23 @@
|
||||
from langchain.adapters.openai import convert_openai_messages
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
|
||||
from ..gpt_providers.gemini_pro_text import gemini_text_response
|
||||
|
||||
|
||||
def write_web_research_report(web_research, faq_questions, gpt_provider="gemini"):
    """Write a research report from web research data and related FAQs.

    Args:
        web_research: Web research results, interpolated into the prompt.
        faq_questions: Related FAQ questions, interpolated into the prompt.
        gpt_provider (str): LLM backend; must contain "gemini" or "openai".

    Returns:
        The report produced by the selected provider.

    Raises:
        ValueError: If ``gpt_provider`` names neither supported backend.
    """
    # Built before dispatching so every provider branch can use it. Each
    # fragment ends with a separator so sentences do not run together.
    prompt = ["You are an SEO and marketing expert, who writes unique, factual and comprehensive research reports. "
              "I will provide you web research report as json data and a list of related FAQ questions. "
              "Use given json as context for writing your research report. "
              "Your sole purpose is to write well written, critically acclaimed, objective and structured research report. "
              "Use the urls from json content to provide cititations and include it in referances section of your report. "
              "Include appropriate emojis in your research report. "
              "Format your report in MLA format and markdown style, with special focus on readibility. "
              f"Do not provide explanations for your response.\nWeb research Report: \"\"\" {web_research} \"\"\"\n "
              f"\nList of FAQ questions: \"\"\" {faq_questions} \"\"\"\n"]

    if "gemini" in gpt_provider:
        return gemini_text_response(prompt)
    if "openai" in gpt_provider:
        # NOTE(review): openai_research_report is neither defined nor
        # imported in the visible part of this module - confirm its source.
        return openai_research_report(prompt)
    raise ValueError(f"Unsupported gpt_provider: {gpt_provider!r}")
|
||||
137
lib/ai_web_researcher/you_web_reseacher.py
Normal file
137
lib/ai_web_researcher/you_web_reseacher.py
Normal file
@@ -0,0 +1,137 @@
|
||||
import requests
|
||||
from clint.textui import progress
|
||||
from loguru import logger
|
||||
|
||||
|
||||
|
||||
def search_ydc_index(search_query, num_web_results=10, country="IN", api_key="<api-key>"):
    """Search the YDC Index API and retrieve results.

    Args:
        search_query (str): The search query.
        num_web_results (int): Number of web results to retrieve.
        country (str): Country code.
        api_key (str): YDC Index API key.

    Returns:
        dict: The JSON response of the API, or ``{"error": message}`` when
        the request or its processing fails.
    """
    url = "https://api.ydc-index.io/search"
    querystring = {
        "query": search_query,
        "num_web_results": str(num_web_results),
        "country": country
    }
    headers = {"X-API-Key": api_key}

    try:
        with progress.Bar(expected_size=num_web_results, label="Searching YDC Index") as bar:
            # A timeout keeps an unresponsive server from hanging the caller.
            response = requests.get(url, headers=headers, params=querystring,
                                    stream=True, timeout=30)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)

            result_json = response.json()
            # Bar.show() takes a numeric progress value, so pass the number
            # of results received rather than the result list itself.
            received = len(result_json.get("web_results", []))
            if num_web_results:
                bar.show(min(received, num_web_results))

        return result_json

    except requests.exceptions.RequestException as req_exc:
        logger.error(f"Request to YDC Index API failed: {req_exc}")
        return {"error": str(req_exc)}

    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return {"error": str(e)}
|
||||
|
||||
def get_rag_results(search_query, num_web_results=10, country="IN", api_key="<api-key>"):
    """Retrieve RAG (retrieval-augmented generation) results from the YDC Index API.

    Args:
        search_query (str): The search query.
        num_web_results (int): Number of web results to retrieve.
        country (str): Country code.
        api_key (str): YDC Index API key.

    Returns:
        dict: The JSON response of the API, or ``{"error": message}`` when
        the request or its processing fails.
    """
    url = "https://api.ydc-index.io/rag"
    querystring = {
        "query": search_query,
        "num_web_results": str(num_web_results),
        "country": country
    }
    headers = {"X-API-Key": api_key}

    try:
        with progress.Bar(expected_size=num_web_results, label="Fetching RAG Results") as bar:
            # A timeout keeps an unresponsive server from hanging the caller.
            response = requests.get(url, headers=headers, params=querystring,
                                    stream=True, timeout=30)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)

            result_json = response.json()
            # Bar.show() takes a numeric progress value, so pass the number
            # of results received rather than the result list itself.
            received = len(result_json.get("web_results", []))
            if num_web_results:
                bar.show(min(received, num_web_results))

        return result_json

    except requests.exceptions.RequestException as req_exc:
        logger.error(f"Request to YDC Index API failed: {req_exc}")
        return {"error": str(req_exc)}

    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return {"error": str(e)}
|
||||
|
||||
|
||||
def get_news_results(query, spellcheck=True, api_key="<api-key>"):
    """Retrieve news results from the YDC Index API.

    Args:
        query (str): The search query.
        spellcheck (bool): Whether to enable spellcheck.
        api_key (str): YDC Index API key.

    Returns:
        dict: The JSON response of the API, or ``{"error": message}`` when
        the request or its processing fails.
    """
    url = "https://api.ydc-index.io/news"
    querystring = {
        "q": query,
        "spellcheck": str(spellcheck).lower()
    }
    headers = {"X-API-Key": api_key}

    try:
        with progress.Bar(expected_size=1, label="Fetching News Results") as bar:
            # A timeout keeps an unresponsive server from hanging the caller.
            response = requests.get(url, headers=headers, params=querystring,
                                    stream=True, timeout=30)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)

            result_json = response.json()
            # Bar.show() requires a progress value; the single request is done.
            bar.show(1)

        return result_json

    except requests.exceptions.RequestException as req_exc:
        logger.error(f"Request to YDC Index API failed: {req_exc}")
        return {"error": str(req_exc)}

    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return {"error": str(e)}
|
||||
|
||||
|
||||
# Example usage: runs only when the module is executed as a script, so
# importing it does not fire API requests with the placeholder key.
if __name__ == "__main__":
    search_query = "Getting started with llamaindex"
    result = get_news_results(search_query)
    print(result)
    result = get_rag_results(search_query)
    print(result)
    result = search_ydc_index(search_query)
    print(result)
|
||||
37
lib/blog_sections/faqs_generator_blog.py
Normal file
37
lib/blog_sections/faqs_generator_blog.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import sys
|
||||
|
||||
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from .gpt_providers.gemini_pro_text import gemini_text_response
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
def generate_blog_faq(blog_article, gpt_providers="openai"):
    """Generate five FAQs, in Markdown, from the given blog content.

    Args:
        blog_article (str): Blog content the FAQs are based on.
        gpt_providers (str): LLM backend; matched on "gemini" or "openai".

    Returns:
        The provider's response. ``None`` when the gemini call fails (the
        error is logged) or when no known provider is named.

    Raises:
        SystemError: If the OpenAI call fails.
    """
    logger.info("Generating blog FAQs.")
    prompt = f"""As an expert writer, I will provide you with blog content below.
Your task is to write 5 FAQs based on the given blog content.
Always, write fact based answers. Use emojis where applicable.
You must reply in MARKDOWN format.
blog content: '{blog_article}' """

    if 'gemini' in gpt_providers:
        try:
            response = gemini_text_response(prompt)
            return response
        except Exception as err:
            logger.error(f"Failed to get response from gemini: {err}")
    elif 'openai' in gpt_providers:
        try:
            logger.info("Calling OpenAI LLM.")
            response = openai_chatgpt(prompt)
            return response
        except Exception as err:
            # The exception was previously built but never raised.
            raise SystemError(f"Failed to get response from Openai: {err}") from err
    return None
|
||||
@@ -1,8 +0,0 @@
|
||||
{
|
||||
"wordpress_url": "https://latestaitools.in/",
|
||||
"wordpress_username": "username",
|
||||
"wordpress_password": "password",
|
||||
"image_dir": "path/to/image_dir",
|
||||
"output_path": "path/to/output_path"
|
||||
}
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
def generate_topic_outline(blog_title, num_subtopics):
    """Generate an outline of sub topics for a blog title.

    Args:
        blog_title (str): Title of the blog to outline.
        num_subtopics (int): Number of sub topics to ask for.

    Returns:
        The response of the OpenAI chat completion.

    Raises:
        SystemError: If the OpenAI call fails.
    """
    # TBD: Remove hardcoding, make dynamic
    prompt = f"""As a SEO expert, suggest only {num_subtopics} beginner-friendly and
insightful sub topics for the blog title: {blog_title}.
Respond with only answer and no description, explanations."""

    # The suggested {num_subtopics} outline should include few long-tailed keywords and most popular questions.
    # TBD: Include --niche
    logger.info(f"Prompt used for blog title Outline :\n{prompt}\n")
    # TBD: Add logic for which_provider and which_model
    try:
        return openai_chatgpt(prompt)
    except Exception as err:
        # Raise the intended error instead of failing later on an unbound
        # ``response`` variable.
        raise SystemError(f"Error in generating Blog Title: {err}") from err
|
||||
39
lib/github_blogs/github_getting_started.py
Normal file
39
lib/github_blogs/github_getting_started.py
Normal file
@@ -0,0 +1,39 @@
|
||||
import sys
|
||||
|
||||
from .gpt_providers.openai_chat_completion import openai_chatgpt
|
||||
from .gpt_providers.gemini_pro_text import gemini_text_response
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
|
||||
def github_readme_blog(readme_content, gpt_providers="openai"):
    """Write a getting-started how-to blog, in Markdown, from README text.

    Args:
        readme_content (str): README text the guide is adapted from.
        gpt_providers (str): LLM backend; matched on "gemini" or "openai".

    Returns:
        The provider's response, or ``None`` when no known provider is named.

    Raises:
        SystemError: If the OpenAI call fails.
        SystemExit: If the gemini call fails (exit status 1).
    """
    prompt = f"""As an expert programmer and teacher, Write an original, detailed and step-by-step guide, from the provided Text below.
Your guide should be original, engaging and help beginners get started easily.
Write new example codes and detailed comments on how to run them. Include appropriate emoji where applicable.
Include a referances section that links to more code examples.
Your response MUST be a how-to blog in markdown format.
Respond ONLY with your blog content.

Text: '{readme_content}'
"""
    if 'gemini' in gpt_providers:
        try:
            response = gemini_text_response(prompt)
            return response
        except Exception as err:
            logger.error(f"Failed to get response from gemini: {err}")
            sys.exit(1)
    elif 'openai' in gpt_providers:
        try:
            logger.info("Calling OpenAI LLM.")
            response = openai_chatgpt(prompt)
            return response
        except Exception as err:
            # The exception was previously built but never raised.
            raise SystemError(f"Failed to get response from Openai: {err}") from err
    return None
|
||||
140
lib/github_blogs/main_getting_started_blogs.py
Normal file
140
lib/github_blogs/main_getting_started_blogs.py
Normal file
@@ -0,0 +1,140 @@
|
||||
""" Package for writing getting-started and how to guides. """
|
||||
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import json
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
from .scrape_github_readme import get_gh_details_vision, get_readme_content
|
||||
from .scrape_github_readme import research_github_topics, check_if_already_written
|
||||
from .github_getting_started import github_readme_blog
|
||||
from .gpt_online_researcher import do_online_research
|
||||
from .faqs_generator_blog import generate_blog_faq
|
||||
from .get_blog_metadata import blog_metadata
|
||||
from .save_blog_to_file import save_blog_to_file
|
||||
from .arxiv_schlorly_research import read_written_ids, extract_arxiv_ids_from_line, append_id_to_file
|
||||
|
||||
|
||||
|
||||
def blog_from_github(github_opts, flag):
    """Write getting-started blogs from one GitHub URL or a file of URLs.

    Args:
        github_opts (str): A GitHub URL when ``flag`` contains "url"; a path
            to a file with one GitHub URL per line when it contains "csv".
        flag (str): Selects the mode ("url" or "csv").

    Raises:
        SystemExit: In "url" mode, if writing the blog fails (exit status 1).
    """
    if 'url' in flag:
        try:
            write_from_url(github_opts)
        except Exception as err:
            logger.error(f"Failed to write from github url: {github_opts}: {err}")
            sys.exit(1)
    elif 'csv' in flag:
        gh_urls = []
        try:
            with open(github_opts, 'r') as file:
                # One URL per line; blank lines are ignored.
                for line in file:
                    gh_url = line.strip()
                    if gh_url:
                        gh_urls.append(gh_url)
        except FileNotFoundError:
            logger.error(f"CSV File not found: {github_opts}")
        except Exception as e:
            logger.error(f"CSV: An error occurred: {str(e)}")

        # A failure on one URL is logged and must not stop the others.
        for gh_url in gh_urls:
            try:
                write_from_url(gh_url)
            except Exception as err:
                logger.error(f"Failed to write blog from github: {err}")
|
||||
|
||||
|
||||
|
||||
def write_from_url(gh_url):
    """Write and save a getting-started blog for one GitHub repository.

    Steps: skip URLs already written on, read repo details from a
    screenshot, fetch the README (repo branch or "main", then "master"),
    generate the guide, add FAQs from web research when available, then
    save the blog and record the URL as written.

    Args:
        gh_url (str): URL of the GitHub repository.

    Raises:
        SystemExit: If repo details, the README, or saving the blog fail.
        Exception: Re-raised when blog metadata or recording the URL fails.
    """
    # The url was found in already_written data.
    if check_if_already_written(gh_url):
        logger.error(f"Skipping, already written on url: {gh_url}")
        return
    logger.info(f"Writing getting started from url: {gh_url}")

    # Tolerate a trailing slash so the owner/repo split stays correct.
    url_parts = gh_url.rstrip("/").split("/")
    repo_name = ''.join(url_parts[-1:])

    # fixme: Remove the hardcoding, need add another option OR in config ?
    image_dir = os.path.join(os.getcwd(), "blog_images")
    generated_image_name = f"screenshot_image_{datetime.datetime.now():%Y-%m-%d-%H-%M-%S}.png"
    generated_image_filepath = os.path.join(image_dir, generated_image_name)
    try:
        logger.info(f"Getting github repo details from vision model: {generated_image_filepath}")
        gh_json = get_gh_details_vision(gh_url, generated_image_filepath)
    except Exception as err:
        logger.error(f"Failed to get gemini vision details from GH repo image: {err}")
        sys.exit(1)

    # String to store the blog content; starts with a repo summary block.
    howto_blog = "```" + f"\nGithub URL:{gh_url}\nStars:{gh_json.get('stars')}\n"
    howto_blog += f"Forks:{gh_json.get('forks')}\n"
    howto_blog += f"Description:{gh_json.get('about')}\nBranch:{gh_json.get('branch_name')}\n" + "```\n\n"

    # Direct link to the raw content of README file
    raw_readme_url_base = "https://raw.githubusercontent.com/" + "/".join(url_parts[-2:])
    branch_name = gh_json.get('branch_name') or "main"
    raw_readme_url = raw_readme_url_base + f"/{branch_name}/" + "README.md"
    logger.info(f"Using this url to fetch the README file: {raw_readme_url}")

    # Initialised so a failed fetch falls through to the master-branch retry.
    readme_content = None
    try:
        readme_content = get_readme_content(raw_readme_url)
    except Exception as err:
        logger.error(f"Failed to get README from URL: {raw_readme_url}: {err}")

    # If the readme is still None, try with master branch.
    if not readme_content:
        raw_readme_url = raw_readme_url_base + "/master/" + "README.md"
        logger.warning(f"Trying with master branch: {raw_readme_url}")
        readme_content = get_readme_content(raw_readme_url)
        if not readme_content:
            logger.error(f"Still failed to get the README: {readme_content}")
            sys.exit(1)

    # Create a getting-started blog, adapted from the GH url README.
    howto_blog += github_readme_blog(readme_content, "gemini")

    # Do online research for faqs on the github url.
    research_report = None
    try:
        # Repo names are misnomers for others search, include its decription too.
        # Which, skews the result favourably towards its home/paid pages.
        #online_query = f"{''.join(gh_url.split('/')[-1:])} " + gh_json.get('about')
        online_query = f"{repo_name} "
        logger.info("Do web research with Tavily & Metaphor AI.")
        research_report = do_online_research(online_query, "gemini", gh_url)
    except Exception as err:
        logger.error(f"failed to do online research: {err}")

    # Generate FAQs from the online research report, when there is one.
    if research_report is None:
        logger.error("Failed to generate FAQs from web research_report: no research report available")
    else:
        try:
            blog_faqs = generate_blog_faq(research_report, "gemini")
            howto_blog += f"\n\n## {repo_name} FAQs\n\n" + blog_faqs
        except Exception as err:
            logger.error(f"Failed to generate FAQs from web research_report: {err}")

    logger.info(f"\n\nFinal Blog Content: {howto_blog}\n\n")

    try:
        blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(howto_blog, "gemini")
    except Exception as err:
        logger.error(f"Failed to get blog metadata: {err}")
        raise

    try:
        save_blog_to_file(howto_blog, blog_title, blog_meta_desc, blog_tags,
                          blog_categories, generated_image_filepath)
    except Exception as err:
        logger.error(f"Failed to save blog to a file: {err}")
        sys.exit(1)

    try:
        append_id_to_file(gh_url, "papers_already_written_on.txt")
    except Exception as err:
        logger.error(f"Failed to write/append ID to papers_already_written_on.txt: {err}")
        raise
|
||||
297
lib/github_blogs/scrape_github_readme.py
Normal file
297
lib/github_blogs/scrape_github_readme.py
Normal file
@@ -0,0 +1,297 @@
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import pandas as pd
|
||||
|
||||
import json
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
from .take_url_screenshot import take_screenshot
|
||||
from .gpt_providers.gemini_image_details import gemini_get_img_info
|
||||
|
||||
|
||||
|
||||
def get_readme_content(url):
    """Fetch raw README text from a URL.

    Args:
        url (str): Direct URL of the raw README file.

    Returns:
        str | None: The response text when the status code is 200,
        otherwise ``None``.

    Raises:
        SystemExit: If the request itself fails (exit status 1).
    """
    try:
        # Fetch the README content directly from the URL
        response = requests.get(url, timeout=30)
    except Exception as err:
        # No response exists here, so only the error itself can be reported.
        logger.error(f"Failed to fetch raw readme from {url}: {err}")
        sys.exit(1)

    logger.debug(f"README fetch status code: {response.status_code}")
    if response.status_code != 200:
        return None
    logger.debug("Successfully fetched the README.md")
    return response.text
|
||||
|
||||
|
||||
def _sibling_text(element, sibling_tag):
    """Return the stripped text of the next ``sibling_tag`` sibling, or None."""
    if element is None:
        return None
    sibling = element.find_next_sibling(sibling_tag)
    if sibling is None:
        return None
    return sibling.get_text().strip()


def get_gh_repo_metadata(github_url):
    """Scrape repo details (name, about, stars, watchers, forks, topics).

    Any detail whose HTML element is not found is reported as ``None``
    instead of aborting the whole scrape.

    Args:
        github_url (str): URL of the GitHub repository page.

    Returns:
        dict: Keys "branch_name" (always ``None``), "name", "about",
        "stars", "watchers", "forks", plus "topics" when the topic
        container is found.
    """
    logger.info("Scraping github with BS4 and requests.")
    # download the target page
    page = requests.get(github_url, timeout=30)
    # parse the HTML document returned by the server
    soup = BeautifulSoup(page.text, 'html.parser')

    # initialize the object that will contain the scraped data
    repo = {}

    name_html_element = soup.select_one('[itemprop="name"]')
    name = name_html_element.get_text().strip() if name_html_element is not None else None

    def counter_text(container, icon_selector):
        # Counters are rendered as "<icon> <strong>1,234</strong>".
        text = _sibling_text(container.select_one(icon_selector), 'strong')
        return text.replace(',', '') if text is not None else None

    # scrape the repo details in the right box
    description = stars = watchers = forks = None
    bordergrid_html_element = soup.select_one('.BorderGrid')
    if bordergrid_html_element is not None:
        description = _sibling_text(bordergrid_html_element.select_one('h2'), 'p')
        stars = counter_text(bordergrid_html_element, '.octicon-star')
        watchers = counter_text(bordergrid_html_element, '.octicon-eye')
        forks = counter_text(bordergrid_html_element, '.octicon-repo-forked')

    # Find the div with class "f6" containing topic links
    topic_div = soup.find('div', class_='f6')
    if topic_div:
        # Find all the topic links within the div
        topic_links = topic_div.find_all('a', class_='topic-tag-link')
        repo['topics'] = [link.text.strip() for link in topic_links]

    # FIXME: Unable to scrape branch name.
    repo['branch_name'] = None
    # store the scraped data
    repo['name'] = name
    repo['about'] = description
    repo['stars'] = stars
    repo['watchers'] = watchers
    repo['forks'] = forks
    #repo['readme'] = readme
    logger.info(f"Github Repo Details: {repo}")
    return repo
|
||||
|
||||
|
||||
def get_gh_details_vision(github_url, generated_image_filepath):
    """Take a screenshot of a GitHub page and ask a vision model for repo details.

    Args:
        github_url: URL of the GitHub repository page to capture.
        generated_image_filepath: Path the screenshot is written to.

    Returns:
        The repository details parsed from the model's JSON reply. When the
        vision call fails, the result of ``get_gh_repo_metadata(github_url)``
        is returned instead.

    Raises:
        json.JSONDecodeError: If the model reply is not valid JSON.
    """
    logger.info(f"Take screenshot and pass it to gemini for repo details of {github_url}")

    generated_image_filepath = take_screenshot(github_url, generated_image_filepath)
    prompt = """From the given image of a github page, find out the number of stars, about, forks, last commit days, link url, topics and branch name. Return the result as json."""

    try:
        gh_details = gemini_get_img_info(prompt, generated_image_filepath)
        logger.info(f"Github Repo details, from vision model: {gh_details}")
    except Exception as err:
        logger.error(f"Failed to get gh images details: {err}")
        # Fall back to plain HTML scraping when the vision model is unavailable.
        return get_gh_repo_metadata(github_url)

    # The reply is presumably wrapped in a markdown code fence (```json ... ```).
    # Drop the first/last line only when they really are fence markers, so a
    # bare JSON reply is not truncated before parsing.
    lines = gh_details.strip().split('\n')
    if lines and lines[0].lstrip().startswith('```'):
        lines = lines[1:]
    if lines and lines[-1].strip().startswith('```'):
        lines = lines[:-1]

    return json.loads('\n'.join(lines))
|
||||
|
||||
|
||||
def research_github_topics(topics):
    """Collect popular repositories from a fixed list of GitHub topic pages.

    For every topic slug in the built-in list, the page
    ``https://github.com/topics/<slug>`` is downloaded, each repository card is
    parsed with ``get_repo_info`` and repositories with more than 5000 stars
    are kept, de-duplicated by URL. The collected URLs are written to
    ``github_url_to_write.csv`` (appended when the file already exists).

    Approach adapted from:
    https://www.kaggle.com/code/subhaskumarray/scraping-github-topics-with-their-repositories

    Args:
        topics: Currently unused; the topic list is hard-coded below.
    """
    # TODO: discover topics from https://github.com/topics (get_topic_titles,
    # get_topic_desc, get_topic_url) instead of relying on this fixed list.
    gh_topics = ['ai', 'ai-tools', 'ai-assistant', 'ai-agents-framework', 'llm', 'multi-agent', 'fine-tuning', 'rag', 'generative', 'prompt-engineering', 'generative-ai', 'text-to-image-generation', 'llm-ops', 'retrieval-augmented-generation', 'langchain', 'gemini-api', 'vertex-ai', 'huggingface', 'auto-gpt', 'llmops', 'ai-toolkit', 'chatbot', 'chatgpt', 'code-assistant', 'text-to-video', 'llms', 'gpt-4']

    repo_info_dict = {
        'username': [],
        'repo_name': [],
        'stars': [],
        'repo_url': [],
    }
    # Set mirror of repo_info_dict['repo_url'] for O(1) duplicate checks.
    seen_urls = set()

    for agh_topic in gh_topics:
        topic_url = f"https://github.com/topics/{agh_topic}"
        first_topic_repo_page = download_repo_page(topic_url)
        logger.info(f"Get details on github topic: {topic_url}")
        repo_tags = first_topic_repo_page.find_all('h3', {'class': 'f3 color-fg-muted text-normal lh-condensed'})
        star_tags = first_topic_repo_page.find_all('span', {'class': 'Counter js-social-count'})

        if len(repo_tags) != len(star_tags):
            logger.warning(f"Repo/star tag count mismatch on {topic_url}: "
                           f"{len(repo_tags)} vs {len(star_tags)}")

        # zip() stops at the shorter list, so a mismatch can no longer raise
        # IndexError the way indexing star_tags[i] did.
        for repo_tag, star_tag in zip(repo_tags, star_tags):
            username, repo_name, stars, repo_url = get_repo_info(repo_tag, star_tag)

            # Store each repo once, and only those with more than 5000 stars.
            if repo_url in seen_urls or stars <= 5000:
                continue
            seen_urls.add(repo_url)
            repo_info_dict['username'].append(username)
            repo_info_dict['repo_name'].append(repo_name)
            repo_info_dict['stars'].append(stars)
            repo_info_dict['repo_url'].append(repo_url)

    # Only the URLs are persisted.
    df_repo_info = pd.DataFrame(repo_info_dict['repo_url'])

    csv_filename = 'github_url_to_write.csv'
    if os.path.isfile(csv_filename):
        # Append to the existing file without repeating the header row.
        df_repo_info.to_csv(csv_filename, mode='a', header=False, index=False)
        logger.info(f"Data appended to existing file: {csv_filename}")
    else:
        # Create a new file.
        df_repo_info.to_csv(csv_filename, index=False)
|
||||
|
||||
|
||||
def get_topic_titles(parsed_content):
    """Return the topic titles found on a parsed GitHub topics page.

    Args:
        parsed_content: Parsed page exposing a BeautifulSoup-style
            ``find_all(name, attrs)``.

    Returns:
        list[str]: Raw text of every ``<p>`` carrying the title classes.
        An empty list is returned (and the error logged) when parsing fails,
        so callers can always iterate or zip the result.
    """
    selected_class = 'f3 lh-condensed mb-0 mt-1 Link--primary'
    try:
        topic_title_tags = parsed_content.find_all('p', {'class': selected_class})
        return [tag.text for tag in topic_title_tags]
    except Exception as err:
        logger.error(f"Failed to get github topic titles: {err}")
        return []
|
||||
|
||||
|
||||
def get_topic_desc(parsed_contents):
    """Return the topic descriptions found on a parsed GitHub topics page.

    Args:
        parsed_contents: Parsed page exposing a BeautifulSoup-style
            ``find_all(name, attrs)``.

    Returns:
        list[str]: Whitespace-trimmed text of every ``<p>`` carrying the
        description classes. An empty list is returned (and the error logged)
        when parsing fails.
    """
    desc_selector = 'f5 color-fg-muted mb-0 mt-1'
    try:
        topic_desc_tags = parsed_contents.find_all('p', {'class': desc_selector})
        # strip() trims the padding whitespace around each description.
        return [desc.text.strip() for desc in topic_desc_tags]
    except Exception as err:
        logger.error(f"Failed to get github topic desc: {err}")
        return []
|
||||
|
||||
|
||||
def get_topic_url(parsed_contents):
    """Return absolute URLs for the topic links on a parsed GitHub topics page.

    Args:
        parsed_contents: Parsed page exposing a BeautifulSoup-style
            ``find_all(name, attrs)`` whose results support ``tag['href']``.

    Returns:
        list[str]: ``http://github.com`` joined with each link's ``href``.
        An empty list is returned (and the error logged) when parsing fails.
    """
    base_url = 'http://github.com'
    try:
        topic_link_tag = parsed_contents.find_all('a', {'class': 'no-underline flex-1 d-flex flex-column'})
        return [base_url + link['href'] for link in topic_link_tag]
    except Exception as err:
        logger.error(f"Failed to get github topic urls: {err}")
        return []
|
||||
|
||||
|
||||
def download_repo_page(topic_url):
    """Download a page and return it parsed with BeautifulSoup.

    Args:
        topic_url: URL of the GitHub topic page to fetch.

    Returns:
        BeautifulSoup: The response body parsed with ``html.parser``. A
        non-200 response is logged as an error but its body is still parsed
        and returned, so callers simply find no matching tags.
    """
    # A timeout keeps one stalled connection from hanging the whole topic crawl.
    response = requests.get(topic_url, timeout=30)
    if response.status_code != 200:
        logger.error(f"There is some error in {topic_url} (HTTP {response.status_code})")

    return BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
|
||||
def get_repo_info(repo_tags, star_tags):
    """Extract owner, name, star count and URL from one repository card.

    Args:
        repo_tags: The card's heading tag; its first ``<a>`` holds the owner
            and its second ``<a>`` holds the repository name and ``href``.
        star_tags: The tag whose text is the star counter (e.g. ``"12.5k"``).

    Returns:
        tuple: ``(username, repo_name, star_counts, repo_url)`` where
        ``star_counts`` is an int.

    Raises:
        IndexError: If the heading contains fewer than two links.
        ValueError: If the star counter is not a recognisable number.
    """
    a_tags = repo_tags.find_all('a')
    username = a_tags[0].text.strip()
    repo_name = a_tags[1].text.strip()

    # Join with exactly one slash: the href is expected to start with '/',
    # which used to produce "http://github.com//owner/repo".
    base_url = 'http://github.com'
    repo_url = base_url + '/' + a_tags[1]['href'].strip().lstrip('/')

    def star_counts_converter(stars):
        """Convert counter text such as '987', '1,234' or '12.5k' to an int."""
        stars = stars.strip().replace(',', '').lower()
        if not stars:
            # An empty counter is treated as zero stars instead of crashing.
            return 0
        if stars.endswith('k'):
            # round() guards against float products landing just below the
            # intended integer and being truncated by int().
            return int(round(float(stars[:-1]) * 1000))
        return int(stars)

    star_counts = star_counts_converter(star_tags.text)
    return username, repo_name, star_counts, repo_url
|
||||
|
||||
|
||||
def save_to_csv(topic_url, topic_name):
    """Scrape one topic's repositories and store them as ``<topic_name>.csv``.

    Args:
        topic_url: URL of the topic page handed to ``topic_repo_details``.
        topic_name: Topic name; used as the CSV file stem.

    The topic is skipped when its CSV file already exists.
    """
    file_name = topic_name + '.csv'
    if os.path.exists(file_name):
        logger.debug(f"The file {file_name} already exists. Skipping.")
        # Actually skip: without this return the topic was scraped again and
        # the existing file overwritten, contradicting the log message.
        return

    topics_df = topic_repo_details(topic_url)
    topics_df.to_csv(file_name, index=None)
    logger.info(f"Successfully scraped topic {topic_name}")
|
||||
|
||||
|
||||
def check_if_already_written(github_url, file_path='papers_already_written_on.txt'):
    """Check whether a GitHub URL is already listed in a tracking file.

    Each line of the file is compared with the URL after stripping
    surrounding whitespace from both; only exact matches count.

    Args:
        github_url (str): GitHub URL string to check.
        file_path (str): Path to the file containing one entry per line.
            Default is 'papers_already_written_on.txt'.

    Returns:
        bool: True if an exact match is found. False otherwise, including
        when the URL is empty, the file is missing or it cannot be read.
    """
    try:
        target = github_url.strip()
        if not target:
            # An empty URL would otherwise "match" any blank line in the file.
            return False
        with open(file_path, 'r') as file:
            return any(target == line.strip() for line in file)
    except FileNotFoundError:
        print(f"File not found: {file_path}")
    except Exception as e:
        print(f"An error occurred: {str(e)}")
    return False
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
# Scratch script: query Google through SerpApi and print the "related
# questions" block returned for a sample search.
import serpapi
import os

from dotenv import load_dotenv
load_dotenv()
api_key = os.getenv('SERPAPI_KEY')

client = serpapi.Client(api_key=api_key)
result = client.search(
    q="Retrieval Augumented Generation RAG",
    engine="google",
    location="Austin, Texas",
    hl="en",
    gl="us",
)

# Google does not always return a "related_questions" section; fall back to an
# empty list instead of raising KeyError.
print(result.get("related_questions", []))  # Get all the related questions
|
||||
@@ -1,181 +0,0 @@
|
||||
################################################################
|
||||
#
|
||||
# GPT Researcher is an autonomous agent designed for comprehensive online research on a variety of tasks.
|
||||
# The agent can produce detailed, factual and unbiased research reports, with customization options for
|
||||
# focusing on relevant resources, outlines, and lessons. Inspired by the recent Plan-and-Solve and RAG papers,
|
||||
# GPT Researcher addresses issues of speed, determinism and reliability, offering a more stable
|
||||
# performance and increased speed through parallelized agent work, as opposed to synchronous operations.
|
||||
#
|
||||
# The main idea is to run "planner" and "execution" agents, whereas the planner generates questions to research,
|
||||
# and the execution agents seek the most related information based on each generated research question.
|
||||
# Finally, the planner filters and aggregates all related information and creates a research report.
|
||||
#
|
||||
# The agents leverage both gpt3.5-turbo and gpt-4-turbo (128K context) to complete a research task.
|
||||
# We optimize for costs using each only when necessary.
|
||||
# The average research task takes around 3 minutes to complete, and costs ~$0.1.
|
||||
#
|
||||
##############################################################
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
from tavily import TavilyClient
|
||||
import serpapi
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../.env'))
|
||||
|
||||
from langchain.adapters.openai import convert_openai_messages
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
import google.generativeai as genai
|
||||
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s-%(levelname)s-%(module)s-%(lineno)d-%(message)s')
|
||||
from tenacity import (
|
||||
retry,
|
||||
stop_after_attempt,
|
||||
wait_random_exponential,
|
||||
) # for exponential backoff
|
||||
|
||||
from .gpt_providers.gemini_pro_text import gemini_text_response
|
||||
from .blog_proof_reader import blog_proof_editor
|
||||
from .convert_content_to_markdown import convert_tomarkdown_format
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def do_online_research(query, gpt_provider="openai"):
    """Research ``query`` online and return a blog-ready report.

    Steps:
      1. Best effort: Google search (SerpApi) for FAQ questions and the
         organic results to compete against.
      2. Tavily "advanced" search to gather context.
      3. Report generation with Gemini or OpenAI, chosen by ``gpt_provider``.
      4. Rewrite of the report against the organic results via
         ``compete_organic_results``.

    Args:
        query: Keywords or title to research.
        gpt_provider: Provider name containing "gemini" or "openai".

    Returns:
        str: The final report text.

    Raises:
        SystemExit: If the Tavily search or the report generation fails, or
            if required API keys / a supported provider are missing.
    """
    # NOTE(review): every failure path ends in SystemExit, which presumably
    # bypasses the tenacity retry above - confirm whether retries are wanted.
    faq_questions = []
    organic_results = []
    try:
        faq_questions = google_search(query, "faq")
        logging.info(f"Google search FAQ questions: {faq_questions}")
        # Top google organic results, used later to polish the content.
        organic_results = google_search(query, "organic_result")
    except (Exception, SystemExit) as err:
        # google_search() terminates with SystemExit on failure; catching it
        # here keeps this step best-effort, since Tavily covers the research.
        logging.error(f"Failed to do Serpapi research: {err}")

    try:
        use_gemini = "gemini" in gpt_provider
        use_openai = not use_gemini and "openai" in gpt_provider
        if not (use_gemini or use_openai):
            # Previously an unknown provider silently produced an empty report.
            raise ValueError(f"Unsupported gpt_provider: {gpt_provider}")

        # Retrieve API keys; the OpenAI key is only needed for the OpenAI path.
        api_key = os.getenv('TAVILY_API_KEY')
        if not api_key or (use_openai and not os.getenv('OPENAI_API_KEY')):
            raise ValueError("API keys for Tavily or OpenAI are not set.")

        # Initialize Tavily client
        try:
            client = TavilyClient(api_key=api_key)
        except Exception as err:
            logging.error("Failed to create Tavily client. Check TAVILY_API_KEY")
            raise SystemExit(1) from err

        # Run tavily search
        logging.info(f"Running Tavily search on: {query}")
        try:
            content = client.search(query, search_depth="advanced")["results"]
        except Exception as err:
            logging.error(f"Failed to do Tavily Research: {err}")
            raise SystemExit(1) from err

        if use_gemini:
            prompt = ["You are an AI critical thinker research assistant."
                      "I will provide you with json content and a list of faq questions."
                      "Use given json as context for writing your research report."
                      "Your sole purpose is to write well written, critically acclaimed, objective and structured research report"
                      "Important: Include and write code examples in your final report."
                      "Include your own insights on the topic to make it comprehensive and detailed."
                      "Use the urls from json content to provide cititations and include it in referances section of your report."
                      "Include appropriate emojis in your research report."
                      "Include FAQs relevant to your research report. Use the given faq questions. Write answers for each faq."
                      "Format your report in MLA format and markdown style, with special focus on readibility."
                      f"Do not provide explanations for your response.\njson content: \"\"\" {content} \"\"\"\n "
                      f"\nList of FAQ questions: \"\"\" {faq_questions} \"\"\"\n"]
            report = gemini_text_response(prompt)
        else:
            # Setup prompt for GPT-4
            prompt = [{
                "role": "system",
                "content": ('You are an AI critical thinker research assistant. '
                            'Your sole purpose is to write well written, critically acclaimed, '
                            'objective and structured reports on given text.')
            }, {
                "role": "user",
                "content": (f'Information: """{content}"""\n\n'
                            f'Using the above information, answer the following '
                            f'query: "{query}" in a detailed report --'
                            f'Please use MLA format and markdown syntax.')
            }]
            report = openai_research_report(prompt)

        report = compete_organic_results(query, report, organic_results)
        return report
    except Exception as e:
        logging.error(f"Failed in online research: {e}")
        raise SystemExit(1) from e
|
||||
|
||||
|
||||
def openai_research_report(query):
    """Generate a research report with OpenAI GPT-4.

    Args:
        query: OpenAI-style chat messages (a list of ``{"role", "content"}``
            dicts), as built by ``do_online_research``.

    Returns:
        str: The content of the model's reply.

    Raises:
        SystemExit: If the messages cannot be converted or the request fails.
    """
    logging.info("Generating Research report with GPT-4...")
    try:
        # The body used to reference `prompt` and `openai_api_key`, neither of
        # which exists in this scope; use the parameter and read the key here.
        lc_messages = convert_openai_messages(query)
        openai_api_key = os.getenv('OPENAI_API_KEY')
        report = ChatOpenAI(model='gpt-4', openai_api_key=openai_api_key).invoke(lc_messages).content
        return report
    except Exception as err:
        logging.error(f"Failed to generate do_online_research with ChatOpenAI: {err}")
        raise SystemExit(1) from err
|
||||
|
||||
|
||||
def compete_organic_results(query, report, organic_results):
    """Rewrite a blog draft so it can compete with Google's organic results.

    The draft (``report``) and the competitor results (``organic_results``)
    for topic ``query`` are embedded in an SEO-copywriter instruction, sent to
    ``gemini_text_response``, and the reply is returned unchanged.
    """
    seo_instructions = f""" As an SEO expert and copywriter, I will provide you with my blog content on topic '{query}', and
Top google search results.
Your task is to rewrite the given blog to make it compete against top position results.
Make sure, the new blog has high probability of ranking highest against given organic search result competitors.
Modify the given blog content following best SEO practises.
Make sure the blog is original, unique and highly readable.
Remember, Maintain and adopt the formatting, structure, style and tone of the provided blog content.
Include relevant emojis in your final blog for visual appeal. Use it sparingly.
Your response should be well-structured, objective, and critically acclaimed blog article based on provided texts.

Remember, your goal is to create a detailed blog article that will compete against given organic result competitors.
Do not provide explanations, suggestions for your response, reply only with your final response.
Take your time in crafting your content, do not rush to give the response.
Blog Content: '{report}'\n
Organic Search result: '{organic_results}'
"""
    return gemini_text_response(seo_instructions)
|
||||
|
||||
|
||||
def google_search(query, flag="faq"):
    """Run a Google search through SerpApi and return the requested section.

    Args:
        query: Search terms.
        flag: ``"faq"`` returns question/related-search strings usable as FAQ
            headings; ``"organic_result"`` returns the organic result entries.

    Returns:
        list: FAQ strings or organic results. An empty list is returned when
        the section is missing or ``flag`` is not recognised.

    Raises:
        SystemExit: If the SerpApi request fails.
    """
    try:
        api_key = os.getenv('SERPAPI_KEY')
        client = serpapi.Client(api_key=api_key)
        result = client.search(
            q=query,
            engine="google",
            hl="en",
        )
    except Exception as err:
        logging.error(f"Failed in Google Search: {err}")
        raise SystemExit(1) from err

    if 'faq' in flag:
        # Either section may be absent, and items may lack the expected key.
        related_search = [item['title']
                          for item in result.get('inline_people_also_search_for', [])
                          if 'title' in item]
        related_questions = [item['question']
                             for item in result.get('related_questions', [])
                             if 'question' in item]

        if related_search or related_questions:
            return related_search + related_questions
        # Neither section was returned: fall back to the related searches.
        return [item['query']
                for item in result.get('related_searches', [])
                if 'query' in item]

    if 'organic_result' in flag:
        return result.get('organic_results', [])

    # An unknown flag used to fall off the end and return None.
    logging.warning(f"Unknown google_search flag: {flag!r}")
    return []
|
||||
@@ -17,10 +17,11 @@ from tenacity import (
|
||||
|
||||
|
||||
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
|
||||
def gemini_arxiv_img_info(img_path):
|
||||
def gemini_get_img_info(prompt, img_path):
|
||||
""" Get image details from arxiv papers. """
|
||||
logging.info(f"Get image details from Gemini Pro.")
|
||||
try:
|
||||
genai.configure(api_key=os.getenv("API_KEY"))
|
||||
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
|
||||
except Exception as e:
|
||||
logging.error(f"Could not load gemini API key: {e}")
|
||||
raise e
|
||||
@@ -35,19 +36,19 @@ def gemini_arxiv_img_info(img_path):
|
||||
|
||||
safety_settings = [{
|
||||
"category": "HARM_CATEGORY_HARASSMENT",
|
||||
"threshold": "BLOCK_MEDIUM_AND_ABOVE"
|
||||
"threshold": "BLOCK_NONE"
|
||||
},
|
||||
{
|
||||
"category": "HARM_CATEGORY_HATE_SPEECH",
|
||||
"threshold": "BLOCK_MEDIUM_AND_ABOVE"
|
||||
"threshold": "BLOCK_NONE"
|
||||
},
|
||||
{
|
||||
"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
|
||||
"threshold": "BLOCK_MEDIUM_AND_ABOVE"
|
||||
"threshold": "BLOCK_NONE"
|
||||
},
|
||||
{
|
||||
"category": "HARM_CATEGORY_DANGEROUS_CONTENT",
|
||||
"threshold": "BLOCK_MEDIUM_AND_ABOVE"
|
||||
"threshold": "BLOCK_NONE"
|
||||
},]
|
||||
|
||||
try:
|
||||
@@ -67,13 +68,12 @@ def gemini_arxiv_img_info(img_path):
|
||||
"data": Path(img_path).read_bytes()
|
||||
},]
|
||||
|
||||
prompt_parts = [
|
||||
"As scholar on evaluating research papers, I will provide you with an image from a research paper. Your task is to explain the image in details so that I can use it in a blog article. Explain the key findings and conclusions from the image. Your description should be in simple terms to explain to a wider audience. Explain key findings from the given image.",
|
||||
image_parts[0],]
|
||||
prompt_parts = [f"{prompt}", image_parts[0],]
|
||||
|
||||
try:
|
||||
response = model.generate_content(prompt_parts)
|
||||
return response.text
|
||||
except Exception as e:
|
||||
logging.error(f"Could not generate gemini content: {e}")
|
||||
logging.error(f"Gemini is blocking this request: {response.prompt_feedback.block_reason}")
|
||||
logging.error(f"Gemini Vision, Failed to give image Details: {e}\n{response.prompt_feedback}")
|
||||
raise e
|
||||
@@ -32,11 +32,9 @@ def gemini_text_response(prompt):
|
||||
model = genai.GenerativeModel(model_name="gemini-pro", generation_config=generation_config)
|
||||
try:
|
||||
response = model.generate_content(prompt)
|
||||
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to get response from Gemini: {err}. Retrying.")
|
||||
# Try with minstral.
|
||||
print(f"\n\n\n--MINSTRAL--\n\n\n\n")
|
||||
response = mistral_text_response(prompt)
|
||||
return response
|
||||
#response = mistral_text_response(prompt)
|
||||
#return response
|
||||
return response.text
|
||||
|
||||
@@ -93,13 +93,6 @@ def blog_arxiv_url_list(file_path):
|
||||
# Read already written IDs
|
||||
written_ids = read_written_ids('papers_already_written_on.txt')
|
||||
|
||||
# Write blogs on each of arxiv_id from the file.
|
||||
for arxiv_id in extracted_ids:
|
||||
# Check if we have already written on this research_paper. For this, all arxiv ids are written in
|
||||
# a file called 'papers_already_written_on.txt'. If arxiv ID is found in this file, skip writing again.
|
||||
# YUP, use a DB. KISS for now.
|
||||
written_ids = read_written_ids('papers_already_written_on.txt')
|
||||
|
||||
# Loop through extracted IDs
|
||||
for arxiv_id in extracted_ids:
|
||||
if arxiv_id not in written_ids:
|
||||
@@ -178,8 +171,8 @@ def blog_postprocessing(arxiv_id, research_review):
|
||||
save_blog_to_file(research_review, blog_title, blog_meta_desc, blog_tags,\
|
||||
blog_categories, generated_image_filepath)
|
||||
except Exception as err:
|
||||
logger.__repr__ror(f"Failed to save blog to a file: {err}")
|
||||
raise err
|
||||
logger.error(f"Failed to save blog to a file: {err}")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def take_paper_screenshot(arxiv_url):
|
||||
@@ -1,71 +0,0 @@
|
||||
import os
|
||||
import datetime
|
||||
|
||||
from selenium import webdriver
|
||||
from PIL import Image
|
||||
import shutil
|
||||
from screenshotone import Client, TakeOptions
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../.env'))
|
||||
|
||||
|
||||
def screenshot_api(url, generated_image_filepath):
    """Capture a webpage screenshot with the ScreenshotOne API.

    Falls back to the local Selenium based ``take_screenshot`` when the API
    call or the download fails.

    Args:
        url: Page to capture.
        generated_image_filepath: Destination path of the PNG file.

    Returns:
        The path of the stored screenshot.
    """
    try:
        # create API client
        client = Client(os.getenv('SCREENSHOTONE_ACCESS_KEY'), os.getenv('SCREENSHOTONE_SECRET_KEY'))

        # set up options
        options = (TakeOptions.url(url)
                   .format("png")
                   .viewport_width(1024)
                   .viewport_height(768)
                   .block_cookie_banners(True)
                   .block_chats(True))

        # render the screenshot and download the image as a stream
        image_stream = client.take(options)

        # store the screenshot at the requested path
        with open(generated_image_filepath, 'wb') as result_file:
            shutil.copyfileobj(image_stream, result_file)
    except Exception as err:
        print(f"Failed in screenshotone api: {err}")
        return take_screenshot(url, generated_image_filepath)

    # Preview is best effort: a viewer failure must not trigger a second
    # capture of a screenshot that was already stored successfully.
    try:
        with Image.open(generated_image_filepath) as preview:
            preview.show()
    except Exception as err:
        print(f"Could not display screenshot: {err}")

    return generated_image_filepath
|
||||
|
||||
def take_screenshot(url, generated_image_filepath):
    """Capture a webpage screenshot with a local Chrome webdriver.

    Args:
        url: Page to capture.
        generated_image_filepath: Destination path of the PNG file.

    Returns:
        The path of the stored screenshot.
    """
    # Create a webdriver instance
    driver = webdriver.Chrome()
    try:
        # Navigate to the given url
        driver.get(url)

        # Set a fixed window size (you can adjust this as needed)
        driver.set_window_size(800, 600)

        # Take a screenshot of the webpage
        screenshot = driver.get_screenshot_as_png()
    finally:
        # Always release the browser, even when navigation or capture fails.
        driver.quit()

    # Save the screenshot to a file
    with open(generated_image_filepath, "wb") as f:
        f.write(screenshot)

    # Display the screenshot; the context manager closes the file handle.
    with Image.open(generated_image_filepath) as image:
        image.show()

    return generated_image_filepath
|
||||
113
lib/utils/take_url_screenshot.py
Normal file
113
lib/utils/take_url_screenshot.py
Normal file
@@ -0,0 +1,113 @@
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import subprocess
|
||||
|
||||
from time import sleep
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from PIL import Image
|
||||
|
||||
from selenium import webdriver
|
||||
from PIL import Image
|
||||
import shutil
|
||||
from screenshotone import Client, TakeOptions
|
||||
from pathlib import Path
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv(Path('../.env'))
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
|
||||
|
||||
def screenshot_api(url, generated_image_filepath):
    """Capture a webpage screenshot with the ScreenshotOne API.

    Falls back to the local Selenium based ``take_screenshot`` when the API
    call or the download fails.

    Args:
        url: Page to capture.
        generated_image_filepath: Destination path of the PNG file.

    Returns:
        The path of the stored screenshot.
    """
    try:
        # create API client
        client = Client(os.getenv('SCREENSHOTONE_ACCESS_KEY'), os.getenv('SCREENSHOTONE_SECRET_KEY'))

        # set up options
        options = (TakeOptions.url(url)
                   .format("png")
                   .viewport_width(1024)
                   .viewport_height(768)
                   .block_cookie_banners(True)
                   .block_chats(True))

        # render the screenshot and download the image as a stream
        image_stream = client.take(options)

        # store the screenshot at the requested path
        with open(generated_image_filepath, 'wb') as result_file:
            shutil.copyfileobj(image_stream, result_file)
    except Exception as err:
        logger.error(f"Failed in screenshotone api: {err}")
        return take_screenshot(url, generated_image_filepath)

    # Preview is best effort: a viewer failure must not trigger a second
    # capture of a screenshot that was already stored successfully.
    try:
        with Image.open(generated_image_filepath) as preview:
            preview.show()
            # Wait for 2 seconds (adjust the delay as needed) before the
            # image file handle is closed.
            sleep(2)
    except Exception as err:
        logger.warning(f"Could not display screenshot: {err}")

    return generated_image_filepath
|
||||
|
||||
|
||||
def take_screenshot(url, generated_image_filepath):
    """Capture a webpage screenshot with headless Chrome.

    Args:
        url: Page to capture.
        generated_image_filepath: Destination path of the PNG file.

    Returns:
        The path of the stored screenshot.
    """
    # Create a webdriver instance in headless mode
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    logger.debug(f"Taking screenshot of url: {url}")

    try:
        # Navigate to the given url
        driver.get(url)

        # Give late-loading content a moment to render.
        sleep(2)

        # Explicitly wait for the page body to be present (adjust timeout as needed)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "body")))

        # Set a larger window size
        driver.set_window_size(1200, 800)

        # Take a screenshot of the webpage
        screenshot = driver.get_screenshot_as_png()

        # Save the screenshot to a file
        with open(generated_image_filepath, "wb") as f:
            f.write(screenshot)
    finally:
        # Close the webdriver instance
        driver.quit()

    # Preview is best effort: the screenshot is already stored, so viewer
    # problems (no GUI, no pkill binary) must not fail the capture.
    try:
        with Image.open(generated_image_filepath) as image:
            image.show()
        # Wait for 2 seconds (adjust the delay as needed)
        sleep(2)

        if os.name == "posix":
            # Close the viewer window (platform-dependent).
            # NOTE(review): `pkill -f display` matches every process whose
            # command line contains "display" - confirm this is acceptable.
            subprocess.run(["pkill", "-f", "display"], check=False)
        # If using macOS, you can use the following:
        # subprocess.run(["osascript", "-e", 'tell application "Preview" to close every window'])
        # If using Windows, you can use the following:
        # subprocess.run(["taskkill", "/F", "/IM", "Microsoft.Photos.exe"])
    except Exception as err:
        logger.warning(f"Could not preview screenshot: {err}")

    logger.debug(f"Screenshot successfully stored at: {generated_image_filepath}")
    return generated_image_filepath
|
||||
38
main_config
Normal file
38
main_config
Normal file
@@ -0,0 +1,38 @@
|
||||
###################################################
|
||||
#
|
||||
# This is the main config file which drives the code.
|
||||
# This config will restrict code modifications and hence
|
||||
# ease of usability.
|
||||
#
|
||||
##################################################
|
||||
|
||||
|
||||
###################################################
|
||||
#
|
||||
# Define Blog Content characteristics
|
||||
#
|
||||
###################################################
|
||||
|
||||
blog_tone="professional, how-to, begginer, research, programming,"
|
||||
blog_character="???"
|
||||
blog_tempo="???"
|
||||
blog_audience="???"
|
||||
blog_geographic="COUNTRY, hyper local"
|
||||
|
||||
search_intent="informational, commercial, company, news, finance, competitor, programming, scholar"
|
||||
search_language="EN"
|
||||
|
||||
##################################################
|
||||
#
|
||||
# Blog postprocessing.
|
||||
#
|
||||
##################################################
|
||||
|
||||
# Specify the output format of the blog as: HTML, markdown, plaintext. Defaults to markdown.
|
||||
blog_output_format="markdown"
|
||||
|
||||
# Specify full path to folder where the final blog should be stored. ex: _posts
|
||||
blog_output_folder=""
|
||||
|
||||
# Specify full path to folder where blog images will be stored. ex: assets
|
||||
blog_image_output_folder=""
|
||||
@@ -1,62 +0,0 @@
|
||||
###################################################
|
||||
#
|
||||
# This is the main config file which drives the code.
|
||||
# This config will restrict code modifications and hence
|
||||
# ease of usability.
|
||||
#
|
||||
##################################################
|
||||
|
||||
|
||||
# Set the Openai API key.
|
||||
# https://help.openai.com/en/articles/4936850-where-do-i-find-my-secret-api-key
|
||||
openai_api_key=""
|
||||
# bard_api=""
|
||||
# ms_bing_api=""
|
||||
|
||||
# Mention which model to use, default is GPT-3.5
|
||||
model_name=""
|
||||
|
||||
|
||||
###################################################
|
||||
#
|
||||
# Define Blog Content characteristics
|
||||
#
|
||||
###################################################
|
||||
|
||||
blog_tone="professional"
|
||||
blog_character="Use transition words. Use active voice."
|
||||
blog_tempo="???"
|
||||
blog_audience="begginer style"
|
||||
search_intent = [informational, commercial, transactional]
|
||||
buyer_stage= [awareness, consideration, decision]
|
||||
target_audience = "small businesses in the United States"
|
||||
|
||||
|
||||
###################################################
|
||||
#
|
||||
# Wordpress and WIX integration and details
|
||||
#
|
||||
###################################################
|
||||
|
||||
# Set webhosting as "wordpress" or "wix"
|
||||
webhosting="wix"
|
||||
# https://dev.wix.com/docs/rest/articles/getting-started/api-keys
|
||||
wix_site_id = "1234567890"
|
||||
wix_api_key = "YOUR_WIX_API_KEY"
|
||||
|
||||
# Set the WordPress site URL, username, and password.
|
||||
# https://wordpress.stackexchange.com/questions/301035/how-to-check-wordpress-website-username-and-password-is-correct
|
||||
wordpress_site_url = "https://example.com"
|
||||
wordpress_username = "YOUR_WORDPRESS_USERNAME"
|
||||
wordpress_password = "YOUR_WORDPRESS_PASSWORD"
|
||||
|
||||
|
||||
####################################################
|
||||
#
|
||||
# Details for generating blog images.
|
||||
#
|
||||
####################################################
|
||||
#dall-e2
|
||||
# https://imagen.research.google/
|
||||
#imagen
|
||||
#bing images
|
||||
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"load_extensions": {
|
||||
"jupyterlab-plotly/extension": true
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,86 @@
|
||||
{
|
||||
"name": "jupyterlab-plotly",
|
||||
"version": "5.18.0",
|
||||
"description": "The plotly Jupyter extension",
|
||||
"author": "The plotly.py team",
|
||||
"license": "MIT",
|
||||
"main": "lib/index.js",
|
||||
"repository": {
|
||||
"type": "git",
|
||||
"url": "https://github.com/plotly/plotly.py"
|
||||
},
|
||||
"keywords": [
|
||||
"jupyter",
|
||||
"widgets",
|
||||
"ipython",
|
||||
"ipywidgets",
|
||||
"plotly"
|
||||
],
|
||||
"files": [
|
||||
"lib/**/*.js",
|
||||
"dist/*.js",
|
||||
"style/*.*"
|
||||
],
|
||||
"scripts": {
|
||||
"build:dev": "npm run build:lib && npm run build:nbextension && npm run build:labextension:dev",
|
||||
"build:prod": "npm run build:lib && npm run build:nbextension && npm run build:labextension",
|
||||
"build:labextension": "jupyter labextension build .",
|
||||
"build:labextension:dev": "jupyter labextension build --development True .",
|
||||
"build:lib": "tsc",
|
||||
"build:nbextension": "webpack --mode=production",
|
||||
"clean": "npm run clean:lib && npm run clean:nbextension && npm run clean:labextension",
|
||||
"clean:lib": "rimraf lib",
|
||||
"clean:labextension": "rimraf ../../python/plotly/jupyterlab_plotly/labextension",
|
||||
"clean:nbextension": "rimraf ../../python/plotly/jupyterlab_plotly/nbextension/index.js*",
|
||||
"lint": "eslint . --ext .ts,.tsx --fix",
|
||||
"lint:check": "eslint . --ext .ts,.tsx",
|
||||
"prepack": "npm run build:lib",
|
||||
"test": "echo \"Error: no test specified\" && exit 1",
|
||||
"watch": "npm-run-all -p watch:*",
|
||||
"watch:lib": "tsc -w",
|
||||
"watch:nbextension": "webpack --watch"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@jupyterlab/builder": "^3.0.0",
|
||||
"@lumino/application": "^1.6.0",
|
||||
"@types/plotly.js": "^1.54.10",
|
||||
"@types/webpack-env": "^1.13.6",
|
||||
"acorn": "^7.2.0",
|
||||
"css-loader": "^5.2.6",
|
||||
"fs-extra": "^7.0.0",
|
||||
"mkdirp": "^0.5.1",
|
||||
"npm-run-all": "^4.1.3",
|
||||
"prettier": "^2.0.5",
|
||||
"rimraf": "^2.6.2",
|
||||
"source-map-loader": "^1.1.3",
|
||||
"style-loader": "^1.0.0",
|
||||
"ts-loader": "^8.0.0",
|
||||
"typescript": "~4.1.3",
|
||||
"webpack": "^5.0.0",
|
||||
"webpack-cli": "^4.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"@jupyter-widgets/base": ">=2.0.0 <7.0.0",
|
||||
"@jupyterlab/rendermime-interfaces": "^1.3.0 || ^2.0.0 || ^3.0.0",
|
||||
"@lumino/messaging": "^1.2.3",
|
||||
"@lumino/widgets": "^1.8.1",
|
||||
"lodash": "^4.17.4",
|
||||
"plotly.js": "^2.27.0"
|
||||
},
|
||||
"jupyterlab": {
|
||||
"extension": "lib/jupyterlab-plugin",
|
||||
"mimeExtension": "lib/plotly-renderer",
|
||||
"outputDir": "../../python/plotly/jupyterlab_plotly/labextension",
|
||||
"sharedPackages": {
|
||||
"@jupyter-widgets/base": {
|
||||
"bundled": false,
|
||||
"singleton": true
|
||||
}
|
||||
},
|
||||
"_build": {
|
||||
"load": "static/remoteEntry.70a4f7e7a0383740860d.js",
|
||||
"extension": "./extension",
|
||||
"mimeExtension": "./mimeExtension"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
"use strict";(self.webpackChunkjupyterlab_plotly=self.webpackChunkjupyterlab_plotly||[]).push([[133,657],{133:(e,t,n)=>{n.r(t),n.d(t,{default:()=>p});var l=n(900),i=n(657);const p={id:"jupyterlab-plotly",requires:[l.IJupyterWidgetRegistry],activate:function(e,t){t.registerWidget({name:i.o,version:i.Y,exports:()=>Promise.all([n.e(478),n.e(855)]).then(n.bind(n,855))})},autoStart:!0}},657:(e,t,n)=>{n.d(t,{Y:()=>i,o:()=>p});const l=n(147),i=l.version,p=l.name},147:e=>{e.exports=JSON.parse('{"name":"jupyterlab-plotly","version":"5.18.0","description":"The plotly Jupyter extension","author":"The plotly.py team","license":"MIT","main":"lib/index.js","repository":{"type":"git","url":"https://github.com/plotly/plotly.py"},"keywords":["jupyter","widgets","ipython","ipywidgets","plotly"],"files":["lib/**/*.js","dist/*.js","style/*.*"],"scripts":{"build:dev":"npm run build:lib && npm run build:nbextension && npm run build:labextension:dev","build:prod":"npm run build:lib && npm run build:nbextension && npm run build:labextension","build:labextension":"jupyter labextension build .","build:labextension:dev":"jupyter labextension build --development True .","build:lib":"tsc","build:nbextension":"webpack --mode=production","clean":"npm run clean:lib && npm run clean:nbextension && npm run clean:labextension","clean:lib":"rimraf lib","clean:labextension":"rimraf ../../python/plotly/jupyterlab_plotly/labextension","clean:nbextension":"rimraf ../../python/plotly/jupyterlab_plotly/nbextension/index.js*","lint":"eslint . --ext .ts,.tsx --fix","lint:check":"eslint . 
--ext .ts,.tsx","prepack":"npm run build:lib","test":"echo \\"Error: no test specified\\" && exit 1","watch":"npm-run-all -p watch:*","watch:lib":"tsc -w","watch:nbextension":"webpack --watch"},"devDependencies":{"@jupyterlab/builder":"^3.0.0","@lumino/application":"^1.6.0","@types/plotly.js":"^1.54.10","@types/webpack-env":"^1.13.6","acorn":"^7.2.0","css-loader":"^5.2.6","fs-extra":"^7.0.0","mkdirp":"^0.5.1","npm-run-all":"^4.1.3","prettier":"^2.0.5","rimraf":"^2.6.2","source-map-loader":"^1.1.3","style-loader":"^1.0.0","ts-loader":"^8.0.0","typescript":"~4.1.3","webpack":"^5.0.0","webpack-cli":"^4.0.0"},"dependencies":{"@jupyter-widgets/base":">=2.0.0 <7.0.0","@jupyterlab/rendermime-interfaces":"^1.3.0 || ^2.0.0 || ^3.0.0","@lumino/messaging":"^1.2.3","@lumino/widgets":"^1.8.1","lodash":"^4.17.4","plotly.js":"^2.27.0"},"jupyterlab":{"extension":"lib/jupyterlab-plugin","mimeExtension":"lib/plotly-renderer","outputDir":"../../python/plotly/jupyterlab_plotly/labextension","sharedPackages":{"@jupyter-widgets/base":{"bundled":false,"singleton":true}}}}')}}]);
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
* @copyright 2016 Sean Connelly (@voidqk), http://syntheti.cc
|
||||
* @license MIT
|
||||
* @preserve Project Home: https://github.com/voidqk/polybooljs
|
||||
*/
|
||||
|
||||
/*
|
||||
object-assign
|
||||
(c) Sindre Sorhus
|
||||
@license MIT
|
||||
*/
|
||||
|
||||
/*!
|
||||
* Determine if an object is a Buffer
|
||||
*
|
||||
* @author Feross Aboukhadijeh <https://feross.org>
|
||||
* @license MIT
|
||||
*/
|
||||
|
||||
/*!
|
||||
* The buffer module from node.js, for the browser.
|
||||
*
|
||||
* @author Feross Aboukhadijeh <feross@feross.org> <http://feross.org>
|
||||
* @license MIT
|
||||
*/
|
||||
|
||||
/*!
|
||||
* The buffer module from node.js, for the browser.
|
||||
*
|
||||
* @author Feross Aboukhadijeh <https://feross.org>
|
||||
* @license MIT
|
||||
*/
|
||||
|
||||
/*!
|
||||
* pad-left <https://github.com/jonschlinkert/pad-left>
|
||||
*
|
||||
* Copyright (c) 2014-2015, Jon Schlinkert.
|
||||
* Licensed under the MIT license.
|
||||
*/
|
||||
|
||||
/*!
|
||||
* repeat-string <https://github.com/jonschlinkert/repeat-string>
|
||||
*
|
||||
* Copyright (c) 2014-2015, Jon Schlinkert.
|
||||
* Licensed under the MIT License.
|
||||
*/
|
||||
|
||||
/*! Native Promise Only
|
||||
v0.8.1 (c) Kyle Simpson
|
||||
MIT License: http://getify.mit-license.org
|
||||
*/
|
||||
|
||||
/*! ieee754. BSD-3-Clause License. Feross Aboukhadijeh <https://feross.org/opensource> */
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,8 @@
|
||||
/**
|
||||
* @license
|
||||
* Lodash <https://lodash.com/>
|
||||
* Copyright OpenJS Foundation and other contributors <https://openjsf.org/>
|
||||
* Released under MIT license <https://lodash.com/license>
|
||||
* Based on Underscore.js 1.8.3 <http://underscorejs.org/LICENSE>
|
||||
* Copyright Jeremy Ashkenas, DocumentCloud and Investigative Reporters & Editors
|
||||
*/
|
||||
@@ -0,0 +1 @@
|
||||
"use strict";(self.webpackChunkjupyterlab_plotly=self.webpackChunkjupyterlab_plotly||[]).push([[657],{657:(e,l,n)=>{n.d(l,{Y:()=>i,o:()=>p});const t=n(147),i=t.version,p=t.name},147:e=>{e.exports=JSON.parse('{"name":"jupyterlab-plotly","version":"5.18.0","description":"The plotly Jupyter extension","author":"The plotly.py team","license":"MIT","main":"lib/index.js","repository":{"type":"git","url":"https://github.com/plotly/plotly.py"},"keywords":["jupyter","widgets","ipython","ipywidgets","plotly"],"files":["lib/**/*.js","dist/*.js","style/*.*"],"scripts":{"build:dev":"npm run build:lib && npm run build:nbextension && npm run build:labextension:dev","build:prod":"npm run build:lib && npm run build:nbextension && npm run build:labextension","build:labextension":"jupyter labextension build .","build:labextension:dev":"jupyter labextension build --development True .","build:lib":"tsc","build:nbextension":"webpack --mode=production","clean":"npm run clean:lib && npm run clean:nbextension && npm run clean:labextension","clean:lib":"rimraf lib","clean:labextension":"rimraf ../../python/plotly/jupyterlab_plotly/labextension","clean:nbextension":"rimraf ../../python/plotly/jupyterlab_plotly/nbextension/index.js*","lint":"eslint . --ext .ts,.tsx --fix","lint:check":"eslint . 
--ext .ts,.tsx","prepack":"npm run build:lib","test":"echo \\"Error: no test specified\\" && exit 1","watch":"npm-run-all -p watch:*","watch:lib":"tsc -w","watch:nbextension":"webpack --watch"},"devDependencies":{"@jupyterlab/builder":"^3.0.0","@lumino/application":"^1.6.0","@types/plotly.js":"^1.54.10","@types/webpack-env":"^1.13.6","acorn":"^7.2.0","css-loader":"^5.2.6","fs-extra":"^7.0.0","mkdirp":"^0.5.1","npm-run-all":"^4.1.3","prettier":"^2.0.5","rimraf":"^2.6.2","source-map-loader":"^1.1.3","style-loader":"^1.0.0","ts-loader":"^8.0.0","typescript":"~4.1.3","webpack":"^5.0.0","webpack-cli":"^4.0.0"},"dependencies":{"@jupyter-widgets/base":">=2.0.0 <7.0.0","@jupyterlab/rendermime-interfaces":"^1.3.0 || ^2.0.0 || ^3.0.0","@lumino/messaging":"^1.2.3","@lumino/widgets":"^1.8.1","lodash":"^4.17.4","plotly.js":"^2.27.0"},"jupyterlab":{"extension":"lib/jupyterlab-plugin","mimeExtension":"lib/plotly-renderer","outputDir":"../../python/plotly/jupyterlab_plotly/labextension","sharedPackages":{"@jupyter-widgets/base":{"bundled":false,"singleton":true}}}}')}}]);
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,4 @@
|
||||
/* This is a generated file of CSS imports */
|
||||
/* It was generated by @jupyterlab/builder in Build.ensureAssets() */
|
||||
|
||||
|
||||
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"packages": [
|
||||
{
|
||||
"name": "css-loader",
|
||||
"versionInfo": "5.2.6",
|
||||
"licenseId": "MIT",
|
||||
"extractedText": "Copyright JS Foundation and other contributors\n\nPermission is hereby granted, free of charge, to any person obtaining\na copy of this software and associated documentation files (the\n'Software'), to deal in the Software without restriction, including\nwithout limitation the rights to use, copy, modify, merge, publish,\ndistribute, sublicense, and/or sell copies of the Software, and to\npermit persons to whom the Software is furnished to do so, subject to\nthe following conditions:\n\nThe above copyright notice and this permission notice shall be\nincluded in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\nIN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\nCLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\nTORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\nSOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n"
|
||||
},
|
||||
{
|
||||
"name": "lodash",
|
||||
"versionInfo": "4.17.21",
|
||||
"licenseId": "MIT",
|
||||
"extractedText": "Copyright OpenJS Foundation and other contributors <https://openjsf.org/>\n\nBased on Underscore.js, copyright Jeremy Ashkenas,\nDocumentCloud and Investigative Reporters & Editors <http://underscorejs.org/>\n\nThis software consists of voluntary contributions made by many\nindividuals. For exact contribution history, see the revision history\navailable at https://github.com/lodash/lodash\n\nThe following license applies to all parts of this software except as\ndocumented below:\n\n====\n\nPermission is hereby granted, free of charge, to any person obtaining\na copy of this software and associated documentation files (the\n\"Software\"), to deal in the Software without restriction, including\nwithout limitation the rights to use, copy, modify, merge, publish,\ndistribute, sublicense, and/or sell copies of the Software, and to\npermit persons to whom the Software is furnished to do so, subject to\nthe following conditions:\n\nThe above copyright notice and this permission notice shall be\nincluded in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\nNONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE\nLIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION\nOF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION\nWITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n\n====\n\nCopyright and related rights for sample code are waived via CC0. 
Sample\ncode is defined as all source code displayed within the prose of the\ndocumentation.\n\nCC0: http://creativecommons.org/publicdomain/zero/1.0/\n\n====\n\nFiles located in the node_modules and vendor directories are externally\nmaintained libraries used by this software which have their own\nlicenses; we recommend you read them, as their terms may differ from the\nterms above.\n"
|
||||
},
|
||||
{
|
||||
"name": "plotly.js",
|
||||
"versionInfo": "2.27.0",
|
||||
"licenseId": "MIT",
|
||||
"extractedText": "The MIT License (MIT)\n\nCopyright (c) 2021 Plotly, Inc\n\nPermission is hereby granted, free of charge, to any person obtaining a copy\nof this software and associated documentation files (the \"Software\"), to deal\nin the Software without restriction, including without limitation the rights\nto use, copy, modify, merge, publish, distribute, sublicense, and/or sell\ncopies of the Software, and to permit persons to whom the Software is\nfurnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in\nall copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\nIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\nFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\nAUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\nLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\nOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\nTHE SOFTWARE.\n"
|
||||
},
|
||||
{
|
||||
"name": "style-loader",
|
||||
"versionInfo": "1.3.0",
|
||||
"licenseId": "MIT",
|
||||
"extractedText": "Copyright JS Foundation and other contributors\n\nPermission is hereby granted, free of charge, to any person obtaining\na copy of this software and associated documentation files (the\n'Software'), to deal in the Software without restriction, including\nwithout limitation the rights to use, copy, modify, merge, publish,\ndistribute, sublicense, and/or sell copies of the Software, and to\npermit persons to whom the Software is furnished to do so, subject to\nthe following conditions:\n\nThe above copyright notice and this permission notice shall be\nincluded in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,\nEXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\nMERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\nIN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\nCLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\nTORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\nSOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\n"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
|
||||
// Entry point for the notebook bundle containing custom model definitions.
|
||||
//
|
||||
define(function() {
|
||||
"use strict";
|
||||
|
||||
window['requirejs'].config({
|
||||
map: {
|
||||
'*': {
|
||||
'jupyterlab-plotly': 'nbextensions/jupyterlab-plotly/index',
|
||||
},
|
||||
}
|
||||
});
|
||||
// Export the required load_ipython_extension function
|
||||
return {
|
||||
load_ipython_extension : function() {}
|
||||
};
|
||||
});
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* @copyright 2016 Sean Connelly (@voidqk), http://syntheti.cc
|
||||
* @license MIT
|
||||
* @preserve Project Home: https://github.com/voidqk/polybooljs
|
||||
*/
|
||||
|
||||
/*
|
||||
object-assign
|
||||
(c) Sindre Sorhus
|
||||
@license MIT
|
||||
*/
|
||||
|
||||
/*!
|
||||
* Determine if an object is a Buffer
|
||||
*
|
||||
* @author Feross Aboukhadijeh <https://feross.org>
|
||||
* @license MIT
|
||||
*/
|
||||
|
||||
/*!
|
||||
* The buffer module from node.js, for the browser.
|
||||
*
|
||||
* @author Feross Aboukhadijeh <feross@feross.org> <http://feross.org>
|
||||
* @license MIT
|
||||
*/
|
||||
|
||||
/*!
|
||||
* The buffer module from node.js, for the browser.
|
||||
*
|
||||
* @author Feross Aboukhadijeh <https://feross.org>
|
||||
* @license MIT
|
||||
*/
|
||||
|
||||
/*!
|
||||
* pad-left <https://github.com/jonschlinkert/pad-left>
|
||||
*
|
||||
* Copyright (c) 2014-2015, Jon Schlinkert.
|
||||
* Licensed under the MIT license.
|
||||
*/
|
||||
|
||||
/*!
|
||||
* repeat-string <https://github.com/jonschlinkert/repeat-string>
|
||||
*
|
||||
* Copyright (c) 2014-2015, Jon Schlinkert.
|
||||
* Licensed under the MIT License.
|
||||
*/
|
||||
|
||||
/*! Native Promise Only
|
||||
v0.8.1 (c) Kyle Simpson
|
||||
MIT License: http://getify.mit-license.org
|
||||
*/
|
||||
|
||||
/*! ieee754. BSD-3-Clause License. Feross Aboukhadijeh <https://feross.org/opensource> */
|
||||
|
||||
/**
|
||||
* @license
|
||||
* Lodash <https://lodash.com/>
|
||||
* Copyright OpenJS Foundation and other contributors <https://openjsf.org/>
|
||||
* Released under MIT license <https://lodash.com/license>
|
||||
* Based on Underscore.js 1.8.3 <http://underscorejs.org/LICENSE>
|
||||
* Copyright Jeremy Ashkenas, DocumentCloud and Investigative Reporters & Editors
|
||||
*/
|
||||
183
pseo_main.py
183
pseo_main.py
@@ -1,183 +0,0 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
"""
|
||||
Main module for calling PSEO related functions. This is the end user interface and is user-driven.
|
||||
Allows the user to specify various parameters for blog generation without needing to edit the code.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
import argparse
|
||||
import requests
|
||||
from loguru import logger
|
||||
import csv
|
||||
import json
|
||||
|
||||
# Logger configuration
|
||||
logger.remove()
|
||||
logger.add(sys.stdout, colorize=True, format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}")
|
||||
|
||||
# Importing custom functions
|
||||
from lib.get_text_response import generate_detailed_blog, generate_youtube_blog
|
||||
from lib.main_youtube_research_blog import generate_youtube_research_blog
|
||||
from lib.main_keywords_to_blog import generate_keyword_blog
|
||||
from lib.main_arxiv_to_blog import blog_arxiv_keyword, blog_arxiv_url_list
|
||||
|
||||
|
||||
def parse_arguments():
    """Parse the command-line arguments for blog generation.

    None of the input options is marked as required at the argparse level;
    main() rejects invocations that supply none of --csv, --keywords,
    --youtube_urls or --scholar.

    Returns:
        argparse.Namespace: Parsed arguments.
    """
    # Shown verbatim at the end of --help (RawDescriptionHelpFormatter keeps
    # the line breaks). The flag names here must match the add_argument calls.
    example_usage = (
        "\n"
        "Example Usage:\n"
        'Keyword usage: python pseo_main.py --keywords "Writesonic AI SEO-optimized blog writing,'
        "PepperType AI virtual content assistant,Copysmith AI enterprise eCommerce content,"
        "Copy AI artificial intelligence content generator,Jasper AI creative content platform,"
        'Contents generative AI content strategy"\n'
        "YouTube usage: python pseo_main.py --youtube_urls "
        "https://www.youtube.com/watch?v=yu27PWzJI_Y,"
        "https://www.youtube.com/watch?v=WGzoBD-xthI,"
        "https://www.youtube.com/watch?v=zizonToFXDs\n"
        'Scholar usage: python pseo_main.py --scholar "GPT-4 Technical Report"\n'
    )

    parser = argparse.ArgumentParser(
        description="Generate blogs based on user input.",
        epilog=example_usage,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # Input sources: at least one of csv, keywords, youtube_urls or scholar
    # must be given (checked in main()).
    parser.add_argument("--csv", type=str, help="Provide path csv file. Check the template csv for example.")
    parser.add_argument("--keywords", type=str, help="Keywords for blog generation.")
    parser.add_argument("--youtube_urls", type=str, help="Comma-separated YouTube URLs for blog generation.")
    parser.add_argument(
        "--scholar",
        type=str,
        help="Write blog from latest research papers on given keywords. "
             "Use 'arxiv_papers_url' to provide a file arxiv url list.",
    )

    # Optional options.
    parser.add_argument("--niche", action='store_true', default=False,
                        help="Flag to generate niche blogs (default: False).")
    parser.add_argument("--wordpress", action='store_true', default=False,
                        help="Flag to upload blogs to WordPress (default: False).")
    # TODO: Add options for blog_tone and blog_personality.
    parser.add_argument("--output_format", choices=['plaintext', 'markdown', 'html'], default='markdown',
                        help="Output format of the blogs (default: markdown).")

    return parser.parse_args()
|
||||
|
||||
|
||||
def check_openai_api_key(api_key, timeout=10):
    """Check whether the OpenAI API accepts the given key.

    Sends one authenticated GET request to the model-listing endpoint and
    treats an HTTP 200 response as proof that the key is valid.

    Args:
        api_key (str): The OpenAI API key.
        timeout (float): Seconds to wait for the HTTP request before giving up.

    Returns:
        bool: True if the key is valid, False otherwise (including when the
        key is empty or None, in which case no request is made).

    Raises:
        requests.RequestException: If the request cannot be completed
            (connection failure, timeout, ...).
    """
    # An empty key can never authenticate; skip the network round trip.
    if not api_key:
        return False

    headers = {"Authorization": f"Bearer {api_key}"}
    # /v1/models replaces the deprecated /v1/engines listing endpoint.
    # Without an explicit timeout, requests would wait forever on a stalled
    # connection.
    response = requests.get(
        "https://api.openai.com/v1/models",
        headers=headers,
        timeout=timeout,
    )
    return response.status_code == 200
|
||||
|
||||
|
||||
def main():
    """Dispatch blog generation according to the command-line input.

    Exactly one input mode is handled per run, checked in this order:
    --youtube_urls, --keywords, --csv, --scholar. Any failure is logged and
    the process exits with status 1.
    """
    try:
        args = parse_arguments()
        logger.info("Fetch and Validate Openai key.")
        # Validate user input
        if not (args.keywords or args.youtube_urls or args.csv or args.scholar):
            raise ValueError("Either --keywords, --youtube_urls, --csv Or --scholar must be provided.")

        # Validate OpenAI API key
        openai_api_key = os.environ.get("OPENAI_API_KEY")
        if not openai_api_key or not check_openai_api_key(openai_api_key):
            raise EnvironmentError("Invalid or missing OPENAI_API_KEY environment variable.")

        logger.info("Valid OpenAI API key found.")

        # Handle blog generation based on input
        if args.youtube_urls:
            # Entries that are real URLs are blogged directly; anything else is
            # treated as a phrase to research on YouTube. Whitespace around the
            # commas is dropped so "url1, url2" does not turn the second URL
            # into a research phrase.
            valid_urls = []
            quoted_strings = []
            for entry in args.youtube_urls.split(","):
                entry = entry.strip()
                if not entry:
                    continue
                if is_valid_url(entry):
                    valid_urls.append(entry)
                else:
                    quoted_strings.append(entry)

            if valid_urls:
                logger.info(f"Generating blogs from YouTube URLs: {valid_urls}")
                generate_youtube_blog(valid_urls)
            if quoted_strings:
                logger.info(f"Do youtube research and write blogs for: {quoted_strings}")
                generate_youtube_research_blog(quoted_strings)

        elif args.keywords:
            # NOTE: parse_arguments() defines no --num_blogs/--num_subtopics
            # options, so they must not be read from args here (doing so raised
            # AttributeError before any blog was generated).
            keyword_list = [kw.strip() for kw in args.keywords.split(",") if kw.strip()]
            logger.info(f"Generating blogs for keywords: {keyword_list}")
            generate_keyword_blog(keyword_list)

        elif args.csv:
            try:
                data = read_csv_to_json(args.csv)
                logger.info(f"Generating blogs from csv file: {json.dumps(data, indent=4)}")
                # Each CSV row supplies one keyword plus the URL passed
                # alongside it to the generator.
                for item in data:
                    keyword_list = [item['keyword']]
                    generate_keyword_blog(keyword_list, item['URL'])
            except Exception as err:
                logger.error(f"Failed to generate blogs the CSV file:{err}")
                sys.exit(1)

        elif args.scholar:
            logger.info(f"Writing blog on {args.scholar} from research papers of arxiv, google & Semantic scholar.")
            # Write from arxiv urls given in a file.
            if 'arxiv_papers_url' in args.scholar:
                try:
                    logger.info("Writing scholar blogs from arxiv url list.")
                    blog_arxiv_url_list(args.scholar)
                except Exception as err:
                    logger.error(f"Failed to write from file {args.scholar} in present directory: {err}")
                    sys.exit(1)
            # Write scholar blogs from given keywords.
            else:
                try:
                    blog_arxiv_keyword(args.scholar)
                except Exception as err:
                    logger.error(f"Failed to write blog from research papers: {err}")
                    # Re-raise with the original traceback; the outer handler
                    # logs it and exits.
                    raise

    except Exception as e:
        logger.error(f"An error occurred: {e}")
        sys.exit(1)
|
||||
|
||||
|
||||
def read_csv_to_json(file_path):
    """Load a CSV file as a list of row mappings keyed by the header row.

    Args:
        file_path (str): Path of the CSV file to read (decoded as UTF-8).

    Returns:
        list: One mapping per data row, in file order.

    If the file cannot be opened or parsed, the error is logged and the
    process exits with status 1.
    """
    try:
        with open(file_path, newline='', encoding='utf-8') as handle:
            # DictReader yields one mapping per row; materialize them all
            # while the file is still open.
            return list(csv.DictReader(handle))
    except Exception as err:
        logger.error(f"Failed to read the CSV file:{err}")
        sys.exit(1)
|
||||
|
||||
|
||||
def is_valid_url(url):
    """
    Tell whether the given string looks like a complete URL.

    Args:
        url (str): String to check.

    Returns:
        bool: True if the string is a valid URL, False otherwise.
    """
    # Pieces of the pattern, in match order.
    pattern_parts = (
        r'^(?:http|ftp)s?://',  # scheme: http(s):// or ftp(s)://
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|',  # domain...
        r'localhost|',  # localhost...
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|',  # ...or ipv4
        r'\[?[A-F0-9]*:[A-F0-9:]+\]?)',  # ...or ipv6
        r'(?::\d+)?',  # optional port
        r'(?:/?|[/?]\S+)$',  # optional path / query
    )
    matched = re.match("".join(pattern_parts), url, flags=re.IGNORECASE)
    return matched is not None
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,4 +1,5 @@
|
||||
aiofiles
|
||||
typer[all]
|
||||
aiohttp
|
||||
aiosignal
|
||||
annotated-types
|
||||
|
||||
20
workspace/arxiv_papers_url.txt
Normal file
20
workspace/arxiv_papers_url.txt
Normal file
@@ -0,0 +1,20 @@
|
||||
https://arxiv.org/abs/1910.10683
|
||||
https://arxiv.org/abs/2306.03438
|
||||
https://arxiv.org/pdf/2302.06144.pdf
|
||||
https://arxiv.org/pdf/2303.03004v3.pdf
|
||||
https://arxiv.org/abs/2001.00059
|
||||
https://arxiv.org/abs/2012.07023
|
||||
https://arxiv.org/abs/2105.08645
|
||||
https://arxiv.org/abs/2105.04297
|
||||
https://arxiv.org/abs/2010.03150
|
||||
https://arxiv.org/abs/2105.12485
|
||||
https://arxiv.org/abs/2010.07987
|
||||
https://arxiv.org/pdf/2306.13549.pdf
|
||||
https://arxiv.org/pdf/2312.16602.pdf
|
||||
https://arxiv.org/pdf/2310.03744.pdf
|
||||
https://arxiv.org/abs/2312.06647
|
||||
https://arxiv.org/pdf/2312.03700.pdf
|
||||
https://arxiv.org/abs/2312.09237
|
||||
https://arxiv.org/abs/2312.13286
|
||||
https://arxiv.org/pdf/2310.20550.pdf
|
||||
|
||||
233
workspace/blogs_already_written_on.py
Normal file
233
workspace/blogs_already_written_on.py
Normal file
@@ -0,0 +1,233 @@
|
||||
2201.11990
|
||||
2210.02414
|
||||
2112.11446v2
|
||||
2203.15556
|
||||
2201.08239
|
||||
2204.06745
|
||||
2305.10403
|
||||
2307.09288
|
||||
2208.11857
|
||||
2302.12095
|
||||
1905.00537
|
||||
2209.12356
|
||||
2301.08745
|
||||
2302.10198
|
||||
2009.03300
|
||||
2208.03299
|
||||
2212.13138
|
||||
2201.11903
|
||||
2211.14275
|
||||
2001.08361
|
||||
2001.08361
|
||||
2307.01952
|
||||
2206.07682
|
||||
2302.06476
|
||||
2206.04615
|
||||
2211.02011
|
||||
2212.10403
|
||||
2303.17564
|
||||
2204.02329
|
||||
2205.10625
|
||||
2205.09712
|
||||
2206.02336
|
||||
2206.06315
|
||||
2207.10342
|
||||
2209.14610
|
||||
2210.03057
|
||||
2209.07686
|
||||
2210.03493
|
||||
2210.02441
|
||||
2210.07128
|
||||
2210.11610
|
||||
2212.08635
|
||||
2212.09597
|
||||
2212.09561
|
||||
2212.10001
|
||||
2212.10071
|
||||
2301.13379
|
||||
2302.00923
|
||||
2302.00093
|
||||
2302.12246
|
||||
2303.11381
|
||||
2305.04118
|
||||
2305.11255
|
||||
2305.17812
|
||||
2301.13848
|
||||
2311.16452
|
||||
2303.08774
|
||||
2304.01373
|
||||
2302.13971v1
|
||||
2303.04360
|
||||
2208.10442
|
||||
2302.13007
|
||||
2303.15056
|
||||
2302.04166
|
||||
2303.12712
|
||||
2303.11366
|
||||
2308.12950
|
||||
2306.08568
|
||||
2107.03374
|
||||
2305.06161
|
||||
2305.07922
|
||||
2203.13474
|
||||
2204.02311
|
||||
2302.13971
|
||||
2303.17568
|
||||
2203.07814
|
||||
2301.03988
|
||||
2305.02309
|
||||
2207.01780
|
||||
2301.13816
|
||||
2307.04349
|
||||
2207.10397
|
||||
2304.05128
|
||||
2306.09896
|
||||
2306.02907
|
||||
2108.07732
|
||||
2306.03091
|
||||
2308.10335
|
||||
2312.17244
|
||||
2305.02301
|
||||
2305.15717
|
||||
2310.02421
|
||||
2305.11170
|
||||
2309.00384
|
||||
2310.06839
|
||||
2312.04737
|
||||
2309.14021
|
||||
2312.07046
|
||||
2308.07633
|
||||
2305.17888
|
||||
2306.08162
|
||||
2309.05210
|
||||
2308.14903
|
||||
2310.19102
|
||||
2311.09550
|
||||
2311.00502
|
||||
2312.08583
|
||||
2305.11627
|
||||
2301.00774
|
||||
2212.09095
|
||||
2310.01801
|
||||
2310.01382
|
||||
2310.08915
|
||||
2310.09499
|
||||
https://github.com/Significant-Gravitas/AutoGPT
|
||||
https://github.com/gpt-engineer-org/gpt-engineer
|
||||
https://github.com/reworkd/AgentGPT
|
||||
https://github.com/geekan/MetaGPT
|
||||
https://github.com/Josh-XT/AGiXT
|
||||
https://github.com/litanlitudan/skyagi
|
||||
https://github.com/joonspk-research/generative_agents
|
||||
https://github.com/smol-ai/developer
|
||||
https://github.com/Forethought-Technologies/AutoChain
|
||||
https://github.com/TransformerOptimus/SuperAGI
|
||||
https://github.com/homanp/superagent
|
||||
https://github.com/a16z-infra/ai-town
|
||||
https://github.com/AI-Engineer-Foundation/agent-protocol
|
||||
https://github.com/microsoft/autogen
|
||||
https://github.com/cpacker/MemGPT
|
||||
https://github.com/shroominic/codeinterpreter-api
|
||||
https://github.com/aiwaves-cn/agents
|
||||
https://github.com/dataelement/bisheng
|
||||
https://github.com/Maplemx/Agently
|
||||
https://github.com/zilliztech/GPTCache
|
||||
http://github.com//Significant-Gravitas/AutoGPT
|
||||
http://github.com//AUTOMATIC1111/stable-diffusion-webui
|
||||
http://github.com//gpt-engineer-org/gpt-engineer
|
||||
http://github.com//lencx/ChatGPT
|
||||
http://github.com//Pythagora-io/gpt-pilot
|
||||
http://github.com//mouredev/Hello-Python
|
||||
http://github.com//Bin-Huang/chatbox
|
||||
http://github.com//getumbrel/llama-gpt
|
||||
http://github.com//transitive-bullshit/chatgpt-api
|
||||
http://github.com//python-telegram-bot/python-telegram-bot
|
||||
http://github.com//skorch-dev/skorch
|
||||
http://github.com//botpress/botpress
|
||||
http://github.com//TransformerOptimus/SuperAGI
|
||||
http://github.com//AMAI-GmbH/AI-Expert-Roadmap
|
||||
http://github.com//babysor/MockingBird
|
||||
http://github.com//gventuri/pandas-ai
|
||||
http://github.com//hpcaitech/ColossalAI
|
||||
http://github.com//LAION-AI/Open-Assistant
|
||||
http://github.com//xitu/gold-miner
|
||||
http://github.com//google-research/google-research
|
||||
http://github.com//photoprism/photoprism
|
||||
http://github.com//explosion/spaCy
|
||||
http://github.com//StanGirard/quivr
|
||||
http://github.com//microsoft/AI-For-Beginners
|
||||
http://github.com//GitHubDaily/GitHubDaily
|
||||
http://github.com//Lightning-AI/pytorch-lightning
|
||||
http://github.com//lutzroeder/netron
|
||||
http://github.com//bentoml/OpenLLM
|
||||
http://github.com//cloneofsimo/lora
|
||||
http://github.com//eosphoros-ai/DB-GPT
|
||||
http://github.com//labring/FastGPT
|
||||
http://github.com//Mintplex-Labs/anything-llm
|
||||
http://github.com//danswer-ai/danswer
|
||||
http://github.com//neuml/txtai
|
||||
http://github.com//run-llama/rags
|
||||
http://github.com//postgresml/postgresml
|
||||
http://github.com//JushBJJ/Mr.-Ranedeer-AI-Tutor
|
||||
http://github.com//s0md3v/roop
|
||||
http://github.com//microsoft/generative-ai-for-beginners
|
||||
http://github.com//leon-ai/leon
|
||||
http://github.com//geekan/MetaGPT
|
||||
http://github.com//jmorganca/ollama
|
||||
http://github.com//run-llama/llama_index
|
||||
http://github.com//milvus-io/milvus
|
||||
http://github.com//chatchat-space/Langchain-Chatchat
|
||||
http://github.com//zhayujie/chatgpt-on-wechat
|
||||
http://github.com//mindsdb/mindsdb
|
||||
http://github.com//FlowiseAI/Flowise
|
||||
http://github.com//microsoft/unilm
|
||||
http://github.com//mlabonne/llm-course
|
||||
http://github.com//sweepai/sweep
|
||||
http://github.com//lucidrains/imagen-pytorch
|
||||
http://github.com//GokuMohandas/Made-With-ML
|
||||
http://github.com//TabbyML/tabby
|
||||
http://github.com//chroma-core/chroma
|
||||
http://github.com//eugeneyan/open-llms
|
||||
http://github.com//cleanlab/cleanlab
|
||||
http://github.com//microsoft/semantic-kernel
|
||||
http://github.com//ymcui/Chinese-LLaMA-Alpaca
|
||||
http://github.com//mudler/LocalAI
|
||||
http://github.com//mlc-ai/mlc-llm
|
||||
http://github.com//THUDM/ChatGLM2-6B
|
||||
http://github.com//langgenius/dify
|
||||
http://github.com//vllm-project/vllm
|
||||
http://github.com//ludwig-ai/ludwig
|
||||
http://github.com//hiyouga/LLaMA-Factory
|
||||
http://github.com//h2oai/h2ogpt
|
||||
http://github.com//css-doodle/css-doodle
|
||||
http://github.com//williamngan/pts
|
||||
http://github.com//dair-ai/Prompt-Engineering-Guide
|
||||
http://github.com//AI4Finance-Foundation/FinGPT
|
||||
http://github.com//yzfly/awesome-chatgpt-zh
|
||||
http://github.com//microsoft/promptflow
|
||||
http://github.com//jina-ai/jina
|
||||
http://github.com//deepset-ai/haystack
|
||||
http://github.com//open-mmlab/mmagic
|
||||
http://github.com//bentoml/BentoML
|
||||
http://github.com//openvinotoolkit/openvino
|
||||
http://github.com//reworkd/AgentGPT
|
||||
http://github.com//logspace-ai/langflow
|
||||
http://github.com//mayooear/gpt4-pdf-chatbot-langchain
|
||||
http://github.com//activeloopai/deeplake
|
||||
http://github.com//danny-avila/LibreChat
|
||||
http://github.com//liaokongVFX/LangChain-Chinese-Getting-Started-Guide
|
||||
http://github.com//kyrolabs/awesome-langchain
|
||||
http://github.com//zilliztech/GPTCache
|
||||
http://github.com//speechbrain/speechbrain
|
||||
http://github.com//vercel/ai
|
||||
http://github.com//baichuan-inc/Baichuan-7B
|
||||
http://github.com//microsoft/autogen
|
||||
http://github.com//f/awesome-chatgpt-prompts
|
||||
http://github.com//xtekky/gpt4free
|
||||
http://github.com//wechaty/wechaty
|
||||
http://github.com//RasaHQ/rasa
|
||||
http://github.com//lobehub/lobe-chat
|
||||
http://github.com//GaiZhenbiao/ChuanhuChatGPT
|
||||
http://github.com//gunthercox/ChatterBot
|
||||
http://github.com//mamoe/mirai
|
||||
http://github.com//haotian-liu/LLaVA
|
||||
1
workspace/github_topics
Normal file
1
workspace/github_topics
Normal file
@@ -0,0 +1 @@
|
||||
image-generation,txt2img,img2img,image2image,text2image,diffusion,generative-art,stability-ai,stable-diffusion,ai,ai-tools,ai-assistant,ai-agents-framework,llm,multi-agent,agent,llama2,mistral,fine-tuning,rag,generative,prompt-engineering,prompt-tuning,generative-ai,text-to-image-generation,llm-ops,retrieval-augmented-generation,langchain,gemini-api,vertex-ai,huggingface,semantic-search,auto-gpt,llmops,ai-toolkit,chatbot,chatgpt,chat-gpt,multimodal,code-assistant,text-to-video,llms,gpt-4
|
||||
135
workspace/github_url_to_write.csv
Normal file
135
workspace/github_url_to_write.csv
Normal file
@@ -0,0 +1,135 @@
|
||||
https://github.com/Significant-Gravitas/AutoGPT
|
||||
https://github.com/gpt-engineer-org/gpt-engineer
|
||||
https://github.com/reworkd/AgentGPT
|
||||
https://github.com/geekan/MetaGPT
|
||||
https://github.com/Josh-XT/AGiXT
|
||||
https://github.com/litanlitudan/skyagi
|
||||
https://github.com/joonspk-research/generative_agents
|
||||
https://github.com/smol-ai/developer
|
||||
https://github.com/Forethought-Technologies/AutoChain
|
||||
https://github.com/TransformerOptimus/SuperAGI
|
||||
https://github.com/homanp/superagent
|
||||
https://github.com/a16z-infra/ai-town
|
||||
https://github.com/AI-Engineer-Foundation/agent-protocol
|
||||
https://github.com/microsoft/autogen
|
||||
https://github.com/cpacker/MemGPT
|
||||
https://github.com/shroominic/codeinterpreter-api
|
||||
https://github.com/aiwaves-cn/agents
|
||||
https://github.com/dataelement/bisheng
|
||||
https://github.com/Maplemx/Agently
|
||||
https://github.com/zilliztech/GPTCache
|
||||
http://github.com//Significant-Gravitas/AutoGPT
|
||||
http://github.com//AUTOMATIC1111/stable-diffusion-webui
|
||||
http://github.com//gpt-engineer-org/gpt-engineer
|
||||
http://github.com//lencx/ChatGPT
|
||||
http://github.com//hpcaitech/ColossalAI
|
||||
http://github.com//LAION-AI/Open-Assistant
|
||||
http://github.com//xitu/gold-miner
|
||||
http://github.com//babysor/MockingBird
|
||||
http://github.com//google-research/google-research
|
||||
http://github.com//photoprism/photoprism
|
||||
http://github.com//explosion/spaCy
|
||||
http://github.com//AMAI-GmbH/AI-Expert-Roadmap
|
||||
http://github.com//StanGirard/quivr
|
||||
http://github.com//microsoft/AI-For-Beginners
|
||||
http://github.com//GitHubDaily/GitHubDaily
|
||||
http://github.com//Lightning-AI/pytorch-lightning
|
||||
http://github.com//lutzroeder/netron
|
||||
http://github.com//JushBJJ/Mr.-Ranedeer-AI-Tutor
|
||||
http://github.com//s0md3v/roop
|
||||
http://github.com//microsoft/generative-ai-for-beginners
|
||||
http://github.com//leon-ai/leon
|
||||
http://github.com//geekan/MetaGPT
|
||||
http://github.com//jmorganca/ollama
|
||||
http://github.com//run-llama/llama_index
|
||||
http://github.com//milvus-io/milvus
|
||||
http://github.com//chatchat-space/Langchain-Chatchat
|
||||
http://github.com//zhayujie/chatgpt-on-wechat
|
||||
http://github.com//mindsdb/mindsdb
|
||||
http://github.com//FlowiseAI/Flowise
|
||||
http://github.com//microsoft/unilm
|
||||
http://github.com//mlabonne/llm-course
|
||||
http://github.com//microsoft/semantic-kernel
|
||||
http://github.com//ymcui/Chinese-LLaMA-Alpaca
|
||||
http://github.com//mudler/LocalAI
|
||||
http://github.com//mlc-ai/mlc-llm
|
||||
http://github.com//THUDM/ChatGLM2-6B
|
||||
http://github.com//langgenius/dify
|
||||
http://github.com//vllm-project/vllm
|
||||
http://github.com//TransformerOptimus/SuperAGI
|
||||
http://github.com//ludwig-ai/ludwig
|
||||
http://github.com//hiyouga/LLaMA-Factory
|
||||
http://github.com//bentoml/OpenLLM
|
||||
http://github.com//cloneofsimo/lora
|
||||
http://github.com//eosphoros-ai/DB-GPT
|
||||
http://github.com//labring/FastGPT
|
||||
http://github.com//Mintplex-Labs/anything-llm
|
||||
http://github.com//danswer-ai/danswer
|
||||
http://github.com//neuml/txtai
|
||||
http://github.com//run-llama/rags
|
||||
http://github.com//postgresml/postgresml
|
||||
http://github.com//h2oai/h2ogpt
|
||||
http://github.com//css-doodle/css-doodle
|
||||
http://github.com//williamngan/pts
|
||||
http://github.com//dair-ai/Prompt-Engineering-Guide
|
||||
http://github.com//AI4Finance-Foundation/FinGPT
|
||||
http://github.com//yzfly/awesome-chatgpt-zh
|
||||
http://github.com//microsoft/promptflow
|
||||
http://github.com//jina-ai/jina
|
||||
http://github.com//deepset-ai/haystack
|
||||
http://github.com//open-mmlab/mmagic
|
||||
http://github.com//bentoml/BentoML
|
||||
http://github.com//openvinotoolkit/openvino
|
||||
http://github.com//reworkd/AgentGPT
|
||||
http://github.com//logspace-ai/langflow
|
||||
http://github.com//mayooear/gpt4-pdf-chatbot-langchain
|
||||
http://github.com//botpress/botpress
|
||||
http://github.com//activeloopai/deeplake
|
||||
http://github.com//danny-avila/LibreChat
|
||||
http://github.com//liaokongVFX/LangChain-Chinese-Getting-Started-Guide
|
||||
http://github.com//kyrolabs/awesome-langchain
|
||||
http://github.com//zilliztech/GPTCache
|
||||
http://github.com//speechbrain/speechbrain
|
||||
http://github.com//vercel/ai
|
||||
http://github.com//skorch-dev/skorch
|
||||
http://github.com//baichuan-inc/Baichuan-7B
|
||||
http://github.com//microsoft/autogen
|
||||
http://github.com//f/awesome-chatgpt-prompts
|
||||
http://github.com//xtekky/gpt4free
|
||||
http://github.com//python-telegram-bot/python-telegram-bot
|
||||
http://github.com//wechaty/wechaty
|
||||
http://github.com//RasaHQ/rasa
|
||||
http://github.com//lobehub/lobe-chat
|
||||
http://github.com//transitive-bullshit/chatgpt-api
|
||||
http://github.com//GaiZhenbiao/ChuanhuChatGPT
|
||||
http://github.com//gunthercox/ChatterBot
|
||||
http://github.com//mamoe/mirai
|
||||
http://github.com//haotian-liu/LLaVA
|
||||
http://github.com//howdyai/botkit
|
||||
http://github.com//databrickslabs/dolly
|
||||
http://github.com//chiphuyen/stanford-tensorflow-tutorials
|
||||
http://github.com//ChatGPTNextWeb/ChatGPT-Next-Web
|
||||
http://github.com//openai/openai-cookbook
|
||||
http://github.com//binary-husky/gpt_academic
|
||||
http://github.com//PlexPt/awesome-chatgpt-prompts-zh
|
||||
http://github.com//KillianLucas/open-interpreter
|
||||
http://github.com//acheong08/ChatGPT
|
||||
http://github.com//tw93/Pake
|
||||
http://github.com//LC044/WeChatMsg
|
||||
http://github.com//openai/chatgpt-retrieval-plugin
|
||||
http://github.com//openai-translator/openai-translator
|
||||
http://github.com//sweepai/sweep
|
||||
http://github.com//lucidrains/imagen-pytorch
|
||||
http://github.com//GokuMohandas/Made-With-ML
|
||||
http://github.com//TabbyML/tabby
|
||||
http://github.com//chroma-core/chroma
|
||||
http://github.com//eugeneyan/open-llms
|
||||
http://github.com//cleanlab/cleanlab
|
||||
http://github.com//RUCAIBox/LLMSurvey
|
||||
http://github.com//OpenNMT/OpenNMT-py
|
||||
http://github.com//joaomdmoura/crewAI
|
||||
http://github.com//Pythagora-io/gpt-pilot
|
||||
http://github.com//mouredev/Hello-Python
|
||||
http://github.com//Bin-Huang/chatbox
|
||||
http://github.com//getumbrel/llama-gpt
|
||||
http://github.com//gventuri/pandas-ai
|
||||
|
9
workspace/github_urls_to_research_yet
Normal file
9
workspace/github_urls_to_research_yet
Normal file
@@ -0,0 +1,9 @@
|
||||
https://github.com/louisfb01/best_AI_papers_2023
|
||||
https://github.com/Giskard-AI/awesome-ai-safety
|
||||
https://github.com/mahseema/awesome-ai-tools
|
||||
https://github.com/Hyraze/ai-collective-tools#image-generator
|
||||
https://github.com/Horhorist/Awesome-ai
|
||||
https://github.com/youraibot/AI-Toolkit
|
||||
https://github.com/hades217/awesome-ai
|
||||
https://github.com/WooooDyy/LLM-Agent-Paper-List
|
||||
https://github.com/e2b-dev/awesome-ai-agents
|
||||
@@ -0,0 +1,26 @@
|
||||
╒════════╤═══════════════════════════╤═════════════════════════════════════╤════════════════════════════════════════════════════╕
|
||||
│ Rank │ Title │ Link │ Snippet │
|
||||
╞════════╪═══════════════════════════╪═════════════════════════════════════╪════════════════════════════════════════════════════╡
|
||||
│ 1 │ What is LlamaIndex?: How │ https://nanonets.com/blog/llamainde │ The core essence of LlamaIndex lies in its ability │
|
||||
│ │ It Works, and Optimizing │ x/ │ to build structured indices over ingested data, │
|
||||
│ │ Data Query │ │ represented as either Documents or Nodes. │
|
||||
├────────┼───────────────────────────┼─────────────────────────────────────┼────────────────────────────────────────────────────┤
|
||||
│ 2 │ Starter Tutorial - │ https://docs.llamaindex.ai/en/lates │ The easiest way to get it is to download it via │
|
||||
│ │ LlamaIndex 0.9.43 │ t/getting_started/starter_example.h │ this link and save it in a folder called data . │
|
||||
│ │ │ tml │ Set ... │
|
||||
├────────┼───────────────────────────┼─────────────────────────────────────┼────────────────────────────────────────────────────┤
|
||||
│ 3 │ LlamaIndex: Adding │ https://www.datacamp.com/tutorial/l │ You can download your resume by going on to the │
|
||||
│ │ Personal Data to LLMs - │ lama-index-adding-personal-data-to- │ Linkedin profile page, clicking on More, and then │
|
||||
│ │ DataCamp │ llms │ Save to PDF. │
|
||||
├────────┼───────────────────────────┼─────────────────────────────────────┼────────────────────────────────────────────────────┤
|
||||
│ 4 │ LlamaIndex 0.9.43 │ https://docs.llamaindex.ai/ │ LlamaIndex is a data framework for LLM-based │
|
||||
│ │ │ │ applications to ingest, structure, and access │
|
||||
│ │ │ │ private or domain-specific data. It's available in │
|
||||
│ │ │ │ Python (these docs) ... │
|
||||
├────────┼───────────────────────────┼─────────────────────────────────────┼────────────────────────────────────────────────────┤
|
||||
│ 5 │ Generative AI: An │ https://www.singlestore.com/blog/ge │ Delve into the world of LlamaIndex with this │
|
||||
│ │ Absolute Beginner's Guide │ nerative-ai-a-guide-to-llamaindex/ │ comprehensive beginner's guide, including an │
|
||||
│ │ to LlamaIndex │ │ insightful tutorial. │
|
||||
╘════════╧═══════════════════════════╧═════════════════════════════════════╧════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
╒════════════════════════════════════════════════════════════════════════════════╕
|
||||
│ Search Engine follow up questions for query: how to llamaindex │
|
||||
╞════════════════════════════════════════════════════════════════════════════════╡
|
||||
│ ['What are the benefits of llamaindex?', 'Are there any tutorials or guides on │
|
||||
│ how to implement llamaindex?', 'What are some alternative methods to │
|
||||
│ llamaindex?'] │
|
||||
╘════════════════════════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
@@ -0,0 +1,83 @@
|
||||
╒════════════════════════════════╤══════════════════════════════════════════════════════════════╤════════════════════════════════╕
|
||||
│ Title │ Snippet │ Link │
|
||||
╞════════════════════════════════╪══════════════════════════════════════════════════════════════╪════════════════════════════════╡
|
||||
│ LlamaIndex: A Data Framework │ Setting up LlamaIndex LlamaIndex Use Cases How LlamaIndex │ https://www.datacamp.com/tutor │
|
||||
│ for the Large Language Models │ Works? to the LlamaIndex documentation.LlamaIndex is a data │ ial/llama-index-adding- │
|
||||
│ ... - DataCamp │ framework for Large Language Models (LLMs) based │ personal-data-to-llms │
|
||||
│ │ applications. LLMs like GPT-4 come pre-trained on massive │ │
|
||||
│ │ public datasets, allowing for incredible natural language │ │
|
||||
│ │ processing capabilities out of the box. However, their │ │
|
||||
│ │ utility is limited without access to your own private data. │ │
|
||||
├────────────────────────────────┼──────────────────────────────────────────────────────────────┼────────────────────────────────┤
|
||||
│ A Beginner's Guide to │ What is LlamaIndex? How LlamaIndex works LlamaIndex core │ https://dev.to/pavanbelagatti/ │
|
||||
│ LlamaIndex! - DEV Community │ functionalities + applications you'd like to index.How │ a-beginners-guide-to- │
|
||||
│ │ LlamaIndex works LlamaIndex serves as a bridge, connecting │ llamaindex-3mip │
|
||||
│ │ the powerful capabilities of LLMs with diverse data sources, │ │
|
||||
│ │ thereby unlocking a new realm of applications that can │ │
|
||||
│ │ leverage the synergy between custom data and advanced │ │
|
||||
│ │ language models. │ │
|
||||
├────────────────────────────────┼──────────────────────────────────────────────────────────────┼────────────────────────────────┤
|
||||
│ What is LlamaIndex?: How It │ Understanding LlamaIndex Then follow either of the two │ https://nanonets.com/blog/llam │
|
||||
│ Works, and Optimizing Data │ approaches below - Creating Llamaindex Documents LlamaIndex │ aindex/ │
|
||||
│ Query - Nanonets │ provides a high-level API that facilitates straightforward │ │
|
||||
│ │ querying, ideal for common use cases. LlamaIndex equips you │ │
|
||||
│ │ with a suite of tools to shape your knowledge │ │
|
||||
│ │ base:LlamaIndex is your go-to platform for creating robust │ │
|
||||
│ │ applications powered by Large Language Models (LLMs) over │ │
|
||||
│ │ your customized data. Be it a sophisticated Q&A system, an │ │
|
||||
│ │ interactive chatbot, or intelligent agents, LlamaIndex lays │ │
|
||||
│ │ down the foundation for your ventures into the realm of │ │
|
||||
│ │ Retrieval Augmented Generation (RAG). │ │
|
||||
├────────────────────────────────┼──────────────────────────────────────────────────────────────┼────────────────────────────────┤
|
||||
│ LlamaIndex 0.9.42.post1 - Read │ LlamaIndex is a data framework for LLM-based applications to │ https://docs.llamaindex.ai/en/ │
|
||||
│ the Docs │ ingest, structure, and access private or domain-specific 🦙 │ stable/ │
|
||||
│ │ How can LlamaIndex help?# LlamaIndex provides the following │ │
|
||||
│ │ tools: Getting Started# To install the library: pip install │ │
|
||||
│ │ llama-index LLM to generate an answer immediately, │ │
|
||||
│ │ LlamaIndex:LlamaIndex provides tools for beginners, advanced │ │
|
||||
│ │ users, and everyone in between. Our high-level API allows │ │
|
||||
│ │ beginner users to use LlamaIndex to ingest and query their │ │
|
||||
│ │ data in 5 lines of code. For more complex applications, our │ │
|
||||
│ │ lower-level APIs allow advanced users to customize and │ │
|
||||
│ │ extend any module—data connectors, indices, retrievers, │ │
|
||||
│ │ query ... │ │
|
||||
├────────────────────────────────┼──────────────────────────────────────────────────────────────┼────────────────────────────────┤
|
||||
│ Getting Started With │ LlamaIndex does number 3. Here’s how it works: Creating a │ https://betterprogramming.pub/ │
|
||||
│ LlamaIndex - Better │ new LlamaIndex Project 4. Store the Index 3. Index │ getting-started-with- │
|
||||
│ Programming │ Construction Actually, things do get simpler. Take a look at │ llamaindex-169bbf475a94 │
|
||||
│ │ the code below:The basic workflow in LlamaIndex Starting │ │
|
||||
│ │ with your documents, you first load them into LlamaIndex. It │ │
|
||||
│ │ comes with many ready-made readers for sources such as │ │
|
||||
│ │ databases, Discord, Slack, Google Docs, Notion, and (the one │ │
|
||||
│ │ we will use today) GitHub repos. Next, you use LlamaIndex to │ │
|
||||
│ │ parse the documents into nodes — basically chunks of text. │ │
|
||||
╘════════════════════════════════╧══════════════════════════════════════════════════════════════╧════════════════════════════════╛
|
||||
|
||||
|
||||
╒══════════════════════════════════════════════════════════════════════════════════╕
|
||||
│ The answer to search query: how to llamaindex │
|
||||
╞══════════════════════════════════════════════════════════════════════════════════╡
|
||||
│ LlamaIndex is a data framework for LLM-based applications that allows users to │
|
||||
│ ingest, structure, and access private or domain-specific data. It provides tools │
|
||||
│ for beginners as well as advanced users. To get started with LlamaIndex, you can │
|
||||
│ install the library using the command "pip install llama-index". LlamaIndex │
|
||||
│ offers a high-level API that enables beginners to ingest and query their data │
|
||||
│ with just a few lines of code. For more complex applications, there are lower- │
|
||||
│ level APIs available for customization and extension. LlamaIndex supports │
|
||||
│ various data sources such as databases, Discord, Slack, Google Docs, Notion, and │
|
||||
│ GitHub repos. You can parse the documents into nodes using LlamaIndex. By │
|
||||
│ connecting LLMs with diverse data sources, LlamaIndex unlocks new possibilities │
|
||||
│ for applications that leverage the synergy between custom data and advanced │
|
||||
│ language models. │
|
||||
╘══════════════════════════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
╒════════════════════════════════════════════════════════════════════════════════╕
|
||||
│ Search Engine follow up questions for query: how to llamaindex │
|
||||
╞════════════════════════════════════════════════════════════════════════════════╡
|
||||
│ ['What is the purpose of llamaindex?', 'What are the benefits of using │
|
||||
│ llamaindex?', 'Are there any alternative methods to achieve the same result as │
|
||||
│ llamaindex?'] │
|
||||
╘════════════════════════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
@@ -0,0 +1,108 @@
|
||||
╒════════╤═══════════════════════════╤═════════════════════════════════════╤════════════════════════════════════════════════════╕
|
||||
│ Rank │ Title │ Link │ Snippet │
|
||||
╞════════╪═══════════════════════════╪═════════════════════════════════════╪════════════════════════════════════════════════════╡
|
||||
│ 1 │ What is LlamaIndex?: How │ https://nanonets.com/blog/llamainde │ The core essence of LlamaIndex lies in its ability │
|
||||
│ │ It Works, and Optimizing │ x/ │ to build structured indices over ingested data, │
|
||||
│ │ Data Query │ │ represented as either Documents or Nodes. │
|
||||
├────────┼───────────────────────────┼─────────────────────────────────────┼────────────────────────────────────────────────────┤
|
||||
│ 2 │ Starter Tutorial - │ https://docs.llamaindex.ai/en/lates │ The easiest way to get it is to download it via │
|
||||
│ │ LlamaIndex 0.9.43 │ t/getting_started/starter_example.h │ this link and save it in a folder called data . │
|
||||
│ │ │ tml │ Set ... │
|
||||
├────────┼───────────────────────────┼─────────────────────────────────────┼────────────────────────────────────────────────────┤
|
||||
│ 3 │ LlamaIndex: Adding │ https://www.datacamp.com/tutorial/l │ You can download your resume by going on to the │
|
||||
│ │ Personal Data to LLMs - │ lama-index-adding-personal-data-to- │ Linkedin profile page, clicking on More, and then │
|
||||
│ │ DataCamp │ llms │ Save to PDF. │
|
||||
├────────┼───────────────────────────┼─────────────────────────────────────┼────────────────────────────────────────────────────┤
|
||||
│ 4 │ LlamaIndex 0.9.43 │ https://docs.llamaindex.ai/ │ LlamaIndex is a data framework for LLM-based │
|
||||
│ │ │ │ applications to ingest, structure, and access │
|
||||
│ │ │ │ private or domain-specific data. It's available in │
|
||||
│ │ │ │ Python (these docs) ... │
|
||||
├────────┼───────────────────────────┼─────────────────────────────────────┼────────────────────────────────────────────────────┤
|
||||
│ 5 │ Generative AI: An │ https://www.singlestore.com/blog/ge │ Delve into the world of LlamaIndex with this │
|
||||
│ │ Absolute Beginner's Guide │ nerative-ai-a-guide-to-llamaindex/ │ comprehensive beginner's guide, including an │
|
||||
│ │ to LlamaIndex │ │ insightful tutorial. │
|
||||
╘════════╧═══════════════════════════╧═════════════════════════════════════╧════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
╒═════════════════════════════╤══════════════════════════════════════════════════════════════╤════════════════════════════════╕
|
||||
│ Title │ Snippet │ Link │
|
||||
╞═════════════════════════════╪══════════════════════════════════════════════════════════════╪════════════════════════════════╡
|
||||
│ LlamaIndex Newsletter │ 4 min read 4 min read Published in ·Jan 2 LlamaIndex │ https://medium.com/@llama_inde │
|
||||
│ 2024-01-23 │ Newsletter 2024–01–02 3 min read 3 min read Published in │ x │
|
||||
│ │ ·Jan 9 LlamaIndex Newsletter 2024–01–09 ·Dec 19, 2023 │ │
|
||||
│ │ LlamaIndex Newsletter 2023–12–19 4 min read 4 min read │ │
|
||||
│ │ Published in ·Dec 12, 2023 LlamaIndex Newsletter │ │
|
||||
│ │ 2023–12–12LlamaIndex Newsletter 2024-01-16 Hello LlamaIndex │ │
|
||||
│ │ Enthusiasts 🦙, Get ready for an exciting week at │ │
|
||||
│ │ LlamaIndex, teeming with dynamic community contributions and │ │
|
||||
│ │ insightful learning... │ │
|
||||
├─────────────────────────────┼──────────────────────────────────────────────────────────────┼────────────────────────────────┤
|
||||
│ Welcome to my guide of │ Sign up Sign in Sign up Sign in Guide to LlamaIndex in 2024 │ https://medium.com/@Debaprasan │
|
||||
│ LlamaIndex! - Medium │ Debaprasann Bhoi Follow GoPenAI -- Listen Share LlamaIndex, │ nBhoi/guide-to-llamaindex- │
|
||||
│ │ previously known as the GPT Index, is a remarkable data │ in-2024-64caa8ef2e72 │
|
||||
│ │ framework aimed at helping you build applications with the │ │
|
||||
│ │ Llama Index at their core. Building the │ │
|
||||
│ │ LlamaIndex:LlamaIndex, previously known as the GPT Index, is │ │
|
||||
│ │ a remarkable data framework aimed at helping you build │ │
|
||||
│ │ applications with LLMs by providing essential tools that │ │
|
||||
│ │ facilitate data ingestion,... │ │
|
||||
├─────────────────────────────┼──────────────────────────────────────────────────────────────┼────────────────────────────────┤
|
||||
│ Guide to LlamaIndex in 2024 │ Sign up Sign in Sign up Sign in Guide to LlamaIndex in 2024 │ https://blog.gopenai.com/guide │
|
||||
│ │ Debaprasann Bhoi Follow GoPenAI -- Listen Share LlamaIndex, │ -to-llamaindex- │
|
||||
│ │ previously known as the GPT Index, is a remarkable data │ in-2024-64caa8ef2e72 │
|
||||
│ │ framework aimed at helping you build applications with the │ │
|
||||
│ │ Llama Index at their core. Building the │ │
|
||||
│ │ LlamaIndex:LlamaIndex, previously known as the GPT Index, is │ │
|
||||
│ │ a remarkable data framework aimed at helping you build │ │
|
||||
│ │ applications with LLMs by providing essential tools that │ │
|
||||
│ │ facilitate data ingestion, structuring, retrieval, and │ │
|
||||
│ │ integration with various application frameworks. The │ │
|
||||
│ │ capabilities offered by LlamaIndex are numerous and highly │ │
|
||||
│ │ valuable: │ │
|
||||
├─────────────────────────────┼──────────────────────────────────────────────────────────────┼────────────────────────────────┤
|
||||
│ A Beginner's Guide to │ you'd like to index. What is LlamaIndex? LlamaIndex core │ https://dev.to/pavanbelagatti/ │
|
||||
│ LlamaIndex! │ functionalities + applications Data indexingLlamaIndex is │ a-beginners-guide-to- │
|
||||
│ │ an advanced orchestration framework designed to amplify the │ llamaindex-3mip │
|
||||
│ │ capabilities of LLMs like GPT-4. While LLMs are inherently │ │
|
||||
│ │ powerful, having been trained on vast public datasets, they │ │
|
||||
│ │ often lack the means to interact with private or domain- │ │
|
||||
│ │ specific data. │ │
|
||||
├─────────────────────────────┼──────────────────────────────────────────────────────────────┼────────────────────────────────┤
|
||||
│ LlamaIndex Newsletter │ Sign up Sign in Sign up Sign in LlamaIndex Newsletter │ https://blog.llamaindex.ai/lla │
|
||||
│ 2024-01-30 │ 2024–01–30 LlamaIndex Follow LlamaIndex Blog -- Listen Share │ maindex-newsletter-2024-01-30- │
|
||||
│ │ to supercharge your journey with LlamaIndex. from │ 0d01eb0d8cef │
|
||||
│ │ LlamaIndex, delivered directly to your inbox. -- -- Written │ │
|
||||
│ │ by LlamaIndex LlamaIndex Blog Help Status About Careers Blog │ │
|
||||
│ │ Privacy Terms Text to speech TeamsWe have launched RAG CLI: │ │
|
||||
│ │ A straightforward command-line tool for indexing and │ │
|
||||
│ │ searching any local file, featuring integration with │ │
|
||||
│ │ IngestionPipeline, QueryPipeline, and ChromaDB, with support │ │
|
||||
│ │ for local models and customizable logic. Docs, Tweet. We │ │
|
||||
│ │ have introduced JSONalyze, a query engine that swiftly │ │
|
||||
│ │ summarizes large JSON datasets. │ │
|
||||
╘═════════════════════════════╧══════════════════════════════════════════════════════════════╧════════════════════════════════╛
|
||||
|
||||
|
||||
╒════════════════════════════════════════════════════════════════════════════════╕
|
||||
│ The answer to search query: how to llamaindex │
|
||||
╞════════════════════════════════════════════════════════════════════════════════╡
|
||||
│ LlamaIndex is a data framework, previously known as the GPT Index, aimed at │
|
||||
│ helping users build applications with LLMs (Language Model Models) at their │
|
||||
│ core. It provides essential tools that facilitate data ingestion, structuring, │
|
||||
│ retrieval, and integration with various application frameworks. Some of the │
|
||||
│ capabilities offered by LlamaIndex include indexing and searching local files, │
|
||||
│ integration with IngestionPipeline, QueryPipeline, and ChromaDB, support for │
|
||||
│ local models, and customizable logic. To install LlamaIndex, you can use the │
|
||||
│ command "pip install llama-index" if you have Python installed. │
|
||||
╘════════════════════════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
╒═══════════════════════════════════════════════════════════════════════════╕
|
||||
│ Search Engine follow up questions for query: how to llamaindex │
|
||||
╞═══════════════════════════════════════════════════════════════════════════╡
|
||||
│ ['What are the benefits of llamaindex?', 'Are there any specific tools or │
|
||||
│ techniques for llamaindexing?', 'Can you provide examples of successful │
|
||||
│ companies that have implemented llamaindex?'] │
|
||||
╘═══════════════════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
@@ -0,0 +1,133 @@
|
||||
╒════════╤═══════════════════════════╤═════════════════════════════════════╤════════════════════════════════════════════════════╕
|
||||
│ Rank │ Title │ Link │ Snippet │
|
||||
╞════════╪═══════════════════════════╪═════════════════════════════════════╪════════════════════════════════════════════════════╡
|
||||
│ 1 │ What is LlamaIndex?: How │ https://nanonets.com/blog/llamainde │ The core essence of LlamaIndex lies in its ability │
|
||||
│ │ It Works, and Optimizing │ x/ │ to build structured indices over ingested data, │
|
||||
│ │ Data Query │ │ represented as either Documents or Nodes. │
|
||||
├────────┼───────────────────────────┼─────────────────────────────────────┼────────────────────────────────────────────────────┤
|
||||
│ 2 │ Starter Tutorial - │ https://docs.llamaindex.ai/en/lates │ The easiest way to get it is to download it via │
|
||||
│ │ LlamaIndex 0.9.43 │ t/getting_started/starter_example.h │ this link and save it in a folder called data . │
|
||||
│ │ │ tml │ Set ... │
|
||||
├────────┼───────────────────────────┼─────────────────────────────────────┼────────────────────────────────────────────────────┤
|
||||
│ 3 │ LlamaIndex: Adding │ https://www.datacamp.com/tutorial/l │ You can download your resume by going on to the │
|
||||
│ │ Personal Data to LLMs - │ lama-index-adding-personal-data-to- │ Linkedin profile page, clicking on More, and then │
|
||||
│ │ DataCamp │ llms │ Save to PDF. │
|
||||
├────────┼───────────────────────────┼─────────────────────────────────────┼────────────────────────────────────────────────────┤
|
||||
│ 4 │ LlamaIndex 0.9.43 │ https://docs.llamaindex.ai/ │ LlamaIndex is a data framework for LLM-based │
|
||||
│ │ │ │ applications to ingest, structure, and access │
|
||||
│ │ │ │ private or domain-specific data. It's available in │
|
||||
│ │ │ │ Python (these docs) ... │
|
||||
├────────┼───────────────────────────┼─────────────────────────────────────┼────────────────────────────────────────────────────┤
|
||||
│ 5 │ Generative AI: An │ https://www.singlestore.com/blog/ge │ Delve into the world of LlamaIndex with this │
|
||||
│ │ Absolute Beginner's Guide │ nerative-ai-a-guide-to-llamaindex/ │ comprehensive beginner's guide, including an │
|
||||
│ │ to LlamaIndex │ │ insightful tutorial. │
|
||||
╘════════╧═══════════════════════════╧═════════════════════════════════════╧════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
╒═══════════════════════════╕
|
||||
│ Related Search │
|
||||
╞═══════════════════════════╡
|
||||
│ LlamaIndex vs LangChain │
|
||||
├───────────────────────────┤
|
||||
│ Is LlamaIndex free │
|
||||
├───────────────────────────┤
|
||||
│ Llama_index github │
|
||||
├───────────────────────────┤
|
||||
│ LlamaIndex documentation │
|
||||
├───────────────────────────┤
|
||||
│ LlamaIndex PDF │
|
||||
├───────────────────────────┤
|
||||
│ LlamaIndex course │
|
||||
├───────────────────────────┤
|
||||
│ Is LlamaIndex open source │
|
||||
├───────────────────────────┤
|
||||
│ LlamaIndex RAG │
|
||||
╘═══════════════════════════╛
|
||||
|
||||
|
||||
╒═══════════════════════════════╤══════════════════════════════════════════════════════════════╤════════════════════════════════╕
|
||||
│ Title │ Snippet │ Link │
|
||||
╞═══════════════════════════════╪══════════════════════════════════════════════════════════════╪════════════════════════════════╡
|
||||
│ LlamaIndex - Medium │ 4 min read 4 min read Published in ·Jan 2 LlamaIndex │ https://medium.com/@llama_inde │
|
||||
│ │ Newsletter 2024–01–02 3 min read 3 min read Published in │ x │
|
||||
│ │ ·Jan 9 LlamaIndex Newsletter 2024–01–09 ·Dec 19, 2023 │ │
|
||||
│ │ LlamaIndex Newsletter 2023–12–19 4 min read 4 min read │ │
|
||||
│ │ Published in ·Dec 12, 2023 LlamaIndex Newsletter │ │
|
||||
│ │ 2023–12–12LlamaIndex Newsletter 2024-01-02 Hello, Llama │ │
|
||||
│ │ Lovers 🦙, Happy New Year! As we step into 2024, we're │ │
|
||||
│ │ thrilled to bring you a special edition of our newsletter, │ │
|
||||
│ │ packed with updates from the ... │ │
|
||||
├───────────────────────────────┼──────────────────────────────────────────────────────────────┼────────────────────────────────┤
|
||||
│ Guide to LlamaIndex in 2024 - │ Sign up Sign in Sign up Sign in Guide to LlamaIndex in 2024 │ https://medium.com/@Debaprasan │
|
||||
│ Medium │ Debaprasann Bhoi Follow GoPenAI -- Listen Share LlamaIndex, │ nBhoi/guide-to-llamaindex- │
|
||||
│ │ previously known as the GPT Index, is a remarkable data │ in-2024-64caa8ef2e72 │
|
||||
│ │ framework aimed at helping you build applications with the │ │
|
||||
│ │ Llama Index at their core. Building the │ │
|
||||
│ │ LlamaIndex:LlamaIndex, previously known as the GPT Index, is │ │
|
||||
│ │ a remarkable data framework aimed at helping you build │ │
|
||||
│ │ applications with LLMs by providing essential tools that │ │
|
||||
│ │ facilitate data ingestion,... │ │
|
||||
├───────────────────────────────┼──────────────────────────────────────────────────────────────┼────────────────────────────────┤
|
||||
│ Guide to LlamaIndex in 2024. │ Sign up Sign in Sign up Sign in Guide to LlamaIndex in 2024 │ https://blog.gopenai.com/guide │
|
||||
│ Welcome to my guide of │ Debaprasann Bhoi Follow GoPenAI -- Listen Share LlamaIndex, │ -to-llamaindex- │
|
||||
│ LlamaIndex! | by ... │ previously known as the GPT Index, is a remarkable data │ in-2024-64caa8ef2e72 │
|
||||
│ │ framework aimed at helping you build applications with the │ │
|
||||
│ │ Llama Index at their core. Building the │ │
|
||||
│ │ LlamaIndex:LlamaIndex, previously known as the GPT Index, is │ │
|
||||
│ │ a remarkable data framework aimed at helping you build │ │
|
||||
│ │ applications with LLMs by providing essential tools that │ │
|
||||
│ │ facilitate data ingestion, structuring, retrieval, and │ │
|
||||
│ │ integration with various application frameworks. The │ │
|
||||
│ │ capabilities offered by LlamaIndex are numerous and highly │ │
|
||||
│ │ valuable: │ │
|
||||
├───────────────────────────────┼──────────────────────────────────────────────────────────────┼────────────────────────────────┤
|
||||
│ A Beginner's Guide to │ you'd like to index. LlamaIndex core functionalities + │ https://dev.to/pavanbelagatti/ │
|
||||
│ LlamaIndex! - DEV Community │ applications What is LlamaIndex? Data indexingLlamaIndex │ a-beginners-guide-to- │
|
||||
│ │ is an advanced orchestration framework designed to amplify │ llamaindex-3mip │
|
||||
│ │ the capabilities of LLMs like GPT-4. While LLMs are │ │
|
||||
│ │ inherently powerful, having been trained on vast public │ │
|
||||
│ │ datasets, they often lack the means to interact with private │ │
|
||||
│ │ or domain-specific data. │ │
|
||||
├───────────────────────────────┼──────────────────────────────────────────────────────────────┼────────────────────────────────┤
|
||||
│ LlamaIndex Newsletter │ Sign up Sign in Sign up Sign in LlamaIndex Newsletter │ https://blog.llamaindex.ai/lla │
|
||||
│ 2024-01-30 │ 2024–01–30 LlamaIndex Follow LlamaIndex Blog -- Listen Share │ maindex-newsletter-2024-01-30- │
|
||||
│ │ to supercharge your journey with LlamaIndex. from │ 0d01eb0d8cef │
|
||||
│ │ LlamaIndex, delivered directly to your inbox. -- -- Written │ │
|
||||
│ │ by LlamaIndex LlamaIndex Blog Help Status About Careers Blog │ │
|
||||
│ │ Privacy Terms Text to speech TeamsWe have launched RAG CLI: │ │
|
||||
│ │ A straightforward command-line tool for indexing and │ │
|
||||
│ │ searching any local file, featuring integration with │ │
|
||||
│ │ IngestionPipeline, QueryPipeline, and ChromaDB, with support │ │
|
||||
│ │ for local models and customizable logic. Docs, Tweet. We │ │
|
||||
│ │ have introduced JSONalyze, a query engine that swiftly │ │
|
||||
│ │ summarizes large JSON datasets. │ │
|
||||
╘═══════════════════════════════╧══════════════════════════════════════════════════════════════╧════════════════════════════════╛
|
||||
|
||||
|
||||
╒═════════════════════════════════════════════════════════════════════════════════╕
|
||||
│ The answer to search query: how to llamaindex │
|
||||
╞═════════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Based on the given data, there are a few sources that provide information about │
|
||||
│ LlamaIndex. LlamaIndex is a data framework aimed at helping developers build │
|
||||
│ applications with large language models (LLMs) at their core. It offers tools │
|
||||
│ for data ingestion, structuring, retrieval, and integration with various │
|
||||
│ application frameworks. LlamaIndex is particularly useful for connecting custom │
|
||||
│ data sources to LLMs and can be used for web scraping, data indexing, and │
|
||||
│ natural language processing. There is also a command-line tool called RAG CLI │
|
||||
│ that allows indexing and searching of local files with integration to │
|
||||
│ IngestionPipeline, QueryPipeline, and ChromaDB. Additionally, there is a query │
|
||||
│ engine called JSONalyze that swiftly summarizes large JSON datasets. Please │
|
||||
│ note that the information provided may not be comprehensive, and it is │
|
||||
│ recommended to refer to the provided sources for more detailed information. │
|
||||
╘═════════════════════════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
╒════════════════════════════════════════════════════════════════════════════════╕
|
||||
│ Search Engine follow up questions for query: how to llamaindex │
|
||||
╞════════════════════════════════════════════════════════════════════════════════╡
|
||||
│ ['What is the significance of llamaindex?', 'Are there any specific techniques │
|
||||
│ or tools for llamaindexing?', 'Can you provide examples of successful │
|
||||
│ llamaindexing?'] │
|
||||
╘════════════════════════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
@@ -0,0 +1,100 @@
|
||||
╒══════════════════════╤══════════════════════╤══════════════════╤══════════════════════════════════════════════════════════════╕
|
||||
│ URL │ Title │ Published Date │ Summary │
|
||||
╞══════════════════════╪══════════════════════╪══════════════════╪══════════════════════════════════════════════════════════════╡
|
||||
│ https://tech.dentsus │ LlamaIndexを使ってロ │ 2024-01-22 │ - Retrieval-Augmented Generation (RAG) is a technique to │
|
||||
│ oken.com/entry/2024/ │ ーカル環境でRAGを実 │ │ improve the accuracy and reduce hallucination of Large │
|
||||
│ 01/22/LlamaIndex%E3% │ 行する方法 │ │ Language Models (LLMs) by providing relevant information │
|
||||
│ 82%92%E4%BD%BF%E3%81 │ │ │ from a knowledge base. - LlamaIndex is a Python and │
|
||||
│ %A3%E3%81%A6%E3%83%A │ │ │ Typescript framework specifically designed for implementing │
|
||||
│ D%E3%83%BC%E3%82%AB% │ │ │ RAG-based applications. - To implement RAG locally, you can │
|
||||
│ E3%83%AB%E7%92%B0%E5 │ │ │ use LlamaIndex and a GPU-enabled environment such as Windows │
|
||||
│ %A2%83%E3%81%A7RAG%E │ │ │ with WSL and devcontainer. - A step-by-step guide is │
|
||||
│ 3%82%92%E5%AE%9F%E8% │ │ │ provided to build the local RAG implementation environment │
|
||||
│ A1%8C%E3%81%99%E3%82 │ │ │ and execute the RAG system using LlamaIndex. - The │
|
||||
│ %8B%E6%96%B9%E6%B3%9 │ │ │ implemented RAG system can answer questions based on the │
|
||||
│ 5 │ │ │ context derived from text files using a │
|
||||
│ │ │ │ Multilingual-E5-large embedding model and ELYZA-japanese- │
|
||||
│ │ │ │ Llama LLM model. - Suggestions for improving the performance │
|
||||
│ │ │ │ and accuracy of the RAG system are discussed, including │
|
||||
│ │ │ │ reducing query latency and optimizing context selection. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://levelup.gitc │ Live Indexing for │ 2024-01-08 │ - The analysis of PDFs can be a challenging task for AI │
|
||||
│ onnected.com/live- │ RAG: A Guide For │ │ systems due to their complex information, such as nested │
|
||||
│ indexing-for-rag-a- │ Real-Time Indexing │ │ tables, figures, equations, and photos. - Large Language │
|
||||
│ guide-for-real-time- │ Using LlamaIndex and │ │ Models (LLMs) often make mistakes and produce hallucinations │
|
||||
│ indexing-using- │ AWS │ │ when analyzing PDFs. - RAG frameworks like LlamaIndex and │
|
||||
│ llamaindex-and-aws-5 │ │ │ Langchain, along with the rise of LLMs, have transformed the │
|
||||
│ 1353083ace4?gi=472c9 │ │ │ ecosystem for creating full-stack applications. - LlamaIndex │
|
||||
│ 89ddb71&source=rss │ │ │ is a prominent RAG framework that allows users to create │
|
||||
│ ----5517fd7b58a6---4 │ │ │ chat-with-PDFs applications with minimal code. - To turn a │
|
||||
│ │ │ │ RAG application into an enterprise-grade application, AI │
|
||||
│ │ │ │ engineers need to address challenges like re-indexing and │
|
||||
│ │ │ │ live updating data. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://blog.llamain │ LlamaIndex Blog │ 2024-01-23 │ - The LlamaIndex Blog is the official blog of LlamaIndex. - │
|
||||
│ dex.ai/?gi=a117797fb │ │ │ Posts include release updates, guides, community showcases, │
|
||||
│ bc8 │ │ │ and more. - Recent posts discussed building a secure Multi- │
|
||||
│ │ │ │ Tenancy RAG System, enhancing accessibility in AI, │
|
||||
│ │ │ │ introducing Query Pipelines within LlamaIndex, scaling │
|
||||
│ │ │ │ LlamaIndex with AWS and Hugging Face, and more. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://blog.llamain │ A Cheat Sheet and │ 2024-01-05 │ This web page provides a comprehensive overview of │
|
||||
│ dex.ai/a-cheat- │ Some Recipes For │ │ Retrieval-Augmented Generation (RAG) systems, covering the │
|
||||
│ sheet-and-some- │ Building Advanced │ │ basics, advanced techniques, and success requirements. RAG │
|
||||
│ recipes-for- │ RAG │ │ involves retrieving relevant documents from an external │
|
||||
│ building-advanced- │ │ │ knowledge base and feeding them along with the user's query │
|
||||
│ rag-803a9d94c41b │ │ │ to a large language model (LLM) for response generation. To │
|
||||
│ │ │ │ ensure the success of a RAG system, both retrieval and │
|
||||
│ │ │ │ generation components must perform well. Advanced RAG │
|
||||
│ │ │ │ techniques focus on enhancing these components independently │
|
||||
│ │ │ │ or simultaneously. The page presents sophisticated │
|
||||
│ │ │ │ techniques like Chunk-Size Optimization and Structured │
|
||||
│ │ │ │ External Knowledge to improve retrieval performance. │
|
||||
│ │ │ │ Additionally, it emphasizes the significance of prompt │
|
||||
│ │ │ │ engineering, explorative data analysis, and dataset │
|
||||
│ │ │ │ selection in developing effective RAG systems. The goal of │
|
||||
│ │ │ │ advanced RAG is to refine the system to generate high- │
|
||||
│ │ │ │ quality, informative, and relevant responses to user │
|
||||
│ │ │ │ queries. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://dev.to/lgram │ Create Your Own │ 2024-01-13 │ The webpage contains instructions and code to create a local │
|
||||
│ mel/create-your-own- │ Local Chatbot with │ │ chatbot using Next.js, Llama.cpp, and ModelFusion. Llama.cpp │
|
||||
│ local-chatbot-with- │ Next.js, Llama.cpp, │ │ is used to serve the OpenHermes 2.5 Mistral LLM locally, the │
|
||||
│ nextjs-llamacpp-and- │ and ModelFusion │ │ Vercel AI SDK is used to handle stream forwarding and │
|
||||
│ modelfusion-461j │ │ │ rendering, and ModelFusion is used to integrate Llama.cpp │
|
||||
│ │ │ │ with the Vercel AI SDK. The chatbot is able to generate │
|
||||
│ │ │ │ responses to user messages in real time. Here is a summary │
|
||||
│ │ │ │ of the instructions: 1. **Set up Llama.cpp** - Clone the │
|
||||
│ │ │ │ Llama.cpp repository and build it on your machine. - │
|
||||
│ │ │ │ Download the OpenHermes 2.5 Mistral GGUF model from │
|
||||
│ │ │ │ HuggingFace and move it to the models/ directory of your │
|
||||
│ │ │ │ local Llama.cpp repository. - Start the Llama.cpp server. │
|
||||
│ │ │ │ 2. **Create the Next.js Project** - Create a new Next.js │
|
||||
│ │ │ │ project using the create-next-app command. - Configure the │
|
||||
│ │ │ │ project settings using the prompts. - Navigate to the │
|
||||
│ │ │ │ project directory. 3. **Install the Required Libraries** │
|
||||
│ │ │ │ - Install the Vercel AI SDK, ModelFusion, and the │
|
||||
│ │ │ │ ModelFusion Vercel AI SDK Integration using the npm install │
|
||||
│ │ │ │ command. 4. **Create an API Route for the Chatbot** - │
|
||||
│ │ │ │ Create a new file named route.ts in the src/app/api/chat/ │
|
||||
│ │ │ │ directory. - Import the necessary libraries and classes. │
|
||||
│ │ │ │ - Create a POST request that takes a list of messages as │
|
||||
│ │ │ │ input. - Initialize a ModelFusion text generation model │
|
||||
│ │ │ │ and create a ModelFusion chat prompt from the AI SDK │
|
||||
│ │ │ │ messages. - Use ModelFusion to call Llama.cpp and generate │
|
||||
│ │ │ │ a streaming response. - Return the streaming text response │
|
||||
│ │ │ │ using the Vercel AI SDK. 5. **Add the Chat Interface** - │
|
||||
│ │ │ │ Create a dedicated chat page at src/app/page.tsx. - Use │
|
||||
│ │ │ │ the useChat hook from the Vercel AI SDK to call the │
|
||||
│ │ │ │ /api/chat route and process the streaming response. - │
|
||||
│ │ │ │ Render the messages as they arrive. - Clean up the global │
|
||||
│ │ │ │ styles for a more visually appealing chat interface. 6. │
|
||||
│ │ │ │ **Run the Chatbot Application** - Launch the development │
|
||||
│ │ │ │ server using the npm run dev command. - Navigate to │
|
||||
│ │ │ │ http://localhost:3000 in a browser to see the chat page. - │
|
||||
│ │ │ │ Interact with the chatbot by typing messages into the input │
|
||||
│ │ │ │ field. The chatbot will be able to generate responses to │
|
||||
│ │ │ │ your messages in real-time. │
|
||||
╘══════════════════════╧══════════════════════╧══════════════════╧══════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
╒══════════════════════╤══════════════════════╤══════════════════╤══════════════════════════════════════════════════════════════╕
|
||||
│ URL │ Title │ Published Date │ Summary │
|
||||
╞══════════════════════╪══════════════════════╪══════════════════╪══════════════════════════════════════════════════════════════╡
|
||||
│ https://tech.dentsus │ LlamaIndexを使ってロ │ 2024-01-22 │ The webpage provides a step-by-step guide on how to │
|
||||
│ oken.com/entry/2024/ │ ーカル環境でRAGを実 │ │ implement Retrieval-Augmented Generation (RAG) using the │
|
||||
│ 01/22/LlamaIndex%E3% │ 行する方法 │ │ LlamaIndex library, aiming for local deployment of LLM. It │
|
||||
│ 82%92%E4%BD%BF%E3%81 │ │ │ explains why utilizing LLM in a local environment can be │
|
||||
│ %A3%E3%81%A6%E3%83%A │ │ │ beneficial, such as dealing with confidential data or │
|
||||
│ D%E3%83%BC%E3%82%AB% │ │ │ restricted internet access. The instruction includes │
|
||||
│ E3%83%AB%E7%92%B0%E5 │ │ │ setting up the necessary environment using WSL, Dev │
|
||||
│ %A2%83%E3%81%A7RAG%E │ │ │ Container, and installing required libraries. Additionally, │
|
||||
│ 3%82%92%E5%AE%9F%E8% │ │ │ it describes the process of building a RAG system using │
|
||||
│ A1%8C%E3%81%99%E3%82 │ │ │ LlamaIndex, including loading data, initializing models, and │
|
||||
│ %8B%E6%96%B9%E6%B3%9 │ │ │ handling querying and responding tasks. The page also │
|
||||
│ 5 │ │ │ explores areas for improvement, discussing optimizations │
|
||||
│ │ │ │ like minimizing query response time, selecting relevant │
|
||||
│ │ │ │ contexts, and tweaking hardware and software configurations. │
|
||||
│ │ │ │ Finally, it encourages readers to try out the RAG │
|
||||
│ │ │ │ implementation and appreciate the convenience of LlamaIndex │
|
||||
│ │ │ │ while acknowledging the complexity involved in constructing │
|
||||
│ │ │ │ effective RAG systems. The page is authored by Yamashita │
|
||||
│ │ │ │ Tsuyoshi and reviewed by Wakamoto Ryosuke, using Shodo for │
|
||||
│ │ │ │ documentation. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://blog.llamain │ LlamaIndex Blog │ 2024-01-23 │ The LlamaIndex Blog is a hub for news, updates, and guides │
|
||||
│ dex.ai/?gi=a117797fb │ │ │ related to LlamaIndex, a search engine and platform for │
|
||||
│ bc8 │ │ │ building and deploying AI-powered applications. This blog │
|
||||
│ │ │ │ features release updates, community showcases, and guides on │
|
||||
│ │ │ │ using LlamaIndex. Articles range from introducing new │
|
||||
│ │ │ │ features to exploring building various systems using │
|
||||
│ │ │ │ LlamaIndex. Some notable topics covered in the blog include │
|
||||
│ │ │ │ building a secure Multi-Tenancy RAG System, enhancing │
|
||||
│ │ │ │ accessibility in AI with LlamaIndex and GPT3.5, and │
|
||||
│ │ │ │ introducing Query Pipelines. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://levelup.gitc │ Live Indexing for │ 2024-01-08 │ * The task of processing and answering questions from PDFs │
|
||||
│ onnected.com/live- │ RAG: A Guide For │ │ is difficult for AI systems due to complex information, such │
|
||||
│ indexing-for-rag-a- │ Real-Time Indexing │ │ as nested tables, figures, and equations. * RAG frameworks │
|
||||
│ guide-for-real-time- │ Using LlamaIndex and │ │ and large language models (LLMs) have evolved to create │
|
||||
│ indexing-using- │ AWS │ │ fully-stack applications, enabling a chat-with-PDFs │
|
||||
│ llamaindex-and-aws-5 │ │ │ application with minimal code. * Creating an enterprise RAG │
|
||||
│ 1353083ace4?gi=472c9 │ │ │ application requires addressing challenges such as re- │
|
||||
│ 89ddb71&source=rss │ │ │ indexing and live updates of data sources. │
|
||||
│ ----5517fd7b58a6---4 │ │ │ │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://dev.to/lgram │ Create Your Own │ 2024-01-13 │ This article explains how to create a chatbot using Next.js, │
|
||||
│ mel/create-your-own- │ Local Chatbot with │ │ Llama.cpp, and ModelFusion. Here's a concise summary: 1. │
|
||||
│ local-chatbot-with- │ Next.js, Llama.cpp, │ │ **Setup:** - Clone and build Llama.cpp, an LLM inference │
|
||||
│ nextjs-llamacpp-and- │ and ModelFusion │ │ engine. - Download the OpenHermes 2.5 Mistral model from │
|
||||
│ modelfusion-461j │ │ │ HuggingFace. - Start the Llama.cpp server. 2. **Next.js │
|
||||
│ │ │ │ Project:** - Create a Next.js project. - Install │
|
||||
│ │ │ │ required libraries: Vercel AI SDK, ModelFusion, and │
|
||||
│ │ │ │ ModelFusion Vercel AI SDK Integration. 3. **API Route:** │
|
||||
│ │ │ │ - Create a POST API route in Next.js to handle chat │
|
||||
│ │ │ │ interactions. - Initialize a ModelFusion text generation │
|
||||
│ │ │ │ model with the OpenHermes model. - Create a ModelFusion │
|
||||
│ │ │ │ chat prompt from Vercel AI SDK messages and call the model. │
|
||||
│ │ │ │ - Return the streaming response using the │
|
||||
│ │ │ │ ModelFusionTextStream adapter. 4. **Chat Interface:** - │
|
||||
│ │ │ │ Create a dedicated Chat page using the useChat hook from │
|
||||
│ │ │ │ Vercel AI SDK to render chat messages. - Update global │
|
||||
│ │ │ │ styles for improved readability. 5. **Run the │
|
||||
│ │ │ │ Application:** - Run the development server and navigate │
|
||||
│ │ │ │ to http://localhost:3000 to interact with the chatbot. This │
|
||||
│ │ │ │ chatbot is functional, leveraging these technologies to │
|
||||
│ │ │ │ provide real-time responses to user messages. The code is a │
|
||||
│ │ │ │ starting point for further exploration and customization. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://blog.llamain │ A Cheat Sheet and │ 2024-01-05 │ This article provides a comprehensive overview of Retrieval- │
|
||||
│ dex.ai/a-cheat- │ Some Recipes For │ │ Augmented Generation (RAG) systems, focusing on the advanced │
|
||||
│ sheet-and-some- │ Building Advanced │ │ techniques and strategies used to build effective RAG │
|
||||
│ recipes-for- │ RAG │ │ systems that can handle complex queries using external │
|
||||
│ building-advanced- │ │ │ knowledge bases. It covers success requirements, various │
|
||||
│ rag-803a9d94c41b │ │ │ techniques for Retrieval and Generation components, and │
|
||||
│ │ │ │ includes a RAG Cheat Sheet for reference. The techniques │
|
||||
│ │ │ │ include Chunk-Size Optimization, Structured External │
|
||||
│ │ │ │ knowledge, Sparse-Attention Mechanism, Referring and Fine- │
|
||||
│ │ │ │ tuning on Predictions. The article also addresses the │
|
||||
│ │ │ │ challenges encountered in implementing these techniques. │
|
||||
╘══════════════════════╧══════════════════════╧══════════════════╧══════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
@@ -0,0 +1,76 @@
|
||||
╒══════════════════════╤══════════════════════╤══════════════════╤══════════════════════════════════════════════════════════════╕
|
||||
│ URL │ Title │ Published Date │ Summary │
|
||||
╞══════════════════════╪══════════════════════╪══════════════════╪══════════════════════════════════════════════════════════════╡
|
||||
│ https://tech.dentsus │ LlamaIndexを使ってロ │ 2024-01-22 │ The article explains how to implement RAG (Retrieval- │
|
||||
│ oken.com/entry/2024/ │ ーカル環境でRAGを実 │ │ Augmented Generation) using LlamaIndex, a library that lets │
|
||||
│ 01/22/LlamaIndex%E3% │ 行する方法 │ │ you use Large Language Models (LLMs) like ChatGPT locally. │
|
||||
│ 82%92%E4%BD%BF%E3%81 │ │ │ RAG helps LLM answer questions or generate text by providing │
|
||||
│ %A3%E3%81%A6%E3%83%A │ │ │ relevant context from external data sources. By integrating │
|
||||
│ D%E3%83%BC%E3%82%AB% │ │ │ an embedding model and an LLM, LlamaIndex allows you to load │
|
||||
│ E3%83%AB%E7%92%B0%E5 │ │ │ text data, create an index, and retrieve context-aware │
|
||||
│ %A2%83%E3%81%A7RAG%E │ │ │ responses to user queries. The article discusses setup, │
|
||||
│ 3%82%92%E5%AE%9F%E8% │ │ │ model selection, and code implementation using Python. It │
|
||||
│ A1%8C%E3%81%99%E3%82 │ │ │ also highlights potential improvements in terms of │
|
||||
│ %8B%E6%96%B9%E6%B3%9 │ │ │ performance and accuracy. │
|
||||
│ 5 │ │ │ │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://levelup.gitc │ Live Indexing for │ 2024-01-08 │ - PDFs contain valuable information, but analyzing them with │
|
||||
│ onnected.com/live- │ RAG: A Guide For │ │ Large Language Models (LLMs) is challenging due to their │
|
||||
│ indexing-for-rag-a- │ Real-Time Indexing │ │ complex structure. - The rise of Retrieval-Augmented │
|
||||
│ guide-for-real-time- │ Using LlamaIndex and │ │ Generation (RAG) frameworks and LLMs has simplified the │
|
||||
│ indexing-using- │ AWS │ │ creation of full-stack applications. - LlamaIndex, a │
|
||||
│ llamaindex-and-aws-5 │ │ │ prominent RAG framework, allows users to create chat-with- │
|
||||
│ 1353083ace4?gi=472c9 │ │ │ PDFs applications with just a few lines of code. - Creating │
|
||||
│ 89ddb71&source=rss │ │ │ an enterprise RAG application requires additional │
|
||||
│ ----5517fd7b58a6---4 │ │ │ considerations, such as re-indexing and live updates. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://www.youtube. │ Transforming Invoice │ 2024-01-08 │ This web page introduces Sparrow, an open-source solution │
|
||||
│ com/watch?v=VKeYaIEk │ Data into JSON: │ │ for document processing with local LLMs. The video │
|
||||
│ 82s&v=watch&feature= │ Local LLM with │ │ demonstrates how to use Sparrow with LlamaIndex and a │
|
||||
│ youtu.be │ LlamaIndex \u0026 │ │ dynamic Pydantic class to extract structured JSON output │
|
||||
│ │ Pydantic │ │ from invoice documents, running locally on a MacBook Air M1 │
|
||||
│ │ │ │ with 8GB RAM. The process involves configuring Sparrow, │
|
||||
│ │ │ │ creating a RAG pipeline, implementing a dynamic Pydantic │
|
||||
│ │ │ │ class, and setting up LlamaIndex with the Pydantic class to │
|
||||
│ │ │ │ produce JSON output. A step-by-step explanation of the setup │
|
||||
│ │ │ │ and implementation is provided. The end result is a │
|
||||
│ │ │ │ structured JSON output that can be easily used for further │
|
||||
│ │ │ │ processing or analysis. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://dev.to/lgram │ Create Your Own │ 2024-01-13 │ This article aims to guide readers in creating a local │
|
||||
│ mel/create-your-own- │ Local Chatbot with │ │ chatbot using Next.js, Llama.cpp, and ModelFusion. It begins │
|
||||
│ local-chatbot-with- │ Next.js, Llama.cpp, │ │ by explaining how to set up Llama.cpp along with the │
|
||||
│ nextjs-llamacpp-and- │ and ModelFusion │ │ necessary steps for building and downloading the OpenHermes │
|
||||
│ modelfusion-461j │ │ │ 2.5 Mistral GGUF model. Once Llama.cpp is ready, users can │
|
||||
│ │ │ │ start the server. The next step involves creating a Next.js │
|
||||
│ │ │ │ project, installing the required libraries, and setting up │
|
||||
│ │ │ │ an API route for handling chatbot interactions. The guide │
|
||||
│ │ │ │ provides detailed explanations of each of these steps, │
|
||||
│ │ │ │ including code snippets and explanations. Once the chatbot │
|
||||
│ │ │ │ interface has been added, users can run the chatbot │
|
||||
│ │ │ │ application using a command in their terminal. A screenshot │
|
||||
│ │ │ │ demonstrating the expected look of the running chatbot is │
|
||||
│ │ │ │ also included. In conclusion, this article serves as a │
|
||||
│ │ │ │ comprehensive guide for developers interested in creating a │
|
||||
│ │ │ │ local chatbot. It covers the setup process, API route │
|
||||
│ │ │ │ creation, frontend development, and application execution. │
|
||||
│ │ │ │ The guide encourages readers to explore the codebase and │
|
||||
│ │ │ │ modify it to suit their specific project needs. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://blog.llamain │ A Cheat Sheet and │ 2024-01-05 │ This blog post gives a detailed RAG cheat sheet. RAG, or │
|
||||
│ dex.ai/a-cheat- │ Some Recipes For │ │ Retrieval Augmented Generation system, involves retrieving │
|
||||
│ sheet-and-some- │ Building Advanced │ │ documents from an external knowledge base and passing it │
|
||||
│ recipes-for- │ RAG │ │ along with the user's query to an LLM for response │
|
||||
│ building-advanced- │ │ │ generation. It consists of a Retrieval component, an │
|
||||
│ rag-803a9d94c41b │ │ │ External Knowledge database, and a Generation component. For │
|
||||
│ │ │ │ a RAG system to be successful, it must be able to find the │
|
||||
│ │ │ │ most relevant documents to a user's query and make good use │
|
||||
│ │ │ │ of the retrieved documents to answer the query sufficiently. │
|
||||
│ │ │ │ Advanced RAG involves applying more sophisticated techniques │
|
||||
│ │ │ │ and strategies to the Retrieval and Generation components to │
|
||||
│ │ │ │ achieve these requirements. It mentions two advanced │
|
||||
│ │ │ │ techniques for Retrieval, Chunk-Size Optimization and │
|
||||
│ │ │ │ Structured External Knowledge, with code samples. │
|
||||
╘══════════════════════╧══════════════════════╧══════════════════╧══════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
@@ -0,0 +1,116 @@
|
||||
╒══════════════════════╤══════════════════════╤══════════════════╤══════════════════════════════════════════════════════════════╕
|
||||
│ URL │ Title │ Published Date │ Summary │
|
||||
╞══════════════════════╪══════════════════════╪══════════════════╪══════════════════════════════════════════════════════════════╡
|
||||
│ https://www.analytic │ Using Llamafiles to │ 2024-01-18 │ The article discusses Llamafiles, which simplify the process │
|
||||
│ svidhya.com/blog/202 │ Simplify LLM │ │ of running Large Language Models (LLMs) on consumer │
|
||||
│ 4/01/using- │ Execution │ │ hardware. Traditionally, running LLMs involved downloading │
|
||||
│ llamafiles-to- │ │ │ third-party software, creating Python environments, and │
|
||||
│ simplify-llm- │ │ │ writing code. Llamafiles address these challenges by │
|
||||
│ execution/ │ │ │ enabling users to download and run LLMs as single-file │
|
||||
│ │ │ │ executables. Additionally, the article explains the concept │
|
||||
│ │ │ │ of Llamafiles, including its benefits and limitations, as │
|
||||
│ │ │ │ well as how to create Llamafiles from quantized LLMs. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://levelup.gitc │ Live Indexing for │ 2024-01-08 │ - Most AI systems, including LLMs, struggle to process and │
|
||||
│ onnected.com/live- │ RAG: A Guide For │ │ answer questions from PDFs due to their complex information. │
|
||||
│ indexing-for-rag-a- │ Real-Time Indexing │ │ - RAG frameworks and Large Language Models (LLMs) have │
|
||||
│ guide-for-real-time- │ Using LlamaIndex and │ │ enabled the creation of full-stack applications for │
|
||||
│ indexing-using- │ AWS │ │ interacting with PDFs. - LlamaIndex is provided as an │
|
||||
│ llamaindex-and-aws-5 │ │ │ example of a RAG framework that allows users to create chat │
|
||||
│ 1353083ace4?gi=472c9 │ │ │ applications for interacting with PDFs with just a few lines │
|
||||
│ 89ddb71&source=rss │ │ │ of code. - The article also discusses additional challenges │
|
||||
│ ----5517fd7b58a6---4 │ │ │ for AI engineers in creating enterprise-grade RAG │
|
||||
│ │ │ │ applications such as re-indexing and live updates. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://tech.dentsus │ LlamaIndexを使ってロ │ 2024-01-22 │ This webpage discusses how to implement Retrieval-Augmented │
|
||||
│ oken.com/entry/2024/ │ ーカル環境でRAGを実 │ │ Generation (RAG) using the LlamaIndex library in a local │
|
||||
│ 01/22/LlamaIndex%E3% │ 行する方法 │ │ environment. The goal is to leverage Large Language Models │
|
||||
│ 82%92%E4%BD%BF%E3%81 │ │ │ (LLMs) like ChatGPT while addressing limitations such as │
|
||||
│ %A3%E3%81%A6%E3%83%A │ │ │ data confidentiality and restricted internet access. The │
|
||||
│ D%E3%83%BC%E3%82%AB% │ │ │ article highlights the benefits of using a local setup for │
|
||||
│ E3%83%AB%E7%92%B0%E5 │ │ │ LLM applications and explains why the LlamaIndex framework │
|
||||
│ %A2%83%E3%81%A7RAG%E │ │ │ is suitable for this purpose. The author provides detailed │
|
||||
│ 3%82%92%E5%AE%9F%E8% │ │ │ instructions on setting up the environment, including │
|
||||
│ A1%8C%E3%81%99%E3%82 │ │ │ installing necessary software and configuring a development │
|
||||
│ %8B%E6%96%B9%E6%B3%9 │ │ │ container using Docker. Furthermore, the article guides │
|
||||
│ 5 │ │ │ readers through the process of loading data, initializing │
|
||||
│ │ │ │ LLM and embedding models, and implementing RAG using Python │
|
||||
│ │ │ │ code. It also includes a sample implementation of a chat │
|
||||
│ │ │ │ system that leverages RAG to answer questions based on a │
|
||||
│ │ │ │ provided text document. The author discusses the challenges │
|
||||
│ │ │ │ faced during implementation and suggests potential │
|
||||
│ │ │ │ improvements, such as optimizing performance by reducing │
|
||||
│ │ │ │ context information and leveraging more powerful hardware. │
|
||||
│ │ │ │ The article concludes by encouraging readers to experiment │
|
||||
│ │ │ │ with RAG and emphasizing the potential of this technology to │
|
||||
│ │ │ │ create useful applications. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://dev.to/lgram │ Create Your Own │ 2024-01-13 │ This blog post provides a detailed guide on how to build a │
|
||||
│ mel/create-your-own- │ Local Chatbot with │ │ local chatbot using several technologies. Here's a summary: │
|
||||
│ local-chatbot-with- │ Next.js, Llama.cpp, │ │ Objective of the blog post: - Build a chatbot that runs on │
|
||||
│ nextjs-llamacpp-and- │ and ModelFusion │ │ your computer using Next.js, Llama.cpp, and ModelFusion. - │
|
||||
│ modelfusion-461j │ │ │ Use the OpenHermes 2.5 Mistral LLM (large language model) │
|
||||
│ │ │ │ for natural language interaction. - Employ the Vercel AI SDK │
|
||||
│ │ │ │ for stream forwarding and rendering. - Integrate the │
|
||||
│ │ │ │ Llama.cpp language model with the Vercel AI SDK through │
|
||||
│ │ │ │ ModelFusion. Necessary Steps: 1. Setup Llama.cpp: a) │
|
||||
│ │ │ │ Clone the repository. b) Build Llama.cpp: Linux/Mac users │
|
||||
│ │ │ │ can run "make", Windows users can follow the instructions │
|
||||
│ │ │ │ provided. c) Download the OpenHermes 2.5 Mistral GGUF │
|
||||
│ │ │ │ model from HuggingFace and move it into the Llama.cpp │
|
||||
│ │ │ │ repository's "models/" directory. d) Start the Llama.cpp │
|
||||
│ │ │ │ server to enable the integration of the model into the │
|
||||
│ │ │ │ chatbot. 2. Create a Next.js Project: a) Create a new │
|
||||
│ │ │ │ Next.js project using "npx create-next-app@latest llamacpp- │
|
||||
│ │ │ │ nextjs-chatbot". b) Configure the project with preferred │
|
||||
│ │ │ │ settings, including TypeScript, ESLint, Tailwind CSS, and │
|
||||
│ │ │ │ App Router. 3. Install Required Libraries: a) Install │
|
||||
│ │ │ │ libraries such as Vercel AI SDK, ModelFusion, and │
|
||||
│ │ │ │ ModelFusion Vercel AI SDK Integration. 4. Creating an API │
|
||||
│ │ │ │ Route for the Chatbot: a) In the 'api/chat/' directory, │
|
||||
│ │ │ │ create 'route.ts' for handling chat interactions. b) │
|
||||
│ │ │ │ Import relevant modules and initialize a ModelFusion text │
|
||||
│ │ │ │ generation model. c) Send the API request, process the │
|
||||
│ │ │ │ response, and generate a streaming response using │
|
||||
│ │ │ │ ModelFusion to access the Llama.cpp chat API. 5. Adding the │
|
||||
│ │ │ │ Chat Interface: a) Establish a chat page, 'page.tsx' to │
|
||||
│ │ │ │ display the chatbot and use the 'useChat' hook from the │
|
||||
│ │ │ │ Vercel AI SDK. b) Clean up the global styles for better │
|
||||
│ │ │ │ UI presentation. 6. Running the Chatbot Application: a) │
|
||||
│ │ │ │ Launch the development server with "npm run dev". b) In a │
|
||||
│ │ │ │ browser, navigate to "http://localhost:3000" to interact │
|
||||
│ │ │ │ with the chatbot. Conclusion: The tutorial provides a step- │
|
||||
│ │ │ │ by-step guide to set up a local chatbot, enabling users to │
|
||||
│ │ │ │ explore AI and natural language processing. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://akash-mathur │ Advanced RAG: Query │ 2024-01-18 │ Welcome to the Advanced RAG Learning Series. This article │
|
||||
│ .medium.com/advanced │ Augmentation for │ │ series explores advanced techniques to heighten │
|
||||
│ -rag-query- │ Next-Level Search │ │ understanding and expertise in Retriever-Augmented │
|
||||
│ augmentation-for- │ using LlamaIndex🦙 │ │ Generation (RAG) applications. Key concepts covered include │
|
||||
│ next-level-search- │ │ │ optimizing retrieval with extra context and metadata, │
|
||||
│ using-llamaindex-d36 │ │ │ improving retrieval efficiency via rerankers, and enhancing │
|
||||
│ 2fed7ecc3 │ │ │ query augmentation. The focus is on query transformations │
|
||||
│ │ │ │ which bridge user prompts and relevant information in vast │
|
||||
│ │ │ │ databases, particularly to address the challenge of │
|
||||
│ │ │ │ retrieval misalignment. Five powerful query transformation │
|
||||
│ │ │ │ techniques are explored, addressing the need to adapt to │
|
||||
│ │ │ │ LLMs' comprehension and generation capabilities. The │
|
||||
│ │ │ │ techniques explored are: - Hypothetical Document Embeddings │
|
||||
│ │ │ │ (HyDE), which creates a hypothetical answer document and │
|
||||
│ │ │ │ encodes it to retrieve relevant documents. - Sub-Question │
|
||||
│ │ │ │ Query Engine, which decomposes complex queries into sub- │
|
||||
│ │ │ │ questions and retrieves results from dedicated data sources. │
|
||||
│ │ │ │ - Router Query Engine, which selects the most appropriate │
|
||||
│ │ │ │ query engine based on user queries and metadata. - Single- │
|
||||
│ │ │ │ Step Query Decomposition, which breaks down complex │
|
||||
│ │ │ │ questions into simpler sub-queries for focused information │
|
||||
│ │ │ │ extraction. - Multi-Step Query Decomposition, which employs │
|
||||
│ │ │ │ a self-ask method to iteratively explore knowledge and │
|
||||
│ │ │ │ uncover hidden connections among facts. The article │
|
||||
│ │ │ │ provides code examples and GitHub links to assist in │
|
||||
│ │ │ │ practical implementation. It also highlights the ongoing │
|
||||
│ │ │ │ developments and potential future directions in query │
|
||||
│ │ │ │ augmentation research. │
|
||||
╘══════════════════════╧══════════════════════╧══════════════════╧══════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
@@ -0,0 +1,78 @@
|
||||
╒══════════════════════╤══════════════════════╤══════════════════╤══════════════════════════════════════════════════════════════╕
|
||||
│ URL │ Title │ Published Date │ Summary │
|
||||
╞══════════════════════╪══════════════════════╪══════════════════╪══════════════════════════════════════════════════════════════╡
|
||||
│ https://tech.dentsus │ LlamaIndexを使ってロ │ 2024-01-22 │ LlamaIndex Library using Retrieval Augmented Generation │
|
||||
│ oken.com/entry/2024/ │ ーカル環境でRAGを実 │ │ (RAG) to Implement Chatbot Systems Locally - LlamaIndex is a │
|
||||
│ 01/22/LlamaIndex%E3% │ 行する方法 │ │ library for ingesting, structuring, and accessing private or │
|
||||
│ 82%92%E4%BD%BF%E3%81 │ │ │ domain-specific data to build LLM-based applications. - It │
|
||||
│ %A3%E3%81%A6%E3%83%A │ │ │ facilitates local implementation of RAG, a technique that │
|
||||
│ D%E3%83%BC%E3%82%AB% │ │ │ combines document search and LLM to generate responses with │
|
||||
│ E3%83%AB%E7%92%B0%E5 │ │ │ reduced hallucination and improved accuracy. - The article │
|
||||
│ %A2%83%E3%81%A7RAG%E │ │ │ provides a step-by-step guide for setting up a development │
|
||||
│ 3%82%92%E5%AE%9F%E8% │ │ │ environment using WSL, devcontainer, and the LlamaIndex │
|
||||
│ A1%8C%E3%81%99%E3%82 │ │ │ library. - It demonstrates RAG implementation using Python │
|
||||
│ %8B%E6%96%B9%E6%B3%9 │ │ │ and explains how to configure the prompt, query engine, and │
|
||||
│ 5 │ │ │ other components. - The resulting chatbot can perform Q&A │
|
||||
│ │ │ │ tasks based on the provided context, as demonstrated with │
|
||||
│ │ │ │ examples using a text file derived from the青空文庫 novel │
|
||||
│ │ │ │ 走れメロス. - The author discusses potential improvements, │
|
||||
│ │ │ │ such as optimizing speed and accuracy. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://blog.llamain │ LlamaIndex Blog │ 2024-01-23 │ The LlamaIndex blog is the official blog of LlamaIndex, │
|
||||
│ dex.ai/?gi=a117797fb │ │ │ featuring release updates, guides, community showcases, and │
|
||||
│ bc8 │ │ │ more. The blog contains articles from January 2023 and │
|
||||
│ │ │ │ earlier, with titles such as "LlamaIndex Newsletter," │
|
||||
│ │ │ │ "Building Multi-Tenancy RAG System with LlamaIndex," "AI │
|
||||
│ │ │ │ Voice Assistant with LlamaIndex and GPT3.5," "Join Thousands │
|
||||
│ │ │ │ in our Free Advanced RAG Certification," "Query Pipelines in │
|
||||
│ │ │ │ LlamaIndex," and more. The blog also provides a cheat sheet │
|
||||
│ │ │ │ and recipes for building advanced RAG, as well as │
|
||||
│ │ │ │ information on scaling LlamaIndex with AWS and Hugging Face. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://levelup.gitc │ Live Indexing for │ 2024-01-08 │ - PDFs contain valuable information, but AI systems struggle │
|
||||
│ onnected.com/live- │ RAG: A Guide For │ │ to process and understand them. - RAG frameworks and LLMs │
|
||||
│ indexing-for-rag-a- │ Real-Time Indexing │ │ have evolved to provide a readily deployable platform for │
|
||||
│ guide-for-real-time- │ Using LlamaIndex and │ │ creating full-stack applications. - With just a few lines │
|
||||
│ indexing-using- │ AWS │ │ of code, LlamaIndex can be used to create a chat-with-PDFs │
|
||||
│ llamaindex-and-aws-5 │ │ │ application. - Additional work is still required by AI │
|
||||
│ 1353083ace4?gi=472c9 │ │ │ engineers to create enterprise RAG applications, such as │
|
||||
│ 89ddb71&source=rss │ │ │ addressing the need to re-index and live update data │
|
||||
│ ----5517fd7b58a6---4 │ │ │ sources. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://dev.to/lgram │ Create Your Own │ 2024-01-13 │ Sure, here is a summary of the content of the webpage you │
|
||||
│ mel/create-your-own- │ Local Chatbot with │ │ provided: The article explains how to create a local │
|
||||
│ local-chatbot-with- │ Next.js, Llama.cpp, │ │ chatbot using Next.js, Llama.cpp, and ModelFusion. The │
|
||||
│ nextjs-llamacpp-and- │ and ModelFusion │ │ chatbot will run on the user's computer and will be able to │
|
||||
│ modelfusion-461j │ │ │ generate responses to user messages in real-time using the │
|
||||
│ │ │ │ OpenHermes 2.5 Mistral Large Language Model (LLM). To build │
|
||||
│ │ │ │ the chatbot, the user will need to set up Llama.cpp, create │
|
||||
│ │ │ │ a Next.js project, install the required libraries, configure │
|
||||
│ │ │ │ an API route for the chatbot, add a chat interface, and │
|
||||
│ │ │ │ finally run the chatbot application. The full code for a │
|
||||
│ │ │ │ starter project with more examples can be found on GitHub. │
|
||||
│ │ │ │ The article includes step-by-step instructions, code │
|
||||
│ │ │ │ snippets, and a screenshot of what the chatbot interface │
|
||||
│ │ │ │ looks like when running. The author also provides a brief │
|
||||
│ │ │ │ introduction to each technology used and explains the │
|
||||
│ │ │ │ architecture of the chatbot. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://blog.llamain │ A Cheat Sheet and │ 2024-01-05 │ This webpage shares a comprehensive RAG Cheat Sheet that │
|
||||
│ dex.ai/a-cheat- │ Some Recipes For │ │ provides motivations for RAG, techniques, and strategies for │
|
||||
│ sheet-and-some- │ Building Advanced │ │ creating advanced RAG systems. It begins with Basic RAG, │
|
||||
│ recipes-for- │ RAG │ │ where documents are retrieved from an external database and │
|
||||
│ building-advanced- │ │ │ passed along with the user query to an LLM for response │
|
||||
│ rag-803a9d94c41b │ │ │ generation. Two high-level success requirements for RAG are │
|
||||
│ │ │ │ defined: retrieval must find relevant documents, and │
|
||||
│ │ │ │ generation must use retrieved documents to answer user │
|
||||
│ │ │ │ queries. To achieve these requirements, advanced techniques │
|
||||
│ │ │ │ can address each requirement independently or │
|
||||
│ │ │ │ simultaneously. The webpage briefly describes chunk-size │
|
||||
│ │ │ │ optimization, structured external knowledge, and interleaved │
|
||||
│ │ │ │ retrieval as advanced Retrieval techniques. For the │
|
||||
│ │ │ │ Generation component, advanced techniques include in-context │
|
||||
│ │ │ │ learning, prompt engineering, and policy learning. The │
|
||||
│ │ │ │ provided RAG cheat sheet offers a visual representation of │
|
||||
│ │ │ │ these concepts. │
|
||||
╘══════════════════════╧══════════════════════╧══════════════════╧══════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
@@ -0,0 +1,98 @@
|
||||
╒══════════════════════╤══════════════════════╤══════════════════╤══════════════════════════════════════════════════════════════╕
|
||||
│ URL │ Title │ Published Date │ Summary │
|
||||
╞══════════════════════╪══════════════════════╪══════════════════╪══════════════════════════════════════════════════════════════╡
|
||||
│ https://www.analytic │ Using Llamafiles to │ 2024-01-18 │ Sure, here's a summary of the web page content provided. │
|
||||
│ svidhya.com/blog/202 │ Simplify LLM │ │ **Summary** - Traditional LLM execution is tedious, │
|
||||
│ 4/01/using- │ Execution │ │ involving downloading 3rd party software, Python, Pytorch, │
|
||||
│ llamafiles-to- │ │ │ and HuggingFace libraries, and potentially writing code to │
|
||||
│ simplify-llm- │ │ │ run the model. - Llamafiles are single-file executables │
|
||||
│ execution/ │ │ │ that simplify running LLMs, eliminating the need for initial │
|
||||
│ │ │ │ library installation. - They leverage the llama.cpp C │
|
||||
│ │ │ │ library for quantized LLM execution on CPUs and the │
|
||||
│ │ │ │ cosmopolitan libc for cross-platform compatibility. - │
|
||||
│ │ │ │ Available models are in the GGUF quantized format, designed │
|
||||
│ │ │ │ for efficient storage, sharing, and loading of LLMs on CPUs │
|
||||
│ │ │ │ and GPUs. - There are limitations to using Llamafiles, │
|
||||
│ │ │ │ including the need for quantized models and the lack of │
|
||||
│ │ │ │ support for LLMs requiring GPUs. - Llamafiles offer │
|
||||
│ │ │ │ advantages over traditional methods, such as faster │
|
||||
│ │ │ │ inference, offline usage, and potential cost reduction. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://tech.dentsus │ LlamaIndexを使ってロ │ 2024-01-22 │ - Retrieval-Augmented Generation (RAG) is a technique that │
|
||||
│ oken.com/entry/2024/ │ ーカル環境でRAGを実 │ │ utilizes Large Language Models (LLMs) to improve the │
|
||||
│ 01/22/LlamaIndex%E3% │ 行する方法 │ │ accuracy and reduce hallucination in generated responses. │
|
||||
│ 82%92%E4%BD%BF%E3%81 │ │ │ - LlamaIndex is a data framework used for ingesting, │
|
||||
│ %A3%E3%81%A6%E3%83%A │ │ │ structuring, and accessing private or domain-specific data │
|
||||
│ D%E3%83%BC%E3%82%AB% │ │ │ for LLM-based applications. - This article demonstrates │
|
||||
│ E3%83%AB%E7%92%B0%E5 │ │ │ how to set up a local environment with WSL and Devcontainer │
|
||||
│ %A2%83%E3%81%A7RAG%E │ │ │ to utilize LLMs. - An example implementation of a RAG │
|
||||
│ 3%82%92%E5%AE%9F%E8% │ │ │ application using LlamaIndex is provided for answering │
|
||||
│ A1%8C%E3%81%99%E3%82 │ │ │ questions based on the context of a document. - Optimizing │
|
||||
│ %8B%E6%96%B9%E6%B3%9 │ │ │ the system's performance can be achieved by adjusting the │
|
||||
│ 5 │ │ │ context information and utilizing more powerful hardware. │
|
||||
│ │ │ │ Creating a more effective RAG involves finding optimal │
|
||||
│ │ │ │ contexts and refining the search techniques. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://dev.to/lgram │ Create Your Own │ 2024-01-13 │ The blog post covers building a local chatbot using the │
|
||||
│ mel/create-your-own- │ Local Chatbot with │ │ Next.js framework. An AI chatbot uses the Vercel AI SDK to │
|
||||
│ local-chatbot-with- │ Next.js, Llama.cpp, │ │ handle stream forwarding and rendering, the ModelFusion │
|
||||
│ nextjs-llamacpp-and- │ and ModelFusion │ │ library to integrate Llama.cpp with the Vercel AI SDK, and │
|
||||
│ modelfusion-461j │ │ │ OpenHermes 2.5 Mistral as a powerful language model. The │
|
||||
│ │ │ │ architecture involves a user interface that sends messages │
|
||||
│ │ │ │ to the AI server, processed by Llama.cpp, and returned as │
|
||||
│ │ │ │ responses to the user. The initial steps include setting up │
|
||||
│ │ │ │ Llama.cpp, downloading OpenHermes 2.5 Mistral GGUF, and │
|
||||
│ │ │ │ starting the Llama.cpp server. Creating the Next.js project │
|
||||
│ │ │ │ involves installing the required libraries and setting up │
|
||||
│ │ │ │ the API route using the useChat hook from the Vercel AI SDK. │
|
||||
│ │ │ │ Adding the chat interface involves creating a separate page, │
|
||||
│ │ │ │ handling global styles, and more. Finally, running the │
|
||||
│ │ │ │ chatbot application lets users interact with the chatbot, │
|
||||
│ │ │ │ and the conclusion highlights the blog's intent as a │
|
||||
│ │ │ │ starting point for exploration. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://levelup.gitc │ Live Indexing for │ 2024-01-08 │ - LLMs (Large Language Models) are not effective at │
|
||||
│ onnected.com/live- │ RAG: A Guide For │ │ analyzing PDFs due to their complex information, leading to │
|
||||
│ indexing-for-rag-a- │ Real-Time Indexing │ │ errors and hallucinations. - RAG (Retrieval-Augmented │
|
||||
│ guide-for-real-time- │ Using LlamaIndex and │ │ Generation) frameworks like LlamaIndex and Langchain have │
|
||||
│ indexing-using- │ AWS │ │ made it easier to develop full-stack applications. - │
|
||||
│ llamaindex-and-aws-5 │ │ │ LlamaIndex requires minimal code to create a chat-with-PDFs │
|
||||
│ 1353083ace4?gi=472c9 │ │ │ application, making it user-friendly with a few prompts and │
|
||||
│ 89ddb71&source=rss │ │ │ configurations. - The article mentions the need for further │
|
||||
│ ----5517fd7b58a6---4 │ │ │ actions by AI engineers to create enterprise RAG │
|
||||
│ │ │ │ applications but doesn't provide specifics. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://www.youtube. │ Transforming Invoice │ 2024-01-08 │ This webpage showcases Sparrow, an open-source solution for │
|
||||
│ com/watch?v=VKeYaIEk │ Data into JSON: │ │ processing documents with local LLMs. The author uses │
|
||||
│ 82s&v=watch&feature= │ Local LLM with │ │ Starling LLM with Ollama and demonstrates the extraction of │
|
||||
│ youtu.be │ LlamaIndex \u0026 │ │ structured data from invoice documents. Here's a concise │
|
||||
│ │ Pydantic │ │ summary of the content: 1. Sparrow GitHub Repo: A link to │
|
||||
│ │ │ │ the project's GitHub repository is provided. 2. │
|
||||
│ │ │ │ Introduction: The author introduces Sparrow as a solution │
|
||||
│ │ │ │ for document processing using LLMs and mentions that it runs │
|
||||
│ │ │ │ locally with Ollama. 3. Example: A simple example │
|
||||
│ │ │ │ demonstrates how to process a document and extract invoice- │
|
||||
│ │ │ │ related information in JSON format. 4. Configuration: The │
|
||||
│ │ │ │ author guides viewers on setting up the configuration for │
|
||||
│ │ │ │ the project. 5. RAG with Sparrow and LlamaIndex: The video │
|
||||
│ │ │ │ demonstrates how to use RAG (Retrieve Answers from Generated │
|
||||
│ │ │ │ Text) along with Sparrow and LlamaIndex for document │
|
||||
│ │ │ │ processing. 6. RAG Pipeline Implementation: The author │
|
||||
│ │ │ │ provides a detailed walkthrough of implementing RAG pipeline │
|
||||
│ │ │ │ for document processing. 7. Pydantic Dynamic Class: A │
|
||||
│ │ │ │ Pydantic dynamic class is created to generate structured │
|
||||
│ │ │ │ JSON output from the processed documents. 8. LlamaIndex │
|
||||
│ │ │ │ Setup with Pydantic Class to Produce JSON Output: The video │
|
||||
│ │ │ │ demonstrates how to set up LlamaIndex with a Pydantic class │
|
||||
│ │ │ │ to obtain structured JSON output from the document │
|
||||
│ │ │ │ processing. 9. Query: Viewers are shown how to query │
|
||||
│ │ │ │ processed documents for specific information. 10. Summary: │
|
||||
│ │ │ │ The author summarizes the key points of the video, │
|
||||
│ │ │ │ highlighting the use of Sparrow for document processing with │
|
||||
│ │ │ │ LLMs. The video includes additional information about │
|
||||
│ │ │ │ connecting with the author via various platforms, such as │
|
||||
│ │ │ │ YouTube, Twitter, LinkedIn, and Medium. Hashtags related to │
|
||||
│ │ │ │ the video's topic are also mentioned. │
|
||||
╘══════════════════════╧══════════════════════╧══════════════════╧══════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
╒══════════════════════╤══════════════════════╤══════════════════╤══════════════════════════════════════════════════════════════╕
|
||||
│ URL │ Title │ Published Date │ Summary │
|
||||
╞══════════════════════╪══════════════════════╪══════════════════╪══════════════════════════════════════════════════════════════╡
|
||||
│ https://tech.dentsus │ LlamaIndexを使ってロ │ 2024-01-22 │ The article talks about how to implement Retrieval-Augmented │
|
||||
│ oken.com/entry/2024/ │ ーカル環境でRAGを実 │ │ Generation (RAG) using the LlamaIndex library in a local │
|
||||
│ 01/22/LlamaIndex%E3% │ 行する方法 │ │ environment. Reasons for choosing local environment for LLM │
|
||||
│ 82%92%E4%BD%BF%E3%81 │ │ │ utilization are discussed. LlamaIndex benefits and features │
|
||||
│ %A3%E3%81%A6%E3%83%A │ │ │ along with the required environment setup are also │
|
||||
│ D%E3%83%BC%E3%82%AB% │ │ │ mentioned. A detailed step-by-step guide to implement RAG │
|
||||
│ E3%83%AB%E7%92%B0%E5 │ │ │ using LlamaIndex is provided with sample questions and │
|
||||
│ %A2%83%E3%81%A7RAG%E │ │ │ answers. The article highlights aspects of this │
|
||||
│ 3%82%92%E5%AE%9F%E8% │ │ │ implementation that can be further improved in terms of │
|
||||
│ A1%8C%E3%81%99%E3%82 │ │ │ reducing time and increasing accuracy. Additionally, using │
|
||||
│ %8B%E6%96%B9%E6%B3%9 │ │ │ more RAM and processing power is suggested. Overall, the │
|
||||
│ 5 │ │ │ article explores the convenience of using LlamaIndex for RAG │
|
||||
│ │ │ │ implementation while highlighting areas for improvement to │
|
||||
│ │ │ │ build a more robust RAG system. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://blog.llamain │ LlamaIndex Blog │ 2024-01-23 │ The LlamaIndex blog offers updates on releases, guides, and │
|
||||
│ dex.ai/?gi=a117797fb │ │ │ community showcases. The recent posts include a newsletter │
|
||||
│ bc8 │ │ │ from January 23rd, news about building a secure multi- │
|
||||
│ │ │ │ tenancy RAG system, using LlamaIndex and GPT3.5 to build an │
|
||||
│ │ │ │ AI voice assistant, and launching a free course on advanced │
|
||||
│ │ │ │ RAG certification. Additionally, there are introductions to │
|
||||
│ │ │ │ new features like query pipelines and discussions on scaling │
|
||||
│ │ │ │ LlamaIndex with AWS and Hugging Face. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://levelup.gitc │ Live Indexing for │ 2024-01-08 │ The page discusses the challenges and solutions in │
|
||||
│ onnected.com/live- │ RAG: A Guide For │ │ processing and analyzing PDF documents using AI systems. The │
|
||||
│ indexing-for-rag-a- │ Real-Time Indexing │ │ author highlights the difficulty in extracting meaningful │
|
||||
│ guide-for-real-time- │ Using LlamaIndex and │ │ information from PDFs due to their complex structure and the │
|
||||
│ indexing-using- │ AWS │ │ presence of various elements like tables, figures, │
|
||||
│ llamaindex-and-aws-5 │ │ │ equations, and photos. The author also mentions the rise of │
|
||||
│ 1353083ace4?gi=472c9 │ │ │ Retrieval-Augmented Generation (RAG) frameworks and Large │
|
||||
│ 89ddb71&source=rss │ │ │ Language Models (LLMs) in 2022 and the evolution of the │
|
||||
│ ----5517fd7b58a6---4 │ │ │ ecosystem for creating full-stack applications. They │
|
||||
│ │ │ │ specifically highlight LlamaIndex as a prominent RAG │
|
||||
│ │ │ │ framework that simplifies the creation of chat applications │
|
||||
│ │ │ │ for interacting with PDFs. The page further mentions that │
|
||||
│ │ │ │ although creating a basic RAG application is relatively │
|
||||
│ │ │ │ simple, developing an enterprise-grade RAG application │
|
||||
│ │ │ │ requires addressing challenges related to live data │
|
||||
│ │ │ │ indexing, updates, real-time inference, and security. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://dev.to/lgram │ Create Your Own │ 2024-01-13 │ A brief summary of the webpage's content: │
|
||||
│ mel/create-your-own- │ Local Chatbot with │ │ **Title: Create Your Own Local Chatbot with Next.js, │
|
||||
│ local-chatbot-with- │ Next.js, Llama.cpp, │ │ Llama.cpp, and ModelFusion** - The blog post provides a │
|
||||
│ nextjs-llamacpp-and- │ and ModelFusion │ │ step-by-step guide to building a local chatbot using │
|
||||
│ modelfusion-461j │ │ │ Next.js, Llama.cpp, and ModelFusion. - Llama.cpp is an LLM │
|
||||
│ │ │ │ (large language model) inference engine that allows running │
|
||||
│ │ │ │ LLMs like OpenHermes 2.5 Mistral locally. - The Vercel AI │
|
||||
│ │ │ │ SDK is leveraged to manage stream forwarding and rendering, │
|
||||
│ │ │ │ while ModelFusion is utilized for integrating Llama.cpp with │
|
||||
│ │ │ │ the SDK. - Instructions are provided for setting up │
|
||||
│ │ │ │ Llama.cpp, downloading the OpenHermes 2.5 Mistral model, and │
|
||||
│ │ │ │ starting the Llama.cpp server. - The creation of the │
|
||||
│ │ │ │ Next.js project and installation of required libraries are │
|
||||
│ │ │ │ outlined. - Detailed steps for creating an API route for │
|
||||
│ │ │ │ the chatbot are explained. - The process of adding the chat │
|
||||
│ │ │ │ interface to the frontend and cleaning up global styles is │
|
||||
│ │ │ │ described. - The user can run the chatbot application │
|
||||
│ │ │ │ locally and interact with it via a user-friendly chat page. │
|
||||
│ │ │ │ - The code serves as a starting point for developing AI │
|
||||
│ │ │ │ projects using these tools. │
|
||||
├──────────────────────┼──────────────────────┼──────────────────┼──────────────────────────────────────────────────────────────┤
|
||||
│ https://blog.llamain │ A Cheat Sheet and │ 2024-01-05 │ This web page provides information on Retrieval-Augmented │
|
||||
│ dex.ai/a-cheat- │ Some Recipes For │ │ Generation (RAG) systems. RAG involves retrieving data from │
|
||||
│ sheet-and-some- │ Building Advanced │ │ an external knowledge database and sending it with a user │
|
||||
│ recipes-for- │ RAG │ │ query to an LLM for response generation. A basic RAG │
|
||||
│ building-advanced- │ │ │ involves retrieval, an external knowledge database, and a │
|
||||
│ rag-803a9d94c41b │ │ │ generation component. The success of a RAG system depends on │
|
||||
│ │ │ │ the retrieval and generation components meeting requirements │
|
||||
│ │ │ │ such as relevance and usefulness of answers. To achieve │
|
||||
│ │ │ │ these requirements, advanced techniques can be used in │
|
||||
│ │ │ │ Retrieval and Generation. Techniques for Retrieval include │
|
||||
│ │ │ │ chunk-size optimization and using structured external │
|
||||
│ │ │ │ knowledge, while techniques for Generation include LM │
|
||||
│ │ │ │ adapters, knowledge-aware training objectives, and answer │
|
||||
│ │ │ │ merging/reranking. │
|
||||
╘══════════════════════╧══════════════════════╧══════════════════╧══════════════════════════════════════════════════════════════╛
|
||||
|
||||
|
||||
Reference in New Issue
Block a user