AI FAQ Generator & github blogs
This commit is contained in:
@@ -1,140 +1,157 @@
|
||||
""" Package for writing getting-started and how to guides. """
|
||||
"""
|
||||
Enhanced GitHub Blog Generator
|
||||
|
||||
This module provides comprehensive content generation from GitHub repositories,
|
||||
including technical documentation, tutorials, case studies, and more.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import json
|
||||
from typing import Dict, List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
from loguru import logger
|
||||
logger.remove()
|
||||
logger.add(sys.stdout,
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}"
|
||||
)
|
||||
colorize=True,
|
||||
format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}")
|
||||
|
||||
from .scrape_github_readme import GitHubScraper, GitHubContent
|
||||
from .scrape_github_readme import get_gh_details_vision, get_readme_content
|
||||
from .scrape_github_readme import research_github_topics, check_if_already_written
|
||||
from .github_getting_started import github_readme_blog
|
||||
from .gpt_online_researcher import do_online_research
|
||||
from .faqs_generator_blog import generate_blog_faq
|
||||
from .get_blog_metadata import blog_metadata
|
||||
from .save_blog_to_file import save_blog_to_file
|
||||
from .arxiv_schlorly_research import read_written_ids, extract_arxiv_ids_from_line, append_id_to_file
|
||||
from .github_getting_started import (
|
||||
generate_technical_documentation,
|
||||
generate_getting_started_guide,
|
||||
generate_tutorial_series,
|
||||
generate_comparison_analysis,
|
||||
generate_case_studies,
|
||||
generate_contribution_guide,
|
||||
generate_security_guide,
|
||||
generate_performance_guide
|
||||
)
|
||||
|
||||
|
||||
|
||||
def blog_from_github(github_opts, flag):
    """Write getting-started blogs from one GitHub URL or from a file of URLs.

    Args:
        github_opts: A GitHub repository URL when ``flag`` contains ``'url'``;
            otherwise the path of a UTF-8 text file holding one GitHub URL
            per line when ``flag`` contains ``'csv'``.
        flag: Input-mode selector. ``'url'`` is checked first, then ``'csv'``;
            any other value makes the call a no-op.

    In ``'url'`` mode a failure is logged and the process exits with status 1.
    In ``'csv'`` mode a failure to read the file is logged, and each URL that
    fails is logged without stopping the remaining URLs.
    """
    if 'url' in flag:
        try:
            write_from_url(github_opts)
        except Exception as err:
            # Include the cause; the URL alone does not say what went wrong.
            logger.error(f"Failed to write from github url: {github_opts}: {err}")
            sys.exit(1)
    elif 'csv' in flag:
        # Bound before the try so the loop below is safe whatever open() does.
        gh_urls = []
        try:
            with open(github_opts, 'r', encoding="utf-8") as file:
                for line in file:
                    gh_url = line.strip()
                    # Skip blank lines instead of handing an empty URL on.
                    if gh_url:
                        gh_urls.append(gh_url)
        except FileNotFoundError:
            # The path lives in github_opts; the former `file_path` was an
            # undefined name and raised NameError from inside this handler.
            logger.error(f"CSV File not found: {github_opts}")
        except Exception as e:
            logger.error(f"CSV: An error occurred: {str(e)}")

        for gh_url in gh_urls:
            try:
                write_from_url(gh_url)
            except Exception as err:
                logger.error(f"Failed to write blog from github: {err}")
|
||||
|
||||
|
||||
|
||||
def write_from_url(gh_url):
|
||||
# String to store the blog content.
|
||||
howto_blog = ''
|
||||
# The url was not found in already_written data.
|
||||
if not check_if_already_written(gh_url):
|
||||
logger.info(f"Writing getting started from url: {gh_url}")
|
||||
else:
|
||||
logger.error(f"Skipping, already written on url: {gh_url}")
|
||||
return
|
||||
|
||||
# Direct link to the raw content of README file
|
||||
# FIXME: Remove the hardcoding; expose this as another option or a config setting.
|
||||
image_dir = os.path.join(os.getcwd(), "blog_images")
|
||||
generated_image_name = f"screenshot_image_{datetime.datetime.now():%Y-%m-%d-%H-%M-%S}.png"
|
||||
generated_image_filepath = os.path.join(image_dir, generated_image_name)
|
||||
try:
|
||||
logger.info(f"Getting github repo details from vision model: {generated_image_filepath}")
|
||||
gh_json = get_gh_details_vision(gh_url, generated_image_filepath)
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to get gemini vision details from GH repo image: {err}")
|
||||
sys.exit(1)
|
||||
howto_blog = "```" + f"\nGithub URL:{gh_url}\nStars:{gh_json.get('stars')}\n"
|
||||
howto_blog += f"Forks:{gh_json.get('forks')}\n"
|
||||
howto_blog += f"Description:{gh_json.get('about')}\nBranch:{gh_json.get('branch_name')}\n" + "```\n\n"
|
||||
|
||||
raw_readme_url_base = "https://raw.githubusercontent.com/" + "/".join(gh_url.split("/")[-2:])
|
||||
if gh_json.get('branch_name'):
|
||||
raw_readme_url = raw_readme_url_base + f"/{gh_json.get('branch_name')}/" + "README.md"
|
||||
else:
|
||||
raw_readme_url = raw_readme_url_base + f"/main/" + "README.md"
|
||||
logger.info(f"Using this url to fetch the README file: {raw_readme_url}")
|
||||
|
||||
try:
|
||||
# Get and print the main content
|
||||
readme_content = get_readme_content(raw_readme_url)
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to get README from URL: {raw_readme_url}: {err}")
|
||||
# If the readme is still None, try with master branch.
|
||||
if not readme_content:
|
||||
raw_readme_url = raw_readme_url_base + f"/master/" + "README.md"
|
||||
logger.warning(f"Trying with master branch: {raw_readme_url}")
|
||||
readme_content = get_readme_content(raw_readme_url)
|
||||
if not readme_content:
|
||||
logger.error(f"Still failed to get the README: {readme_content}")
|
||||
sys.exit(1)
|
||||
class GitHubBlogGenerator:
|
||||
"""Generator for various types of GitHub-related content."""
|
||||
|
||||
# Create a getting-started blog, adapted from the GH url README.
|
||||
howto_blog += github_readme_blog(readme_content, "gemini")
|
||||
def __init__(self, cache_dir: str = ".github_cache", ttl_hours: int = 24):
    """Set up the cache path, the GitHub scraper and the output directory.

    Args:
        cache_dir: Cache directory. Stored as a ``Path`` on ``self.cache_dir``
            and passed unchanged to ``GitHubScraper``.
        ttl_hours: Passed to ``GitHubScraper`` as its second argument
            (presumably a cache time-to-live in hours; confirm against the
            scraper).
    """
    self.cache_dir = Path(cache_dir)
    self.scraper = GitHubScraper(cache_dir, ttl_hours)

    # Relative path, so it resolves against the current working directory;
    # the directory is created here if it does not exist yet.
    output_dir = Path("generated_content")
    self.output_dir = output_dir
    output_dir.mkdir(exist_ok=True)
|
||||
|
||||
async def generate_content(
    self,
    github_url: str,
    content_types: Optional[List[str]] = None,
) -> Dict[str, str]:
    """Generate the requested kinds of content for one GitHub repository.

    Args:
        github_url: Repository URL handed to the scraper and to the online
            research step.
        content_types: Kinds of content to produce. Defaults to
            ``["getting_started", "technical_docs", "tutorials"]`` when
            ``None``. Unrecognised entries are logged and skipped.

    Returns:
        A dict keyed by content type with each generator's result, plus a
        ``"faqs"`` entry when the online research and FAQ step succeeds.

    Raises:
        Exception: Any failure while scraping or running a generator is
            logged and re-raised. A failure in the FAQ step is only logged.
    """
    if content_types is None:
        content_types = ["getting_started", "technical_docs", "tutorials"]

    try:
        # One generator per supported content type; replaces the if/elif chain.
        generators = {
            "getting_started": generate_getting_started_guide,
            "technical_docs": generate_technical_documentation,
            "tutorials": generate_tutorial_series,
            "comparison": generate_comparison_analysis,
            "case_studies": generate_case_studies,
            "contribution": generate_contribution_guide,
            "security": generate_security_guide,
            "performance": generate_performance_guide,
        }

        # Scrape GitHub content
        repo_content = await self.scraper.scrape_github_content(github_url)

        generated_content = {}
        for content_type in content_types:
            generator = generators.get(content_type)
            if generator is None:
                logger.warning(f"Unknown content type: {content_type}")
                continue
            # A fresh dict per call, so generators never share one mapping.
            generated_content[content_type] = generator(repo_content.dict())

        # FAQs are best-effort: a research failure must not discard the
        # content generated above.
        # NOTE(review): these helpers are called without await; if they do
        # blocking network I/O they will stall the event loop. Confirm.
        try:
            research_report = do_online_research(repo_content.title, "gemini", github_url)
            generated_content["faqs"] = generate_blog_faq(research_report, "gemini")
        except Exception as err:
            logger.error(f"Failed to generate FAQs: {err}")

        return generated_content

    except Exception as err:
        logger.error(f"Failed to generate content: {err}")
        raise
|
||||
|
||||
def save_content(self, content: Dict[str, str], base_filename: str):
    """Save every generated piece of content through ``save_blog_to_file``.

    Args:
        content: Mapping of content type to the generated text.
        base_filename: Prefix for the per-type file name that is logged.

    Raises:
        Exception: Any failure while deriving metadata or saving is logged
            and re-raised; entries after the failing one are not processed.
    """
    try:
        for content_type, body in content.items():
            # NOTE(review): this name is only logged. It is not passed to
            # save_blog_to_file, so the real output path is whatever that
            # helper chooses. Confirm.
            filename = f"{base_filename}_{content_type}.md"

            # Title, description, tags and categories derived from the text.
            metadata = blog_metadata(body, "gemini")
            title, meta_desc, tags, categories = metadata

            # The last argument is the image path; none is supplied here.
            save_blog_to_file(body, title, meta_desc, tags, categories, None)

            logger.info(f"Saved {content_type} content to {filename}")
    except Exception as err:
        logger.error(f"Failed to save content: {err}")
        raise
|
||||
|
||||
# Do online research for faqs on the github url.
|
||||
try:
|
||||
# Repo names are misnomers for other searches; include its description too.
|
||||
# Which, skews the result favourably towards its home/paid pages.
|
||||
#online_query = f"{''.join(gh_url.split('/')[-1:])} " + gh_json.get('about')
|
||||
online_query = f"{''.join(gh_url.split('/')[-1:])} "
|
||||
logger.info("Do web research with Tavily & Metaphor AI.")
|
||||
research_report = do_online_research(online_query, "gemini", gh_url)
|
||||
except Exception as err:
|
||||
logger.error(f"failed to do online research: {err}")
|
||||
async def main():
    """Run the generator over a fixed list of placeholder repository URLs.

    For each URL, every supported content type is generated and then saved
    under a base name taken from the last path segment of the URL. A failure
    for one URL is logged and the loop moves on to the next.
    """
    generator = GitHubBlogGenerator()

    # Every content type generate_content knows how to dispatch.
    wanted_types = [
        "getting_started",
        "technical_docs",
        "tutorials",
        "comparison",
        "case_studies",
        "contribution",
        "security",
        "performance",
    ]

    # Placeholder repositories; replace with real ones before running.
    repo_urls = [
        "https://github.com/owner/repo",
        "https://github.com/owner/another-repo",
    ]

    for url in repo_urls:
        try:
            generated = await generator.generate_content(url, wanted_types)
            # The last path segment of the URL names the output.
            repo_name = url.split("/")[-1]
            generator.save_content(generated, repo_name)
        except Exception as e:
            logger.error(f"Error processing {url}: {e}")
|
||||
|
||||
# Generate FAQs from the online research report.
|
||||
try:
|
||||
blog_faqs = generate_blog_faq(research_report, "gemini")
|
||||
howto_blog += f"\n\n## {''.join(gh_url.split('/')[-1:])} FAQs\n\n" + blog_faqs
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to generate FAQs from web research_report: {err}")
|
||||
|
||||
logger.info(f"\n\nFinal Blog Content: {howto_blog}\n\n")
|
||||
|
||||
try:
|
||||
blog_title, blog_meta_desc, blog_tags, blog_categories = blog_metadata(howto_blog, "gemini")
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to get blog metadata: {err}")
|
||||
raise err
|
||||
|
||||
try:
|
||||
save_blog_to_file(howto_blog, blog_title, blog_meta_desc, blog_tags,\
|
||||
blog_categories, generated_image_filepath)
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to save blog to a file: {err}")
|
||||
sys.exit(1)
|
||||
|
||||
try:
|
||||
append_id_to_file(gh_url, "papers_already_written_on.txt")
|
||||
except Exception as err:
|
||||
logger.error(f"Failed to write/append ID to papers_already_written_on.txt: {err}")
|
||||
raise err
|
||||
if __name__ == "__main__":
    # asyncio is not among this module's top-level imports; import it here
    # so the entry point does not fail with NameError.
    import asyncio

    asyncio.run(main())
|
||||
|
||||
Reference in New Issue
Block a user