#!/usr/bin/python3
"""
Main module for calling PSEO related functions.

This is the end user interface and is user-driven. It allows the user to
specify various parameters for blog generation without needing to edit the
code.
"""

import argparse
import csv
import json
import os
import re
import sys

import requests
from loguru import logger

# Logger configuration (done before the project imports below so that any
# logging they perform at import time already uses this sink).
logger.remove()
logger.add(sys.stdout, colorize=True, format="{level}|{file}:{line}:{function}| {message}")

# Importing custom functions
from lib.get_text_response import generate_detailed_blog, generate_youtube_blog
from lib.main_youtube_research_blog import generate_youtube_research_blog
from lib.main_keywords_to_blog import generate_keyword_blog
from lib.main_arxiv_to_blog import blog_arxiv_keyword, blog_arxiv_url_list

# Compiled once at import time; used by is_valid_url().
_URL_PATTERN = re.compile(
    r'^(?:http|ftp)s?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|'  # domain...
    r'localhost|'  # localhost...
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}|'  # ...or ipv4
    r'\[?[A-F0-9]*:[A-F0-9:]+\]?)'  # ...or ipv6
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)


def parse_arguments():
    """Parses command-line arguments.

    Returns:
        argparse.Namespace: Parsed arguments with the attributes ``csv``,
        ``keywords``, ``youtube_urls``, ``scholar``, ``niche``,
        ``wordpress`` and ``output_format``.
    """
    example_usage = """
    Example Usage:

    Keyword usage:
    python pseo_main.py --keywords "Writesonic AI SEO-optimized blog writing,PepperType AI virtual content assistant,Copysmith AI enterprise eCommerce content,Copy AI artificial intelligence content generator,Jasper AI creative content platform,Contents generative AI content strategy"

    YouTube usage:
    python pseo_main.py --youtube_urls https://www.youtube.com/watch?v=yu27PWzJI_Y,https://www.youtube.com/watch?v=WGzoBD-xthI,https://www.youtube.com/watch?v=zizonToFXDs

    Scholar usage:
    python pseo_main.py --scholar "GPT-4 Technical Report"
    """
    parser = argparse.ArgumentParser(
        description="Generate blogs based on user input.",
        epilog=example_usage,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # Inputs: at least one of csv, keywords, youtube_urls or scholar must be
    # given. argparse does not enforce this; main() validates it.
    parser.add_argument("--csv", type=str,
                        help="Provide path csv file. Check the template csv for example.")
    parser.add_argument("--keywords", type=str,
                        help="Keywords for blog generation.")
    parser.add_argument("--youtube_urls", type=str,
                        help="Comma-separated YouTube URLs for blog generation.")
    parser.add_argument("--scholar", type=str,
                        help="Write blog from latest research papers on given keywords. "
                             "Use 'arxiv_papers_url' to provide a file arxiv url list.")

    # Optional options.
    parser.add_argument("--niche", action='store_true', default=False,
                        help="Flag to generate niche blogs (default: False).")
    parser.add_argument("--wordpress", action='store_true', default=False,
                        help="Flag to upload blogs to WordPress (default: False).")
    # Add options for blog_tone and blog_personality.
    parser.add_argument("--output_format", choices=['plaintext', 'markdown', 'html'],
                        default='markdown',
                        help="Output format of the blogs (default: markdown).")

    return parser.parse_args()


def check_openai_api_key(api_key, timeout=10):
    """Checks if the OpenAI API key is valid.

    Sends one authenticated GET request and treats an HTTP 200 response as
    proof that the key is accepted.

    Args:
        api_key (str): The OpenAI API key.
        timeout (float): Seconds to wait for the HTTP request before giving
            up, so an unreachable API cannot hang the CLI forever.

    Returns:
        bool: True if the key is valid, False otherwise.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    headers = {"Authorization": f"Bearer {api_key}"}
    # NOTE(review): OpenAI documents the engines endpoints as deprecated in
    # favour of /v1/models - confirm and switch the URL if appropriate.
    response = requests.get("https://api.openai.com/v1/engines",
                            headers=headers, timeout=timeout)
    return response.status_code == 200


def _split_comma_list(value):
    """Splits a comma-separated string into stripped, non-empty entries."""
    return [entry.strip() for entry in value.split(",") if entry.strip()]


def main():
    """Main function to handle blog generation based on user input.

    Exactly one input mode is processed per run, picked in this order of
    precedence: ``--youtube_urls``, ``--keywords``, ``--csv``, ``--scholar``.
    Any failure is logged and the process exits with status 1.
    """
    try:
        args = parse_arguments()
        logger.info("Fetch and Validate Openai key.")

        # Validate user input
        if not args.keywords and not args.youtube_urls and not args.csv and not args.scholar:
            raise ValueError("Either --keywords, --youtube_urls, --csv Or --scholar must be provided.")

        # Validate OpenAI API key
        openai_api_key = os.environ.get("OPENAI_API_KEY")
        if not openai_api_key or not check_openai_api_key(openai_api_key):
            raise EnvironmentError("Invalid or missing OPENAI_API_KEY environment variable.")
        logger.info("Valid OpenAI API key found.")

        # Handle blog generation based on input
        if args.youtube_urls:
            # Entries that are real URLs are converted directly; everything
            # else is treated as a search phrase for YouTube research.
            yt_urls = _split_comma_list(args.youtube_urls)
            valid_urls = [url for url in yt_urls if is_valid_url(url)]
            quoted_strings = [url for url in yt_urls if not is_valid_url(url)]

            if valid_urls:
                logger.info(f"Generating blogs from YouTube URLs: {valid_urls}")
                generate_youtube_blog(valid_urls)

            if quoted_strings:
                logger.info(f"Do youtube research and write blogs for: {quoted_strings}")
                generate_youtube_research_blog(quoted_strings)

        elif args.keywords:
            keyword_list = _split_comma_list(args.keywords)
            # The parser defines no num_blogs/num_subtopics options, so the
            # log line reports only what is actually known here.
            logger.info(f"Generating blogs for keywords: {keyword_list}")
            generate_keyword_blog(keyword_list)

        elif args.csv:
            try:
                data = read_csv_to_json(args.csv)
                logger.info(f"Generating blogs from csv file: {json.dumps(data, indent=4)}")
                for item in data:
                    # Each row must provide 'keyword' and 'URL' columns.
                    keyword_list = [item['keyword']]
                    generate_keyword_blog(keyword_list, item['URL'])
            except Exception as err:
                logger.error(f"Failed to generate blogs the CSV file:{err}")
                sys.exit(1)

        elif args.scholar:
            logger.info(f"Writing blog on {args.scholar} from research papers of arxiv, google & Semantic scholar.")
            # Write from arxiv urls given in a file.
            if 'arxiv_papers_url' in args.scholar:
                try:
                    logger.info("Writing scholar blogs from arxiv url list.")
                    blog_arxiv_url_list(args.scholar)
                except Exception as err:
                    logger.error(f"Failed to write from file {args.scholar} in present directory: {err}")
                    sys.exit(1)
            # Write scholar blogs from given keywords.
            else:
                try:
                    blog_arxiv_keyword(args.scholar)
                except Exception as err:
                    logger.error(f"Failed to write blog from research papers: {err}")
                    # Re-raise so the outer handler logs it and exits.
                    raise

    except Exception as e:
        logger.error(f"An error occurred: {e}")
        sys.exit(1)


def read_csv_to_json(file_path):
    """Reads a CSV file into a list of row dictionaries.

    The first row of the file is used as the header; each following row
    becomes one dict keyed by those column names.

    Args:
        file_path (str): Path of the CSV file to read.

    Returns:
        list: One dict per data row, in file order.

    Note:
        On any read or parse failure the error is logged and the process
        exits with status 1 instead of raising.
    """
    try:
        with open(file_path, newline='', encoding='utf-8') as csvfile:
            return list(csv.DictReader(csvfile))
    except Exception as err:
        logger.error(f"Failed to read the CSV file:{err}")
        sys.exit(1)


def is_valid_url(url):
    """Check if the given string is a valid URL.

    Args:
        url (str): String to check.

    Returns:
        bool: True if the string is a valid URL, False otherwise.
    """
    return _URL_PATTERN.match(url) is not None


if __name__ == "__main__":
    main()