# ALwrity/main_config

###################################################
#
# Define blog content characteristics:
# This is the main config file which drives the code.
# Changing values here avoids the need for code modifications and hence improves ease of use.
#
###################################################

[blog_characteristics]

# Target length of the blog as a word count. Note: it won't be exact; it depends on the GPT provider and the max token count.
blog_length = 2000

# Tone of the blog, e.g. professional, how-to, beginner, research, programming, casual, etc.
blog_tone = "professional"

# Target audience, e.g. Gen-Z, Tech-savvy, Working professional, students, kids, etc.
blog_demographic = "All"

# Type of blog, e.g. informational, commercial, company, news, finance, competitor, programming, scholar, etc.
blog_type = "Informational"

# Language the blog is written in, e.g. German, Chinese, Arabic, Nepali, Hindi, Hindustani, etc.
blog_language = "Spanish"

# Output format of the blog: HTML, markdown or plaintext. Defaults to markdown.
blog_output_format = "markdown"

# Full path to the folder where the final blog should be stored. ex: _posts
blog_output_folder = ""

# Full path to the folder where blog images will be stored. ex: assets
blog_image_output_folder = ""


############################################################
#
# Blog Images details.
# Note: The images are created from the blog content. The blog title is used,
# after being modified to form the image generation prompt.
#
############################################################

# NOTE(review): no section header precedes these keys, so they are parsed as part of
# [blog_characteristics] - confirm the consuming code expects that.
# Image generation model. Options are dalle2, dalle3, stable-diffusion.
image_gen_model = "stable-diffusion"

# Number of blog images to include.
num_images = 1


###########################################################
#
# Define the LLM and its characteristics for fine control over the output
# Note:
###########################################################

# NOTE(review): no section header precedes these keys, so they are parsed as part of
# [blog_characteristics] - confirm the consuming code expects that.
# Choose one of the following: Openai, Google, Minstral
# NOTE(review): "Minstral" is probably meant to be "Mistral" - confirm the spelling the code expects.
gpt_provider = "openai"

# The model of the above provider to use.
model = "gpt-3.5-turbo-0125"

# Temperature controls the "creativity" or randomness of the text generated by GPT.
# A higher temperature makes the output more random and varied,
# while a lower temperature (e.g., 0.2) makes the output more deterministic and focused
# (and thus more likely to be flagged as AI content).
temperature = 0.6

# Top-p (nucleus) sampling controls the level of diversity in the generated text.
# Only the most probable tokens whose cumulative probability reaches the threshold p are considered.
# A lower top_p produces more conservative outputs drawn from higher-probability tokens,
# while a higher top_p leads to more diverse but potentially less coherent outputs.
top_p = 0.9

# "Max tokens" sets the maximum length of the output sequence generated by the model,
# measured in tokens (words or subwords).
# It helps control the length of generated text and manage computational resources during text generation.
max_tokens = 4096

# "n" is the number of completions (choices) the model generates for each prompt.
# For example, if "n" is 2, two alternative outputs are produced for the same prompt.
# NOTE(review): this is the meaning of "n" in the OpenAI API (it is unrelated to n-grams such as
# bigrams/trigrams) - confirm the code passes this value through as that parameter.
n = 1

# The frequency penalty influences word selection during text generation.
# Positive values penalize tokens in proportion to how often they have already appeared, reducing
# verbatim repetition; negative values make repetition more likely, leading to more predictable text.
# NOTE(review): the OpenAI API accepts -2 to 2 for this parameter; other providers may differ - confirm.
frequency_penalty = 1

# Presence penalty encourages the use of diverse words by discouraging repetition.
# It penalizes tokens that have already appeared in the text, prompting the model to vary its wording, as if suggesting,
# "Try using different words instead of repeating the same ones."
# Range: -2 (more flexible while generating text) to 2 (strong discouragement of repetition).
presence_penalty = 1