ALwrity + Wordpress + Wix + GSC integration

This commit is contained in:
ajaysi
2025-10-08 10:13:14 +05:30
parent 14dfb2e5c0
commit 3bab3450dc
147 changed files with 19815 additions and 17053 deletions

View File

@@ -1,156 +0,0 @@
import os
import streamlit as st
import google.genai as genai
from google.genai import types
from google.genai.types import Tool, GenerateContentConfig, GoogleSearch
# Configure the browser tab title, icon and overall page layout.
st.set_page_config(
    page_title="Gemini Grounding Search",
    page_icon="🔍",
    layout="wide",
)

# Stylesheet injected into the page as raw HTML. `.result-text` styles the
# answer block this script renders; the remaining classes (container, chip,
# carousel, headline, gradient) presumably style the search-suggestion markup
# returned with grounded responses -- TODO confirm against that markup.
_PAGE_CSS = """
<style>
.container {
align-items: center;
border-radius: 8px;
display: flex;
font-family: Google Sans, Roboto, sans-serif;
font-size: 14px;
line-height: 20px;
padding: 8px 12px;
background-color: #fafafa;
box-shadow: 0 0 0 1px #0000000f;
margin-top: 20px;
}
.chip {
display: inline-block;
border: solid 1px;
border-radius: 16px;
min-width: 14px;
padding: 5px 16px;
text-align: center;
user-select: none;
margin: 0 8px;
background-color: #ffffff;
border-color: #d2d2d2;
color: #5e5e5e;
text-decoration: none;
}
.chip:hover {
background-color: #f2f2f2;
}
.carousel {
overflow: auto;
scrollbar-width: none;
white-space: nowrap;
margin-right: -12px;
display: flex;
align-items: center;
}
.headline {
display: flex;
margin-right: 4px;
align-items: center;
}
.gradient-container {
position: relative;
}
.gradient {
position: absolute;
transform: translate(3px, -9px);
height: 36px;
width: 9px;
background: linear-gradient(90deg, #fafafa 15%, #fafafa00 100%);
}
.result-text {
font-size: 16px;
line-height: 1.6;
color: #202124;
margin: 20px 0;
white-space: pre-wrap;
}
@media (prefers-color-scheme: dark) {
.container {
background-color: #1f1f1f;
box-shadow: 0 0 0 1px #ffffff26;
}
.headline-label {
color: #fff;
}
.chip {
background-color: #2c2c2c;
border-color: #3c4043;
color: #fff;
}
.chip:hover {
background-color: #353536;
}
.gradient {
background: linear-gradient(90deg, #1f1f1f 15%, #1f1f1f00 100%);
}
.result-text {
color: #e8eaed;
}
}
</style>
"""
# unsafe_allow_html is required so the <style> element is emitted as markup
# instead of being displayed as text.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# Page heading.
st.title("Gemini Grounding Search")
# Imported here because this section is the only user: model output is
# HTML-escaped before being embedded in markup rendered with unsafe_allow_html.
import html

# Resolve the Gemini API key. A key configured in the environment wins;
# otherwise the user is prompted. The typed key is kept in a local variable
# (the widget returns its current value on every rerun) rather than written
# to os.environ: os.environ is process-wide, so a key stored there would be
# visible to every session served by this process.
api_key = os.environ.get('GEMINI_API_KEY', '')
if not api_key:
    api_key = st.text_input("Enter your Gemini API Key:", type="password")

# Search input
search_query = st.text_input("Enter your search query:", "When is the next total solar eclipse in the United States?")

if st.button("Search"):
    if not api_key:
        st.error("Please enter your Gemini API Key first!")
    elif not search_query.strip():
        st.error("Please enter a search query.")
    else:
        try:
            client = genai.Client(api_key=api_key)
            model_id = "gemini-2.0-flash"
            # Let the model ground its answer with Google Search.
            google_search_tool = Tool(
                google_search=GoogleSearch()
            )
            with st.spinner("Searching..."):
                response = client.models.generate_content(
                    model=model_id,
                    contents=search_query,
                    config=GenerateContentConfig(
                        tools=[google_search_tool],
                        response_modalities=["TEXT"],
                    )
                )

            # Display search results header
            st.header("Search Results")

            # The response may carry no candidates, and a candidate may carry
            # no content, so walk the structure defensively instead of
            # indexing blindly.
            candidate = response.candidates[0] if response.candidates else None
            content = getattr(candidate, 'content', None)
            parts = getattr(content, 'parts', None) or []
            # Concatenate every text part; an answer is not guaranteed to
            # arrive as a single part.
            answer_text = "".join(
                part.text for part in parts if getattr(part, 'text', None)
            )

            if answer_text:
                # Escape first: the text is model output and must not be able
                # to inject markup. Newlines become <br> afterwards so the
                # whole answer stays inside one HTML block.
                st.markdown(
                    '<div class="result-text">'
                    + html.escape(answer_text).replace('\n', '<br>')
                    + '</div>',
                    unsafe_allow_html=True,
                )
            else:
                st.warning("The model returned no text for this query.")

            # Display the grounding metadata. Each attribute can exist but be
            # None, which hasattr() cannot detect, so use getattr() with a
            # default at every step.
            grounding_metadata = getattr(candidate, 'grounding_metadata', None)
            search_entry_point = getattr(grounding_metadata, 'search_entry_point', None)
            rendered_content = getattr(search_entry_point, 'rendered_content', None)
            if rendered_content:
                st.header("Related Searches")
                # rendered_content is service-provided HTML meant to be shown
                # as-is, so it is deliberately not escaped.
                st.markdown(rendered_content, unsafe_allow_html=True)
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user instead
            # of crashing the script run.
            st.error(f"An error occurred: {str(e)}")

View File

@@ -1,108 +0,0 @@
import re #additional import for regex
import os
import json
import requests
from openai import OpenAI
# Build the OpenAI client. The legacy hyphenated variable name is still
# honoured first; the conventional underscore spelling is the fallback
# (hyphens are not valid in POSIX shell variable names, so the legacy name
# usually cannot be exported from a shell).
client = OpenAI(
    api_key=os.getenv('OPENAI-API-KEY') or os.getenv('OPENAI_API_KEY')
)

# Target URL can be any website URL, or a Google search as here.
query = "kedarkanta trek"
target_url = f"https://www.google.com/search?q={query}&gl=us"

# A timeout keeps the script from hanging forever on a stalled connection,
# and failing fast on an HTTP error avoids paying to have the model "parse"
# an error page.
response = requests.get(target_url, timeout=30)
response.raise_for_status()
html_text = response.text

# Remove unnecessary parts to prevent HUGE TOKEN cost: everything inside
# <head>, <script> and <style> elements is irrelevant to the extraction.
for tag_name in ('head', 'script', 'style'):
    html_text = re.sub(
        rf'<{tag_name}.*?>.*?</{tag_name}>', '', html_text, flags=re.DOTALL
    )

completion = client.chat.completions.create(
    model="gpt-4-1106-preview",
    messages=[
        {"role": "system", "content": "You are a master at scraping Google results data. Scrape two things: 1st. Scrape top 10 organic results data and 2nd. Scrape people_also_ask section from Google search result page."},
        {"role": "user", "content": html_text}
    ],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "parse_organic_results",
                "description": "Parse organic results from Google SERP raw HTML data nicely",
                "parameters": {
                    'type': 'object',
                    'properties': {
                        'data': {
                            'type': 'array',
                            'items': {
                                'type': 'object',
                                'properties': {
                                    'title': {'type': 'string'},
                                    'original_url': {'type': 'string'},
                                    'snippet': {'type': 'string'},
                                    'position': {'type': 'integer'}
                                }
                            }
                        }
                    }
                }
            }
        },
        {
            "type": "function",
            "function": {
                "name": "parse_people_also_ask_section",
                "description": "Parse `people also ask` section from Google SERP raw HTML",
                "parameters": {
                    'type': 'object',
                    'properties': {
                        'data': {
                            'type': 'array',
                            'items': {
                                'type': 'object',
                                'properties': {
                                    'question': {'type': 'string'},
                                    'original_url': {'type': 'string'},
                                    'answer': {'type': 'string'},
                                }
                            }
                        }
                    }
                }
            }
        }
    ],
    tool_choice="auto"
)

# With tool_choice="auto" the model decides which tools to call and in what
# order (and may call none, leaving tool_calls as None), so group the parsed
# rows by function name instead of relying on positions 0 and 1.
rows_by_function = {}
for tool_call in completion.choices[0].message.tool_calls or []:
    arguments = json.loads(tool_call.function.arguments)
    rows_by_function.setdefault(tool_call.function.name, []).extend(
        arguments.get('data', [])
    )

# Organic results. The schema marks no field as required, so read each field
# with .get() rather than failing on the first row that omits one.
organic_results = rows_by_function.get('parse_organic_results', [])
print('Organic results:')
for result in organic_results:
    print(f"Blog Title: {result.get('title')}")
    print(f"Blog URL: {result.get('original_url')}")
    print(f"Blog Snippet: {result.get('snippet')}")
    print(f"Blog Position: {result.get('position')}")
    print('---')

# People also ask
people_also_ask = rows_by_function.get('parse_people_also_ask_section', [])
print('People also ask:')
for result in people_also_ask:
    print(f"People_Also_Ask: Question: {result.get('question')}")
    print(f"People_Also_Ask: URL: {result.get('original_url')}")
    print(f"People_Also_Ask: Answer: {result.get('answer')}")
    print('---')

View File

@@ -1,30 +0,0 @@
import sys
from loguru import logger
# Drop the handlers currently registered on the logger, then attach a single
# colourised stdout handler with a compact "level|file:line:function| message"
# layout.
logger.remove()
logger.add(
    sys.stdout,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}",
    colorize=True,
)
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
def summarize_competitor_content(research_content):
    """Ask the LLM for a structured research summary of a company.

    The prompt requests an introduction/summary paragraph, a pros-and-cons
    paragraph, a pricing-model paragraph and a conclusion.

    Args:
        research_content: Company details; interpolated verbatim into the
            prompt.

    Returns:
        Whatever ``llm_text_gen`` returns for the prompt.

    Raises:
        Exception: Any error raised by ``llm_text_gen`` is logged and then
            re-raised unchanged.
    """
    prompt = f"""You are a helpful assistant writing a research report about a company. I will provide you with company details.
Summarize the given company details into multiple paragraphs.
Be extremely concise, professional, and factual as possible.
The first paragraph should be an introduction and summary of the company.
The second paragraph should include pros and cons of the company.
The third paragraph should be on their pricing model.
Include a conclusion, summarizing your research about the given company details.
Company details: '{research_content}'"""
    try:
        summary = llm_text_gen(prompt)
    except Exception as exc:
        # Record the failure here, but leave the handling decision to the caller.
        logger.error(f"Failed to get response from LLM: {exc}")
        raise exc
    return summary

View File

@@ -1,23 +0,0 @@
import sys
from loguru import logger
# Drop the handlers currently registered on the logger, then attach a single
# colourised stdout handler with a compact "level|file:line:function| message"
# layout.
logger.remove()
logger.add(
    sys.stdout,
    format="<level>{level}</level>|<green>{file}:{line}:{function}</green>| {message}",
    colorize=True,
)
from ..gpt_providers.text_generation.main_text_generation import llm_text_gen
def summarize_web_content(page_content, gpt_providers="openai"):
    """Ask the LLM for a brief summary of a web page's content.

    Args:
        page_content: Text of the web page; interpolated verbatim into the
            prompt.
        gpt_providers: Accepted for callers that pass it, but not used by
            this function.

    Returns:
        Whatever ``llm_text_gen`` returns for the prompt.

    Raises:
        Exception: Any error raised by ``llm_text_gen`` is logged and then
            re-raised unchanged.
    """
    prompt = f"""You are a helpful assistant that briefly summarizes the content of a webpage.
Summarize the given web page content below.
Web page content: '{page_content}'"""
    try:
        summary = llm_text_gen(prompt)
    except Exception as exc:
        # Record the failure here, but leave the handling decision to the caller.
        logger.error(f"summarize_web_content: Failed to get response from LLM: {exc}")
        raise exc
    return summary

View File

@@ -1,129 +0,0 @@
import os
import requests
from clint.textui import progress
from loguru import logger
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables (e.g. YOU_API_KEY, which the functions in this
# module read) from a .env file.
# NOTE: '../../.env' is a relative path, so it is resolved against the
# process's current working directory, not against this file's location.
load_dotenv(Path('../../.env'))
def search_ydc_index(search_query, num_web_results=10, country="IN"):
    """Query the YDC Index ``/search`` endpoint and return the decoded JSON.

    Args:
        search_query (str): The search query.
        num_web_results (int): Number of web results to request.
        country (str): Country code forwarded to the API.

    Returns:
        dict: The decoded JSON response on success, or ``{"error": <message>}``
        when the request fails or the response cannot be processed.

    Raises:
        KeyError: If the ``YOU_API_KEY`` environment variable is not set.
    """
    # Read outside the try block so a missing key surfaces as KeyError rather
    # than being folded into an error dict.
    api_key = os.environ["YOU_API_KEY"]
    try:
        url = "https://api.ydc-index.io/search"
        # Forward num_web_results and country as well; they were accepted by
        # the signature but never sent. Same encoding as get_rag_results.
        querystring = {
            "query": search_query,
            "num_web_results": str(num_web_results),
            "country": country,
        }
        headers = {"X-API-Key": api_key}
        # The whole body is read right away, so streaming brings nothing;
        # the timeout stops a stalled connection from blocking forever.
        response = requests.get(url, headers=headers, params=querystring, timeout=60)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
        return response.json()
    except requests.exceptions.RequestException as req_exc:
        logger.error(f"Request to YDC Index API failed: {req_exc}")
        return {"error": str(req_exc)}
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return {"error": str(e)}
def get_rag_results(search_query, num_web_results=10, country="IN"):
    """Query the YDC Index ``/rag`` endpoint and return the decoded JSON.

    A console progress bar is displayed while the request runs.

    Args:
        search_query (str): The search query.
        num_web_results (int): Number of web results to request; also used
            as the progress bar's expected size.
        country (str): Country code forwarded to the API.

    Returns:
        dict: The decoded JSON response on success, or ``{"error": <message>}``
        when the request fails or the response cannot be processed.

    Raises:
        KeyError: If the ``YOU_API_KEY`` environment variable is not set.
    """
    # Read outside the try block so a missing key surfaces as KeyError rather
    # than being folded into an error dict.
    api_key = os.environ["YOU_API_KEY"]
    try:
        url = "https://api.ydc-index.io/rag"
        querystring = {
            "query": search_query,
            "num_web_results": str(num_web_results),
            "country": country
        }
        headers = {"X-API-Key": api_key}
        with progress.Bar(expected_size=num_web_results, label="Fetching RAG Results") as bar:
            # The whole body is read right away, so streaming brings nothing;
            # the timeout stops a stalled connection from blocking forever.
            response = requests.get(url, headers=headers, params=querystring, timeout=60)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
            result_json = response.json()
            # Bar.show() expects a numeric progress value. Passing the list
            # itself raised a TypeError that the broad handler below turned
            # into an error dict even though the request had succeeded.
            web_results = result_json.get("web_results") or []
            bar.show(len(web_results))
        return result_json
    except requests.exceptions.RequestException as req_exc:
        logger.error(f"Request to YDC Index API failed: {req_exc}")
        return {"error": str(req_exc)}
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return {"error": str(e)}
def get_news_results(query, spellcheck=True):
    """Query the YDC Index ``/news`` endpoint and return the decoded JSON.

    A console progress bar is displayed while the request runs.

    Args:
        query (str): The search query.
        spellcheck (bool): Whether to enable spellcheck; sent to the API as
            the lowercase string ``"true"`` or ``"false"``.

    Returns:
        dict: The decoded JSON response on success, or ``{"error": <message>}``
        when the request fails or the response cannot be processed.

    Raises:
        KeyError: If the ``YOU_API_KEY`` environment variable is not set.
    """
    # Read outside the try block so a missing key surfaces as KeyError rather
    # than being folded into an error dict.
    api_key = os.environ["YOU_API_KEY"]
    try:
        url = "https://api.ydc-index.io/news"
        querystring = {
            "q": query,
            "spellcheck": str(spellcheck).lower()
        }
        headers = {"X-API-Key": api_key}
        with progress.Bar(expected_size=1, label="Fetching News Results") as bar:
            # The whole body is read right away, so streaming brings nothing;
            # the timeout stops a stalled connection from blocking forever.
            response = requests.get(url, headers=headers, params=querystring, timeout=60)
            response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
            result_json = response.json()
            # Bar.show() requires the progress value. Calling it with no
            # argument raised a TypeError that the broad handler below turned
            # into an error dict even though the request had succeeded.
            # The single expected step is now complete.
            bar.show(1)
        return result_json
    except requests.exceptions.RequestException as req_exc:
        logger.error(f"Request to YDC Index API failed: {req_exc}")
        return {"error": str(req_exc)}
    except Exception as e:
        logger.error(f"An error occurred: {e}")
        return {"error": str(e)}