Skip to content

Instantly share code, notes, and snippets.

@jalotra
Last active January 24, 2025 07:46
Show Gist options
  • Save jalotra/820b7ccd94f162267ed836bad12c2e40 to your computer and use it in GitHub Desktop.
Save jalotra/820b7ccd94f162267ed836bad12c2e40 to your computer and use it in GitHub Desktop.
import re
import requests
from bs4 import BeautifulSoup
from langflow.custom import Component
from langflow.io import DropdownInput, MessageTextInput, Output
from langflow.schema import Data
class CustomApiCallerComponent(Component):
    """Langflow component that fetches content from one or more URLs via GET.

    Every request carries an optional ``Authorization: Bearer <api_key>``
    header plus any user-supplied extra headers and query parameters. Each
    URL yields exactly one ``Data`` item: either the fetched content or an
    error description, so a failing URL never hides the others.
    """

    display_name = "API Caller"
    description = "Fetch content from URLs using custom HTTP methods and API keys"
    icon = "layout-template"
    name = "CustomApiLoader"

    # Compiled once at class creation instead of on every ensure_url() call.
    _URL_PATTERN = re.compile(
        r"^(https?://)?"  # Protocol
        r"(www\.)?"  # WWW prefix
        r"([a-zA-Z0-9-]+\.)*"  # Subdomains
        r"[a-zA-Z0-9-]+\.[a-zA-Z]{2,}"  # Domain and TLD
        r"(:\d+)?(/[^\s]*)?$",  # Port and path
        re.IGNORECASE,
    )

    inputs = [
        MessageTextInput(
            name="urls",
            display_name="URLs",
            info="Enter one or more URLs using the '+' button",
            is_list=True,
            tool_mode=True,
        ),
        MessageTextInput(
            name="api_key",
            display_name="API Key",
            info="Add your API key for authentication",
            is_list=False,
            tool_mode=False,
        ),
        MessageTextInput(
            name="extra_headers",
            display_name="Extra Headers",
            info="Add custom headers that you want here!",
            is_list=True,
            tool_mode=False,
        ),
        MessageTextInput(
            name="extra_query_params",
            display_name="Extra Query Params",
            info="Add custom query params that you want to add here!",
            is_list=True,
            tool_mode=True,
        ),
        DropdownInput(
            name="format",
            display_name="Output Format",
            info="Choose between extracted text or raw content",
            options=["TEXT", "JSON"],
            value="JSON",
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="fetch_content"),
    ]

    def ensure_url(self, string: str) -> str:
        """Return ``string`` as a URL, prefixing ``https://`` when no scheme is given.

        Args:
            string: Candidate URL, with or without an ``http(s)://`` prefix.

        Returns:
            The URL, guaranteed to start with ``http://`` or ``https://``.

        Raises:
            ValueError: If the result does not match the expected
                ``host.tld[:port][/path]`` shape.
        """
        if not string.startswith(("http://", "https://")):
            string = "https://" + string
        if not self._URL_PATTERN.match(string):
            raise ValueError(f"Invalid URL format: {string}")
        return string

    def parse_header_or_query(self, item: str) -> tuple[str, str]:
        """Split a ``key<TOKEN>value`` entry into a stripped ``(key, value)`` pair.

        Only the first occurrence of the literal ``<TOKEN>`` separator is
        used, so the value itself may contain the separator text.

        Args:
            item: One header or query-parameter entry.

        Returns:
            The key and the value, each with surrounding whitespace removed.

        Raises:
            ValueError: If ``item`` does not contain the separator.
        """
        parts = item.split("<TOKEN>", 1)
        if len(parts) != 2:
            raise ValueError(f"Invalid format for header/query parameter: {item}. Expected 'key<TOKEN>value'")
        return parts[0].strip(), parts[1].strip()

    def _as_list(self, value) -> list:
        """Normalise a list-style input: ``None`` -> ``[]``, a bare string -> ``[string]``."""
        if not value:
            return []
        if isinstance(value, str):
            # Iterating a bare string would yield single characters.
            return [value]
        return list(value)

    def _parse_pairs(self, items) -> dict:
        """Build a dict from ``key<TOKEN>value`` entries, skipping blank entries."""
        pairs = {}
        for item in self._as_list(items):
            # A blank entry carries no information; skipping it keeps one
            # empty list slot from aborting the whole component.
            if isinstance(item, str) and not item.strip():
                continue
            key, value = self.parse_header_or_query(item)
            pairs[key] = value
        return pairs

    def _fetch_one(self, url, headers: dict, query_dict: dict) -> Data:
        """GET a single URL and wrap the outcome (content or error) in a ``Data``."""
        try:
            clean_url = self.ensure_url(url.strip())
            response = requests.request(
                method="GET",
                url=clean_url,
                headers=headers,
                params=query_dict,
                timeout=10,
            )
            response.raise_for_status()

            if self.format == "TEXT":
                # Reduce the response body to its visible text, one
                # fragment per line.
                soup = BeautifulSoup(response.text, "html.parser")
                content = soup.get_text(separator="\n", strip=True)
            elif self.format == "JSON":
                content = response.json()
            else:
                raise ValueError("Formats supported are 'JSON' and 'TEXT'")

            return Data(
                text=content,
                metadata={
                    "url": clean_url,
                    "status_code": response.status_code,
                    "content_type": response.headers.get("Content-Type", ""),
                    "method": "GET",
                },
            )
        except Exception as e:
            # Per-URL boundary: report the failure as data so the remaining
            # URLs are still processed.
            return Data(
                text=f"Error processing {url}: {str(e)}",
                metadata={
                    "url": url,
                    "error": str(e),
                    "method": "GET",
                },
            )

    def fetch_content(self) -> list[Data]:
        """Fetch every configured URL and return one ``Data`` per URL.

        Headers and query parameters are built once and shared by all
        requests. The assembled headers are deliberately not printed or
        logged because they may contain the API key.

        Returns:
            One ``Data`` per URL holding either the content or a per-URL
            error. If building the headers or query parameters fails, a
            single "Component error" item is appended instead.
        """
        results = []
        try:
            headers = {}
            api_key = (self.api_key or "").strip()
            if api_key:
                headers["Authorization"] = f"Bearer {api_key}"
            # Extra headers are applied last so they can override the default.
            headers.update(self._parse_pairs(self.extra_headers))
            query_dict = self._parse_pairs(self.extra_query_params)

            for url in self._as_list(self.urls):
                results.append(self._fetch_one(url, headers, query_dict))
        except Exception as e:
            # Component-level boundary: surface configuration errors as data
            # rather than raising into the flow.
            results.append(Data(
                text=f"Component error: {str(e)}",
                metadata={"error": str(e)},
            ))
        return results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment