Last active
January 24, 2025 07:46
-
-
Save jalotra/820b7ccd94f162267ed836bad12c2e40 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging
import re

import requests
from bs4 import BeautifulSoup
from langflow.custom import Component
from langflow.io import DropdownInput, MessageTextInput, Output
from langflow.schema import Data
class CustomApiCallerComponent(Component):
    """Langflow component that issues a GET request to each configured URL.

    Every request carries an optional ``Authorization: Bearer`` header built
    from the API key, plus any extra headers and query parameters supplied as
    ``key<TOKEN>value`` strings. Each URL produces one ``Data`` result holding
    either the response text with HTML markup stripped (``TEXT``) or the
    decoded JSON body (``JSON``). Failures are reported as ``Data`` results
    rather than raised, so one bad URL does not abort the others.
    """

    display_name = "API Caller"
    description = "Fetch content from URLs using custom HTTP methods and API keys"
    icon = "layout-template"
    name = "CustomApiLoader"

    inputs = [
        MessageTextInput(
            name="urls",
            display_name="URLs",
            info="Enter one or more URLs using the '+' button",
            is_list=True,
            tool_mode=True,
        ),
        MessageTextInput(
            name="api_key",
            display_name="API Key",
            info="Add your API key for authentication",
            is_list=False,
            tool_mode=False,
        ),
        MessageTextInput(
            name="extra_headers",
            display_name="Extra Headers",
            info="Add custom headers that you want here!",
            is_list=True,
            tool_mode=False,
        ),
        MessageTextInput(
            name="extra_query_params",
            display_name="Extra Query Params",
            info="Add custom query params that you want to add here!",
            is_list=True,
            tool_mode=True,
        ),
        DropdownInput(
            name="format",
            display_name="Output Format",
            info="Choose between extracted text or raw content",
            options=["TEXT", "JSON"],
            value="JSON",
        ),
    ]

    outputs = [
        Output(display_name="Data", name="data", method="fetch_content"),
    ]

    def ensure_url(self, string: str) -> str:
        """Return *string* as an http(s) URL, raising if it is malformed.

        A missing scheme defaults to ``https://``. The scheme test is
        case-insensitive so that input such as ``HTTPS://example.com`` is not
        given a second prefix and then rejected.

        Raises:
            ValueError: If the resulting string does not match the URL pattern.
        """
        if not string.lower().startswith(("http://", "https://")):
            string = "https://" + string
        url_pattern = re.compile(
            r"^(https?://)?"  # Protocol
            r"(www\.)?"  # WWW prefix
            r"([a-zA-Z0-9-]+\.)*"  # Subdomains
            r"[a-zA-Z0-9-]+\.[a-zA-Z]{2,}"  # Domain and TLD
            r"(:\d+)?(/[^\s]*)?$",  # Port and path
            re.IGNORECASE,
        )
        if not url_pattern.match(string):
            raise ValueError(f"Invalid URL format: {string}")
        return string

    def parse_header_or_query(self, item: str) -> tuple[str, str]:
        """Split a ``key<TOKEN>value`` entry into a stripped ``(key, value)`` pair.

        The entry is deliberately not echoed anywhere: header and query values
        may carry credentials.

        Raises:
            ValueError: If the separator does not occur exactly once, or the
                key is empty after stripping.
        """
        parts = item.split("<TOKEN>")
        if len(parts) != 2 or not parts[0].strip():
            raise ValueError(f"Invalid format for header/query parameter: {item}. Expected 'key<TOKEN>value'")
        return parts[0].strip(), parts[1].strip()

    @staticmethod
    def _as_list(value) -> list:
        """Normalize an input value to a list; falsy values become ``[]``."""
        if not value:
            return []
        # A bare string would otherwise be iterated character by character.
        # NOTE(review): presumably only reachable in tool mode - confirm.
        if isinstance(value, str):
            return [value]
        return list(value)

    def _build_headers(self) -> dict[str, str]:
        """Build request headers from the API key and the extra header entries."""
        headers: dict[str, str] = {}
        api_key = (self.api_key or "").strip()
        if api_key:
            headers["Authorization"] = f"Bearer {api_key}"
        # Extra headers are applied last, so they can override Authorization.
        headers.update(
            self.parse_header_or_query(entry)
            for entry in self._as_list(self.extra_headers)
        )
        return headers

    def _build_query_params(self) -> dict[str, str]:
        """Build the query-parameter dict from the extra query-param entries."""
        return dict(
            self.parse_header_or_query(entry)
            for entry in self._as_list(self.extra_query_params)
        )

    def _fetch_one(self, url: str, headers: dict, query_dict: dict) -> Data:
        """Fetch a single URL and wrap the outcome, success or failure, in ``Data``."""
        try:
            clean_url = self.ensure_url(url.strip())
            response = requests.request(
                method="GET",
                url=clean_url,
                headers=headers,
                params=query_dict,
                timeout=10,
            )
            response.raise_for_status()

            if self.format == "TEXT":
                # Strip markup and keep only the visible text, one node per line.
                soup = BeautifulSoup(response.text, "html.parser")
                content = soup.get_text(separator="\n", strip=True)
            elif self.format == "JSON":
                content = response.json()
            else:
                raise ValueError("Formats supported are 'JSON' and 'TEXT'")

            return Data(
                text=content,
                metadata={
                    "url": clean_url,
                    "status_code": response.status_code,
                    "content_type": response.headers.get("Content-Type", ""),
                    "method": "GET",
                },
            )
        except Exception as e:
            # Per-URL boundary: report the failure as a result so the
            # remaining URLs are still processed.
            return Data(
                text=f"Error processing {url}: {str(e)}",
                metadata={
                    "url": url,
                    "error": str(e),
                    "method": "GET",
                },
            )

    def fetch_content(self) -> list[Data]:
        """Fetch every configured URL and return one ``Data`` per URL.

        Returns:
            One ``Data`` per URL, either the fetched content or a per-URL error
            entry. If the headers or query parameters cannot be built, the
            list holds a single "Component error" entry instead.
        """
        results: list[Data] = []
        try:
            headers = self._build_headers()
            query_dict = self._build_query_params()
            # Log key names only; the values may contain the bearer token or
            # other secrets.
            logging.getLogger(__name__).debug(
                "Request header names: %s; query param names: %s",
                sorted(headers),
                sorted(query_dict),
            )
            results.extend(
                self._fetch_one(url, headers, query_dict)
                for url in self._as_list(self.urls)
            )
        except Exception as e:
            # Component-level boundary: configuration errors become a result
            # instead of an exception.
            results.append(Data(
                text=f"Component error: {str(e)}",
                metadata={"error": str(e)},
            ))
        return results
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment