Created
September 21, 2024 06:44
-
-
Save tikendraw/fdffe9fa2bc33e32afe23a69224078d7 to your computer and use it in GitHub Desktop.
Extract JSON code blocks for a Pydantic class, with a fallback parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import json | |
import ast | |
from pydantic import ValidationError | |
def extract_code_block(text):
    """Extract dict-like fenced code blocks using Python literal syntax.

    Scans ``text`` for triple-backtick fences (optionally tagged ``json``)
    whose content is a ``{ ... }`` literal and evaluates each one with
    ``ast.literal_eval``, so Python-style literals (single quotes, ``True``,
    ``None``) are accepted.

    NOTE: a later definition with the same name in this file replaces this
    one when the module is loaded.

    Args:
        text: Text that may contain fenced code blocks. ``None`` or an empty
            string is treated as "nothing to extract".

    Returns:
        A list with one evaluated literal per fenced block, or ``None`` when
        no block is found or when any block cannot be evaluated.
    """
    if not text:
        return None

    # Non-greedy body anchored on the closing fence: the match grows until a
    # "}" is directly followed by the fence, so nested braces are kept whole.
    code_blocks = re.findall(r'```(?:json)?\s*({.*?})\s*```', text, re.DOTALL)
    if not code_blocks:
        return None

    try:
        # ast.literal_eval only evaluates literals; it never executes code.
        return [ast.literal_eval(block) for block in code_blocks]
    except (SyntaxError, ValueError, TypeError):
        # Return None rather than an error-message string: a string is
        # truthy and would be mistaken for a list of parsed blocks by
        # callers that test the result with ``if parsed_blocks:``.
        # TypeError covers literals such as ``{[1]: 2}`` (unhashable key).
        return None
# Function to extract potential JSON/dict blocks
def extract_code_block(text):
    """Extract fenced JSON objects from ``text``.

    Scans ``text`` for triple-backtick fences (optionally tagged ``json``)
    whose content is a ``{ ... }`` object and parses each one with
    ``json.loads``.

    NOTE: this definition reuses the name of an earlier function in this
    file and therefore replaces it when the module is loaded.

    Args:
        text: Text that may contain fenced code blocks. ``None`` or an empty
            string is treated as "nothing to extract".

    Returns:
        A list with one parsed object per fenced block, or ``None`` when no
        block is found or when any block is not valid JSON (parsing is
        all-or-nothing).
    """
    if not text:
        return None

    # Non-greedy body anchored on the closing fence: the match grows until a
    # "}" is directly followed by the fence, so nested objects are kept whole.
    code_blocks = re.findall(r'```(?:json)?\s*({.*?})\s*```', text, re.DOTALL)
    if not code_blocks:
        return None

    try:
        return [json.loads(block) for block in code_blocks]
    except json.JSONDecodeError:
        return None
# Fallback function used when strict parsing of the code blocks fails
def _coerce_bare_token(raw):
    """Best-effort typing for a value that is not valid JSON on its own.

    Takes the leading run of characters up to the first whitespace or comma,
    strips surrounding quotes/commas and any closing bracket left over from
    the enclosing structure, and returns an ``int`` for plain ASCII digits,
    otherwise the cleaned string. Returns ``None`` when nothing usable is left.
    """
    token_match = re.match(r'[^\s,]+', raw)
    if token_match is None:
        return None

    token = token_match.group(0).strip('"').strip(',')
    # A trailing "}" / "]" belongs to the surrounding object unless the token
    # itself opens a (malformed) structure, which is kept verbatim.
    if not token.startswith(('{', '[')):
        token = token.rstrip('}]')
    if not token:
        return None

    # str.isdigit() alone accepts characters such as "²" that int() rejects.
    if token.isascii() and token.isdigit():
        return int(token)
    return token


def fallback_extract(text, expected_keys):
    """Scrape values for ``expected_keys`` out of loosely JSON-like text.

    For each key, the first occurrence of ``"key":`` in ``text`` is located
    and the value that follows is decoded. A well-formed JSON value (string,
    number, object, array, ``true``/``false``/``null``) is decoded with the
    standard JSON decoder, so strings containing spaces and nested objects
    are returned intact. Anything else falls back to a bare-token scan.

    Args:
        text: Raw text to search. ``None`` is treated as an empty string.
        expected_keys: Iterable of key names (strings) to look for.

    Returns:
        A dict containing every key from ``expected_keys``; keys that are
        not found, or that have no usable value, map to ``None``.
    """
    text = text or ""
    decoder = json.JSONDecoder()
    fallback_dict = {}

    for key in expected_keys:
        # re.escape: key names are data and must not be read as regex syntax.
        key_match = re.search(rf'"{re.escape(key)}"\s*:\s*', text)
        if key_match is None:
            fallback_dict[key] = None
            continue

        remainder = text[key_match.end():]
        try:
            # raw_decode parses one JSON value from the start of the string
            # and ignores whatever follows it.
            value, _ = decoder.raw_decode(remainder)
        except json.JSONDecodeError:
            value = _coerce_bare_token(remainder)
        fallback_dict[key] = value

    return fallback_dict
# Main entry point: parse fenced blocks first, then fall back to key scraping
def parse_with_fallback(text, pydantic_class):
    """Build ``pydantic_class`` instances from ``text``, with a fallback.

    First tries the fenced code blocks returned by ``extract_code_block``.
    If there are none, or if validating them fails, the field names of
    ``pydantic_class`` are scraped from the raw text with
    ``fallback_extract`` and validated as a single object.

    Args:
        text: Raw text expected to contain the data.
        pydantic_class: Pydantic model class used for validation.

    Returns:
        - a list of ``pydantic_class`` instances when the fenced blocks parse
          and validate;
        - a single ``pydantic_class`` instance when the fallback validates;
        - an error-message string when the fallback data fails validation.
    """
    # Pydantic v2 exposes field metadata as ``model_fields``; ``__fields__``
    # is the v1 name and is deprecated in v2, so it is only used when the
    # newer attribute is absent.
    fields = getattr(pydantic_class, "model_fields", None)
    if fields is None:
        fields = pydantic_class.__fields__
    expected_keys = list(fields)

    # First try to extract clean, fully parseable blocks.
    parsed_blocks = extract_code_block(text)
    if parsed_blocks:
        try:
            return [pydantic_class(**block) for block in parsed_blocks]
        except ValidationError as e:
            # Report the problem and continue with the fallback below.
            print("Validation error:", e)

    # Fallback: manually scrape key/value pairs for the model's fields.
    fallback_data = fallback_extract(text, expected_keys)
    try:
        return pydantic_class(**fallback_data)
    except ValidationError as e:
        return f"Error parsing with fallback: {e}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment