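The system uses a multi-database architecture in which three specialized databases — a relational database for structured case records, a vector database for semantic embeddings, and a graph database for case relationships — work together to provide comprehensive legal case search and discovery capabilities.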
-- Core relational record for a legal case: one row per case.
-- No default is declared for id, so the writer must supply the UUID.
CREATE TABLE cases (
    id UUID PRIMARY KEY,
    case_number VARCHAR(100) UNIQUE NOT NULL,
    title TEXT NOT NULL,
    court_name VARCHAR(200) NOT NULL,
    judge_name VARCHAR(200),
    filing_date DATE,
    decision_date DATE,
    case_type VARCHAR(50), -- 'criminal', 'civil', 'constitutional'
    case_status VARCHAR(30), -- 'pending', 'decided', 'appealed'
    summary TEXT,
    full_text TEXT,
    articles_involved TEXT[], -- Array of legal articles
    -- Audit instants are stored with time zone so they denote an unambiguous
    -- point in time regardless of the session's TimeZone setting.
    -- The DEFAULT only fires on INSERT; nothing in this schema refreshes
    -- updated_at on UPDATE, so writers must set it themselves.
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- Equality / ANY() filters on court and case type.
CREATE INDEX idx_cases_court_name ON cases(court_name);
CREATE INDEX idx_cases_case_type ON cases(case_type);
-- GIN index supports array operators (&&, @>) on articles_involved.
CREATE INDEX idx_cases_articles ON cases USING GIN(articles_involved);
-- Range filters on filing date.
CREATE INDEX idx_cases_filing_date ON cases(filing_date);
-- Directed citation edge between two cases: citing_case_id -> cited_case_id.
CREATE TABLE case_citations (
    id UUID PRIMARY KEY,
    -- Both endpoints are mandatory: a citation row without a citing or a
    -- cited case carries no information and would be skipped by every join.
    citing_case_id UUID NOT NULL REFERENCES cases(id),
    cited_case_id UUID NOT NULL REFERENCES cases(id),
    citation_context TEXT,
    citation_type VARCHAR(50), -- 'precedent', 'reference', 'distinction'
    -- Stored with time zone so the instant is unambiguous across sessions.
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);

-- One index per direction: "what does this case cite" and "who cites this case".
CREATE INDEX idx_citations_citing ON case_citations(citing_case_id);
CREATE INDEX idx_citations_cited ON case_citations(cited_case_id);
{
"index_name": "legal_cases_embeddings",
"dimension": 1536,
"metric": "cosine",
"metadata_config": {
"indexed": ["case_id", "court_name", "case_type", "articles_involved"]
}
}
{
"id": "case_uuid",
"values": [0.1, 0.2, ...], // 1536-dimensional embedding
"metadata": {
"case_id": "case_uuid",
"court_name": "Delhi High Court",
"case_type": "criminal",
"articles_involved": ["article_48", "article_21"],
"title": "Case title",
"summary": "Case summary"
}
}
// Graph schema for the case-relationship store.
// Only the CREATE CONSTRAINT line below is an executable statement; the
// node and relationship patterns that follow are schema notation listing
// each property with its intended type.
// Case Node
// Uniqueness constraint: no two Case nodes may share the same case_id.
CREATE CONSTRAINT case_id_unique FOR (c:Case) REQUIRE c.case_id IS UNIQUE;
(:Case {
case_id: "UUID",
case_number: "string",
title: "string",
court_name: "string",
case_type: "string"
})
// Article Node
// NOTE(review): no uniqueness constraint is declared for Article here;
// presumably article_number is the natural key — confirm.
(:Article {
article_number: "string",
article_title: "string",
section: "string"
})
// Judge Node
// NOTE(review): no uniqueness constraint is declared for Judge here;
// presumably judge_id is the key — confirm.
(:Judge {
judge_id: "UUID",
name: "string",
court: "string"
})
// Case relationships
// All relationships are directed and start at a Case node.
// CITES: case1 cites case2.
(case1:Case)-[:CITES]->(case2:Case)
// SIMILAR_TO: carries a float similarity_score property on the relationship.
(case1:Case)-[:SIMILAR_TO {similarity_score: float}]->(case2:Case)
// INVOLVES: links a case to an Article node.
(case:Case)-[:INVOLVES]->(article:Article)
// DECIDED_BY: links a case to a Judge node.
(case:Case)-[:DECIDED_BY]->(judge:Judge)
// APPEALS: case1 is an appeal of case2 (direction as drawn).
(case1:Case)-[:APPEALS]->(case2:Case)
/**
 * Search entry points exposed to callers. All methods are asynchronous.
 */
interface SearchService {
// Hybrid search combining vector and relational
// Takes a full SearchQuery (text + filters + paging) and resolves to SearchResults.
hybridSearch(query: SearchQuery): Promise<SearchResults>;
// Pure semantic search
// Resolves to case identifiers only, not full Case records; filters are optional.
semanticSearch(query: string, filters?: SearchFilters): Promise<CaseId[]>;
// Structured search
// Filter-only lookup with no free-text input; resolves to full Case records.
structuredSearch(filters: SearchFilters): Promise<Case[]>;
}
/**
 * Input to a hybrid search: free text plus structured filters and optional paging.
 */
interface SearchQuery {
// Free-text query; HybridSearchEngine.search passes it to the vector database.
text: string;
// Structured filters; required here (pass an empty object for "no filters").
filters: SearchFilters;
// Optional page size; HybridSearchEngine.search binds it to the SQL LIMIT.
limit?: number;
// Optional row offset; HybridSearchEngine.search binds it to the SQL OFFSET.
offset?: number;
}
/**
 * Optional structured filters. Every field may be omitted.
 * In this file only courtNames and articlesInvolved are translated into SQL
 * by HybridSearchEngine.buildFilterClause; the other fields have no visible consumer.
 */
interface SearchFilters {
// Matched against cases.court_name with = ANY(...), i.e. any of the listed courts.
courtNames?: string[];
// NOTE(review): presumably matched against cases.case_type — no handling visible here.
caseTypes?: string[];
// Matched against cases.articles_involved with the array-overlap operator (&&).
articlesInvolved?: string[];
// Both bounds are required when dateRange is given.
// NOTE(review): which date column (filing or decision) and whether the bounds
// are inclusive is not shown in this file — confirm.
dateRange?: {
from: Date;
to: Date;
};
// NOTE(review): presumably judge names (cases.judge_name) — confirm.
judges?: string[];
}
/**
 * Read-side queries against the case relationship graph. All methods are asynchronous.
 */
interface GraphService {
// Find related cases
// NOTE(review): depth presumably bounds the number of relationship hops
// traversed from caseId; its default is not defined here — confirm.
getRelatedCases(caseId: string, depth?: number): Promise<RelatedCase[]>;
// Find citation network
// Resolves to a CitationGraph for the given case.
getCitationNetwork(caseId: string): Promise<CitationGraph>;
// Find similar cases by legal principles
// limit optionally caps the number of cases returned; default not defined here.
getSimilarCasesByPrinciple(caseId: string, limit?: number): Promise<Case[]>;
}
/**
 * One entry in the result of GraphService.getRelatedCases.
 */
interface RelatedCase {
// The related case record.
case: Case;
// NOTE(review): presumably the relationship type name (e.g. CITES, SIMILAR_TO) — confirm.
relationship: string;
// NOTE(review): numeric weight of the relation; scale and range are not defined here.
strength: number;
// NOTE(review): presumably the identifiers along the traversal path from the
// queried case to this one — confirm element type and ordering.
path: string[];
}
/**
 * Runs a hybrid search in three steps: semantic lookup in the vector
 * database, structured filtering in the relational database, then
 * enrichment with graph data.
 *
 * enrichWithGraphData and buildFacets are called here but are not part of
 * this snippet.
 */
class HybridSearchEngine {
  constructor(
    private vectorDB: VectorDatabase,
    private relationalDB: RelationalDatabase,
    private graphDB: GraphDatabase
  ) {}

  /**
   * Executes the hybrid search for one query.
   *
   * @param query free text, structured filters and optional paging
   * @returns the enriched page of cases, a count, and facets
   */
  async search(query: SearchQuery): Promise<SearchResults> {
    // Step 1: Semantic search for relevant case IDs
    const semanticCaseIds = await this.vectorDB.search(
      query.text,
      query.filters
    );

    // Step 2: Apply structured filters.
    // $1 is always the semantic ID list. buildFilterClause appends each
    // filter value to `params` and emits the matching $n placeholder, so the
    // SQL text and the bound values cannot drift apart. LIMIT/OFFSET are
    // bound last, and their positions are computed rather than hard-coded.
    const params: unknown[] = [semanticCaseIds];
    const filterClause = this.buildFilterClause(query.filters, params);
    // A null LIMIT means "no limit" in PostgreSQL, which is what an omitted
    // limit amounted to before; an omitted offset starts at row 0.
    params.push(query.limit ?? null);
    const limitParam = params.length;
    params.push(query.offset ?? 0);
    const offsetParam = params.length;

    // NOTE(review): ordering by decision_date discards the relevance order
    // returned by the vector search — confirm this is intended.
    const filteredCases = await this.relationalDB.query(`
      SELECT * FROM cases
      WHERE id = ANY($1)
      ${filterClause}
      ORDER BY decision_date DESC
      LIMIT $${limitParam} OFFSET $${offsetParam}
    `, params);

    // Step 3: Enrich with graph data
    const enrichedCases = await this.enrichWithGraphData(filteredCases);

    return {
      cases: enrichedCases,
      // NOTE(review): this is the size of the returned page, not the number
      // of matches across all pages; a separate COUNT(*) is needed for that.
      total: filteredCases.length,
      facets: await this.buildFacets(semanticCaseIds)
    };
  }

  /**
   * Builds the extra `AND ...` conditions for the structured filters.
   *
   * Each active filter pushes its value onto `params` and references it by
   * the resulting 1-based position, so values are always bound as
   * parameters and never interpolated into the SQL text.
   *
   * @param filters structured filters; absent or empty fields add nothing
   * @param params  parameter list to append to (already holding $1..$k)
   * @returns SQL fragment, empty when no filter is active
   */
  private buildFilterClause(filters: SearchFilters, params: unknown[] = []): string {
    const conditions: string[] = [];

    if (filters?.courtNames?.length) {
      params.push(filters.courtNames);
      conditions.push(`AND court_name = ANY($${params.length})`);
    }

    if (filters?.articlesInvolved?.length) {
      params.push(filters.articlesInvolved);
      conditions.push(`AND articles_involved && $${params.length}`);
    }

    // Add more filter conditions...

    return conditions.join(' ');
  }
}
/**
 * Generates text embeddings and writes case vectors to the vector database.
 *
 * NOTE(review): indexCase uses this.vectorDB, which is not declared on this
 * class in the visible source; it must be provided (e.g. injected) before
 * indexCase can run — confirm where it comes from.
 */
class EmbeddingService {
  constructor(private embeddingModel: EmbeddingModel) {}

  /**
   * Embeds a piece of text with the configured model.
   *
   * @param text text to embed
   * @returns the embedding vector produced by the model
   */
  async generateEmbedding(text: string): Promise<number[]> {
    // Use OpenAI embeddings or similar
    return await this.embeddingModel.embed(text);
  }

  /**
   * Embeds a case and upserts it into the vector index under the case's id.
   *
   * @param caseData case record to index
   */
  async indexCase(caseData: Case): Promise<void> {
    // summary and full_text are nullable in the cases table. Drop missing
    // parts instead of interpolating them, otherwise the literal words
    // "null"/"undefined" end up in the embedded text.
    const textContent = [caseData.title, caseData.summary, caseData.full_text]
      .filter((part) => Boolean(part))
      .join(' ');
    const embedding = await this.generateEmbedding(textContent);

    await this.vectorDB.upsert({
      id: caseData.id,
      values: embedding,
      metadata: {
        case_id: caseData.id,
        court_name: caseData.court_name,
        case_type: caseData.case_type,
        articles_involved: caseData.articles_involved,
        // title and summary are part of the documented vector record format,
        // so store them with the vector for display without a second lookup.
        title: caseData.title,
        summary: caseData.summary ?? ''
      }
    });
  }
}
/**
 * Populates the graph database with Case nodes and their relationships.
 *
 * NOTE(review): this.graphDB, this.vectorDB and this.embeddingService are
 * used below but not declared in the visible source, and
 * createCitationRelationships / createArticleRelationships are not part of
 * this snippet — confirm where they are provided.
 */
class GraphBuilderService {
  /**
   * Upserts a Case node for every case, then builds their relationships.
   *
   * Runs in two passes. The relationship queries MATCH both endpoints, so a
   * relationship to a case that appears later in the same batch would be
   * silently skipped if nodes and relationships were created in one pass.
   *
   * @param cases cases to add to (or refresh in) the graph
   */
  async buildCaseGraph(cases: Case[]): Promise<void> {
    // Pass 1: create or update every case node.
    for (const caseData of cases) {
      // Parameters are mapped explicitly: the query binds $case_id, while
      // the case record exposes the identifier as `id`.
      await this.graphDB.run(`
        MERGE (c:Case {case_id: $case_id})
        SET c.case_number = $case_number,
            c.title = $title,
            c.court_name = $court_name,
            c.case_type = $case_type
      `, {
        case_id: caseData.id,
        case_number: caseData.case_number,
        title: caseData.title,
        court_name: caseData.court_name,
        case_type: caseData.case_type ?? null
      });
    }

    // Pass 2: create relationships now that every node in the batch exists.
    for (const caseData of cases) {
      await this.createCitationRelationships(caseData);
      await this.createArticleRelationships(caseData);
      await this.createSimilarityRelationships(caseData);
    }
  }

  /**
   * Links a case to its nearest neighbours in the vector index with
   * SIMILAR_TO relationships carrying the similarity score.
   *
   * @param caseData case whose neighbours are looked up
   */
  private async createSimilarityRelationships(caseData: Case): Promise<void> {
    // Find similar cases using vector similarity.
    // summary is nullable, so fall back to the title rather than embedding
    // a missing value.
    const similarCases = await this.vectorDB.query({
      vector: await this.embeddingService.generateEmbedding(
        caseData.summary || caseData.title
      ),
      topK: 10,
      includeMetadata: true
    });

    for (const similar of similarCases.matches) {
      // An indexed case is its own nearest neighbour; skip it so the graph
      // does not get a SIMILAR_TO self-loop.
      if (similar.metadata.case_id === caseData.id) {
        continue;
      }

      if (similar.score > 0.8) { // Threshold for similarity
        // MERGE on the relationship alone and SET the score afterwards.
        // With the score inside the MERGE pattern, a re-run with a slightly
        // different score would add a second SIMILAR_TO edge.
        await this.graphDB.run(`
          MATCH (c1:Case {case_id: $case_id1})
          MATCH (c2:Case {case_id: $case_id2})
          MERGE (c1)-[r:SIMILAR_TO]->(c2)
          SET r.similarity_score = $score
        `, {
          case_id1: caseData.id,
          case_id2: similar.metadata.case_id,
          score: similar.score
        });
      }
    }
  }
}
- User Query → API Gateway
- Query Processing → Search Service
- Semantic Search → Vector DB (returns case IDs)
- Structured Filtering → Relational DB (filters by case IDs + criteria)
- Graph Enrichment → Graph DB (adds related cases)
- Response Assembly → Return to user
- New Case Data → Ingestion Service
- Store Structured Data → Relational DB
- Generate Embeddings → Vector DB
- Build Graph Relationships → Graph DB
- Update Search Indices → Complete
- Redis Cache for frequent queries
- Cache Keys: search:{query_hash}, case:{case_id}, related:{case_id}
- TTL: 1 hour for search results, 24 hours for case data
- Batch Processing for large case imports
- Real-time Updates for new cases
- Background Jobs for similarity calculations
- Read Replicas for relational database
- Vector Index Sharding by court or date range
- Graph Database Clustering for large datasets
- Vector DB unavailable → Fallback to keyword search
- Graph DB timeout → Return results without recommendations
- Embedding service failure → Queue for retry
- Search latency percentiles
- Vector similarity score distributions
- Graph traversal performance
- Cache hit rates