"""
Shared API models for the RAG service.
These models are the single source of truth for request/response schemas
shared between the rag-service and the api-gateway.
"""
from typing import Any, Dict, List, Optional, TypedDict
from pydantic import BaseModel, Field, model_validator
class SourcesResponse(BaseModel):
    """Structured sources format from ResponseBuilder."""

    # Total number of source items aggregated across all categories below.
    total: int
    # Retrieved document payloads (schema defined by ResponseBuilder).
    documents: List[Dict[str, Any]]
    # Optional act-level metadata keyed by act identifier — TODO confirm key shape.
    acts: Optional[Dict[str, Any]] = None
    # Graph-retrieval extras; populated only for graph-backed query modes.
    graph_nodes: Optional[List[Dict[str, Any]]] = None
    graph_edges: Optional[List[Dict[str, Any]]] = None
    cypher_rows: Optional[List[Dict[str, Any]]] = None
    community_reports: Optional[List[Dict[str, Any]]] = None
    graph_query: Optional[Dict[str, Any]] = None
class QueryResponse(BaseModel):
    """Response model for RAG queries."""

    # Unique identifier assigned to this query execution.
    query_id: str
    # The original user question, echoed back to the caller.
    question: str
    # Generated answer text.
    answer: str
    # RAG mode used to produce the answer.
    mode: str
    # Structured sources backing the answer; absent when no sources were used.
    sources: Optional[SourcesResponse] = None
    # Arbitrary execution metadata (timings, model info, etc.) — schema not fixed here.
    metadata: Dict[str, Any]
    # Conversation threading identifiers; present only for conversational flows.
    conversation_id: Optional[str] = None
    message_id: Optional[str] = None
class SearchRequest(BaseModel):
    """Search request (semantic / lexical / hybrid).

    ``top_k``, ``mode`` and ``granularity`` are optional — when omitted the
    rag-service resolves them from ``SearchConfig`` defaults.
    """

    # Free-text query; may be omitted only when a precomputed embedding is
    # supplied (enforced by validate_query below).
    query: Optional[str] = Field(
        default=None,
        description=("Search query (required for lexical/hybrid modes, optional for semantic with query_embedding)"),
    )
    # Precomputed query vector, bypassing server-side embedding.
    query_embedding: Optional[List[float]] = Field(default=None, description="Pre-computed query embedding")
    # Result count, bounded 1..100 when given; None defers to config default.
    top_k: Optional[int] = Field(
        default=None,
        ge=1,
        le=100,
        description="Number of results. Default from config (search.default_limit).",
    )
    # Not validated here — mode checking happens in the endpoint after config
    # defaults are applied (see validate_query).
    mode: Optional[str] = Field(
        default=None,
        description="Search mode: semantic, lexical, hybrid. Default from config.",
    )
    score_threshold: Optional[float] = Field(
        default=None,
        ge=0.0,
        description=(
            "Minimum normalized score threshold (0-1) applied consistently "
            "across semantic, lexical, and hybrid retrieval"
        ),
    )
    granularity: Optional[str] = Field(
        default=None,
        description="Retrieval granularity: chunks or all. Default from config.",
    )
    embedding_preset: Optional[str] = Field(
        default=None,
        description="Embedding preset used for semantic retrieval and collection routing.",
    )
    include_full_content: bool = Field(
        default=False,
        description="Include full content in search results (default: false)",
    )
    filters: Optional[Dict[str, Any]] = Field(
        default=None,
        description=(
            "Metadata filters applied to retrieval. "
            "Supported keys: act_id, celex, act_type, subdivision_type, language. "
            'Example: {"act_type": "regulation"}'
        ),
    )

    @model_validator(mode="after")
    def validate_query(self) -> "SearchRequest":
        """Ensure at least a text query or a precomputed embedding is provided.

        Raises:
            ValueError: if both ``query`` and ``query_embedding`` are missing
                or empty (pydantic surfaces this as a ValidationError).
        """
        if not self.query and not self.query_embedding:
            raise ValueError("Either 'query' or 'query_embedding' must be provided")
        # mode validation deferred to endpoint (after config defaults are applied)
        return self
class SearchResult(BaseModel):
    """Search result item."""

    # CELEX identifier of the source act; required field but nullable
    # (callers must pass it explicitly, possibly as None).
    celex: Optional[str]
    # Identifier of the matched subdivision within the act.
    subdivision_id: int
    # Chunk identifiers; present only for chunk-granularity results.
    chunk_id: Optional[int] = None
    chunk_index: Optional[int] = None
    # Matched text content (may be truncated unless include_full_content was set).
    content: str
    # Relevance score for this hit.
    score: float
    # Arbitrary per-result metadata.
    metadata: Dict[str, Any]
    # Optional retrieval trace/debug information.
    trace: Optional[Dict[str, Any]] = None
class SearchResponse(BaseModel):
    """Search response."""

    # Unique identifier assigned to this search execution.
    search_id: str
    # Ordered result items.
    results: List[SearchResult]
    # Total number of matches (may exceed len(results) when truncated by top_k
    # — TODO confirm against the rag-service implementation).
    total: int
    # Search mode actually used after config defaults were resolved.
    mode: str