# Source code for lalandre_rag.models.api

"""
Shared API models for the RAG service.

These models are the single source of truth for request/response schemas
shared between the rag-service and the api-gateway.
"""

from typing import Any, Dict, List, Optional, TypedDict

from pydantic import BaseModel, Field, model_validator


class QueryMetadata(TypedDict, total=False):
    """Schema of the well-known keys in ``QueryResponse.metadata``.

    ``total=False`` makes every key optional; retrieval modes may also
    attach extra, mode-specific keys beyond the ones declared here.
    """

    top_k: int                              # requested number of results
    num_sources: int                        # number of sources actually returned
    min_score: float                        # score cutoff applied during retrieval
    llm_fallback: bool                      # True when the LLM answered without retrieval
    llm_fallback_reason: str                # why the fallback path was taken
    fallback_search_error: str              # error text from the failed search, if any
    retrieval_overview: Dict[str, Any]      # summary of the retrieval run
    graph_cypher_support: Dict[str, Any]    # graph/Cypher diagnostic payload
class SourcesResponse(BaseModel):
    """Structured sources payload produced by the ResponseBuilder.

    ``total`` and ``documents`` are always present; the remaining fields
    are populated only by the retrieval modes that produce them.
    """

    total: int                                              # overall source count
    documents: List[Dict[str, Any]]                         # document-level source entries
    acts: Optional[Dict[str, Any]] = None                   # act-level grouping, when available
    graph_nodes: Optional[List[Dict[str, Any]]] = None      # graph-mode node hits
    graph_edges: Optional[List[Dict[str, Any]]] = None      # graph-mode edge hits
    cypher_rows: Optional[List[Dict[str, Any]]] = None      # raw rows from a Cypher query
    community_reports: Optional[List[Dict[str, Any]]] = None  # community summaries (graph RAG)
    graph_query: Optional[Dict[str, Any]] = None            # the graph query that was executed
class QueryResponse(BaseModel):
    """Response envelope for a RAG query.

    Carries the generated answer together with the originating question,
    the retrieval mode used, optional structured sources, and free-form
    run metadata (see ``QueryMetadata`` for the known keys).
    """

    query_id: str                               # unique id for this query run
    question: str                               # the user's original question
    answer: str                                 # generated answer text
    mode: str                                   # retrieval mode that produced the answer
    sources: Optional[SourcesResponse] = None   # structured sources, when retrieval ran
    metadata: Dict[str, Any]                    # run diagnostics; known keys in QueryMetadata
    conversation_id: Optional[str] = None       # chat thread this query belongs to, if any
    message_id: Optional[str] = None            # message within the conversation, if any
class SearchRequest(BaseModel):
    """Request body for search (semantic / lexical / hybrid).

    ``top_k``, ``mode`` and ``granularity`` are optional — when omitted the
    rag-service resolves them from ``SearchConfig`` defaults.
    """

    # Text query; may be omitted for semantic search with a precomputed embedding.
    query: Optional[str] = Field(
        default=None,
        description=("Search query (required for lexical/hybrid modes, optional for semantic with query_embedding)"),
    )
    # Caller-supplied embedding vector, bypassing server-side embedding.
    query_embedding: Optional[List[float]] = Field(default=None, description="Pre-computed query embedding")
    # Result count, capped at 100; None defers to config.
    top_k: Optional[int] = Field(
        default=None,
        ge=1,
        le=100,
        description="Number of results. Default from config (search.default_limit).",
    )
    # Retrieval mode; validated server-side after config defaults are applied.
    mode: Optional[str] = Field(
        default=None,
        description="Search mode: semantic, lexical, hybrid. Default from config.",
    )
    # Normalized score cutoff shared by all retrieval modes.
    score_threshold: Optional[float] = Field(
        default=None,
        ge=0.0,
        description=(
            "Minimum normalized score threshold (0-1) applied consistently "
            "across semantic, lexical, and hybrid retrieval"
        ),
    )
    # Retrieval granularity; None defers to config.
    granularity: Optional[str] = Field(
        default=None,
        description="Retrieval granularity: chunks or all. Default from config.",
    )
    # Selects the embedding model / collection for semantic retrieval.
    embedding_preset: Optional[str] = Field(
        default=None,
        description="Embedding preset used for semantic retrieval and collection routing.",
    )
    # When True, results carry full content instead of snippets.
    include_full_content: bool = Field(
        default=False,
        description="Include full content in search results (default: false)",
    )
    # Optional metadata filters narrowing the search scope.
    filters: Optional[Dict[str, Any]] = Field(
        default=None,
        description=(
            "Metadata filters applied to retrieval. "
            "Supported keys: act_id, celex, act_type, subdivision_type, language. "
            'Example: {"act_type": "regulation"}'
        ),
    )

    @model_validator(mode="after")
    def validate_query(self) -> "SearchRequest":
        """Ensure at least a text query or a precomputed embedding is provided."""
        # mode validation deferred to endpoint (after config defaults are applied)
        if self.query or self.query_embedding:
            return self
        raise ValueError("Either 'query' or 'query_embedding' must be provided")
class SearchResult(BaseModel):
    """A single hit returned by the search endpoint."""

    # NOTE(review): no default, so the field is required even though its
    # value may be None — callers must pass celex explicitly. Confirm intended.
    celex: Optional[str]
    subdivision_id: int                         # id of the matched subdivision
    chunk_id: Optional[int] = None              # id of the matched chunk, when chunk-level
    chunk_index: Optional[int] = None           # position of the chunk within its subdivision
    content: str                                # matched text (snippet or full content)
    score: float                                # relevance score for this hit
    metadata: Dict[str, Any]                    # arbitrary per-hit metadata
    trace: Optional[Dict[str, Any]] = None      # optional retrieval trace/debug info
class SearchResponse(BaseModel):
    """Envelope for search results."""

    search_id: str                  # unique id for this search run
    results: List[SearchResult]     # ranked hits
    total: int                      # number of hits returned
    mode: str                       # retrieval mode actually used