"""
Shared API models for the RAG service.
These models are the single source of truth for request/response schemas
shared between the rag-service and the api-gateway.
"""
from typing import Any, Dict, List, Optional, TypedDict
from pydantic import BaseModel, Field, model_validator
class SourcesResponse(BaseModel):
    """Structured sources format from ResponseBuilder."""

    # Total number of source items aggregated across all categories below.
    total: int
    # Retrieved document payloads (schema defined by ResponseBuilder).
    documents: List[Dict[str, Any]]
    # Optional act-level metadata keyed by act identifier — TODO confirm key shape.
    acts: Optional[Dict[str, Any]] = None
    # Graph-retrieval extras; populated only for graph-backed query modes.
    graph_nodes: Optional[List[Dict[str, Any]]] = None
    graph_edges: Optional[List[Dict[str, Any]]] = None
    cypher_rows: Optional[List[Dict[str, Any]]] = None
    community_reports: Optional[List[Dict[str, Any]]] = None
    graph_query: Optional[Dict[str, Any]] = None
class QueryResponse(BaseModel):
    """Response model for RAG queries."""

    # Unique identifier assigned to this query execution.
    query_id: str
    # The original user question, echoed back to the caller.
    question: str
    # Generated answer text.
    answer: str
    # RAG mode used to produce the answer.
    mode: str
    # Structured sources backing the answer; absent when no sources were used.
    sources: Optional[SourcesResponse] = None
    # Arbitrary execution metadata (timings, model info, etc.) — schema not fixed here.
    metadata: Dict[str, Any]
    # Conversation threading identifiers; present only for conversational flows.
    conversation_id: Optional[str] = None
    message_id: Optional[str] = None
class SearchRequest(BaseModel):
    """Search request (semantic / lexical / hybrid).

    ``top_k``, ``mode`` and ``granularity`` are optional — when omitted the
    rag-service resolves them from ``SearchConfig`` defaults.
    """

    # Free-text query; may be omitted only when a precomputed embedding is
    # supplied (enforced by validate_query below).
    query: Optional[str] = Field(
        default=None,
        description=("Search query (required for lexical/hybrid modes, optional for semantic with query_embedding)"),
    )
    # Precomputed query vector, bypassing server-side embedding.
    query_embedding: Optional[List[float]] = Field(default=None, description="Pre-computed query embedding")
    # Result count, bounded 1..100 when given; None defers to config default.
    top_k: Optional[int] = Field(
        default=None,
        ge=1,
        le=100,
        description="Number of results. Default from config (search.default_limit).",
    )
    # Not validated here — mode checking happens in the endpoint after config
    # defaults are applied (see validate_query).
    mode: Optional[str] = Field(
        default=None,
        description="Search mode: semantic, lexical, hybrid. Default from config.",
    )
    score_threshold: Optional[float] = Field(
        default=None,
        ge=0.0,
        description=(
            "Minimum normalized score threshold (0-1) applied consistently "
            "across semantic, lexical, and hybrid retrieval"
        ),
    )
    granularity: Optional[str] = Field(
        default=None,
        description="Retrieval granularity: chunks or all. Default from config.",
    )
    embedding_preset: Optional[str] = Field(
        default=None,
        description="Embedding preset used for semantic retrieval and collection routing.",
    )
    include_full_content: bool = Field(
        default=False,
        description="Include full content in search results (default: false)",
    )
    filters: Optional[Dict[str, Any]] = Field(
        default=None,
        description=(
            "Metadata filters applied to retrieval. "
            "Supported keys: act_id, celex, act_type, subdivision_type, language. "
            'Example: {"act_type": "regulation"}'
        ),
    )

    @model_validator(mode="after")
    def validate_query(self) -> "SearchRequest":
        """Ensure at least a text query or a precomputed embedding is provided.

        Raises:
            ValueError: if both ``query`` and ``query_embedding`` are missing
                or empty (pydantic surfaces this as a ValidationError).
        """
        if not self.query and not self.query_embedding:
            raise ValueError("Either 'query' or 'query_embedding' must be provided")
        # mode validation deferred to endpoint (after config defaults are applied)
        return self
class SearchResult(BaseModel):
    """Search result item."""

    # CELEX identifier of the source act; required field but nullable
    # (callers must pass it explicitly, possibly as None).
    celex: Optional[str]
    # Identifier of the matched subdivision within the act.
    subdivision_id: int
    # Chunk identifiers; present only for chunk-granularity results.
    chunk_id: Optional[int] = None
    chunk_index: Optional[int] = None
    # Matched text content (may be truncated unless include_full_content was set).
    content: str
    # Relevance score for this hit.
    score: float
    # Arbitrary per-result metadata.
    metadata: Dict[str, Any]
    # Optional retrieval trace/debug information.
    trace: Optional[Dict[str, Any]] = None
class SearchResponse(BaseModel):
    """Search response."""

    # Unique identifier assigned to this search execution.
    search_id: str
    # Ordered result items.
    results: List[SearchResult]
    # Total number of matches (may exceed len(results) when truncated by top_k
    # — TODO confirm against the rag-service implementation).
    total: int
    # Search mode actually used after config defaults were resolved.
    mode: str