# Source code for rag_service.main

"""
RAG Service - Lalandre
Handles RAG queries (retrieval + generation)
"""

import logging
from contextlib import asynccontextmanager

import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import PlainTextResponse
from lalandre_core.config import get_config
from lalandre_core.http.middleware import make_http_instrumentation_middleware
from lalandre_core.logging_setup import setup_worker_logging
from lalandre_rag.retrieval.metrics import set_retrieval_metrics_recorder
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest  # type: ignore[import-untyped]
from rag_service.bootstrap import RagComponents, init_components
from rag_service.metrics import (
    PrometheusRetrievalMetricsRecorder,
    observe_http_request,
    refresh_backend_health,
)
from rag_service.models import HealthResponse
from rag_service.routers import conversations, query, search, stream

# Run the shared lalandre_core logging setup at import time, before this
# module's logger is created (presumably so handlers are in place first — TODO confirm).
setup_worker_logging()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Prepare shared application state before the service starts serving.

    Registers a ``PrometheusRetrievalMetricsRecorder`` as the retrieval
    metrics recorder, then stores the result of ``init_components()`` on
    ``app.state.components`` and yields control to the application.
    Nothing is torn down after the ``yield``.
    """
    # The recorder is registered before the components are built, keeping
    # the same ordering as the call sequence this function has always used.
    recorder = PrometheusRetrievalMetricsRecorder()
    set_retrieval_metrics_recorder(recorder)

    components = init_components()
    app.state.components = components
    yield


# Application instance; startup work is delegated to ``lifespan``.
app = FastAPI(
    title="Lalandre RAG Service",
    description="RAG Query Processing Service",
    version="0.1.0",
    lifespan=lifespan,
)

# Register the HTTP middleware produced by
# ``make_http_instrumentation_middleware`` from ``observe_http_request``.
_register_http_middleware = app.middleware("http")
_register_http_middleware(make_http_instrumentation_middleware(observe_http_request))

# Mount the routers in a fixed order: query, stream, search, conversations.
for _router_module in (query, stream, search, conversations):
    app.include_router(_router_module.router)


@app.get("/health", response_model=HealthResponse)
async def health_check(request: Request) -> HealthResponse:
    """Report service health.

    The reported status is always ``"healthy"``. The
    ``components_initialized`` flag is true only when
    ``app.state.components`` exists and is not ``None``.
    """
    app_state = request.app.state
    initialized = getattr(app_state, "components", None) is not None
    return HealthResponse(
        status="healthy",
        service="rag-service",
        components_initialized=initialized,
    )


@app.get("/metrics")
async def metrics(request: Request) -> PlainTextResponse:
    """Serve the latest Prometheus metrics as a plain-text response.

    When ``app.state.components`` holds a ``RagComponents`` instance,
    ``refresh_backend_health`` is called with it before the metrics
    payload is generated; otherwise that step is skipped.
    """
    rag_components = getattr(request.app.state, "components", None)
    if isinstance(rag_components, RagComponents):
        refresh_backend_health(rag_components)

    payload = generate_latest()
    return PlainTextResponse(payload, media_type=CONTENT_TYPE_LATEST)


@app.get("/config")
async def get_config_info():
    """Expose a read-only view of selected generation and search settings.

    The response is a dict with two sections, ``"generation"`` and
    ``"search"``, each populated from the matching section of the object
    returned by ``get_config()``.
    """
    # NOTE: no return annotation is added on purpose; FastAPI would treat
    # one as a response model for this route.
    settings = get_config()

    generation = settings.generation
    generation_info = {
        "provider": generation.provider,
        "model": generation.model_name,
        "temperature": generation.temperature,
        "max_tokens": generation.max_tokens,
        "timeout_seconds": generation.timeout_seconds,
    }

    search_settings = settings.search
    search_info = {
        "rerank_enabled": search_settings.rerank_enabled,
        "rerank_model": search_settings.rerank_model,
        "default_mode": search_settings.default_mode,
        "default_granularity": search_settings.default_granularity,
    }

    return {"generation": generation_info, "search": search_info}


if __name__ == "__main__":
    # Direct execution: serve ``app`` with uvicorn on 0.0.0.0:8001.
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8001,
    )