Source code for rag_service.main
"""
RAG Service - Lalandre
Handles RAG queries (retrieval + generation)
"""
import logging
from contextlib import asynccontextmanager
import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import PlainTextResponse
from lalandre_core.config import get_config
from lalandre_core.http.middleware import make_http_instrumentation_middleware
from lalandre_core.logging_setup import setup_worker_logging
from lalandre_rag.retrieval.metrics import set_retrieval_metrics_recorder
from prometheus_client import CONTENT_TYPE_LATEST, generate_latest # type: ignore[import-untyped]
from rag_service.bootstrap import RagComponents, init_components
from rag_service.metrics import (
PrometheusRetrievalMetricsRecorder,
observe_http_request,
refresh_backend_health,
)
from rag_service.models import HealthResponse
from rag_service.routers import conversations, query, search, stream
# Runs at import time, before the module logger below is obtained.
# NOTE(review): presumably installs the shared worker logging configuration
# from lalandre_core.logging_setup — confirm against that module.
setup_worker_logging()
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)
[docs]
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Prepare shared service state for the lifetime of the application.

    Before control is handed back to the framework, a
    ``PrometheusRetrievalMetricsRecorder`` is registered as the retrieval
    metrics recorder and the result of ``init_components()`` is stored on
    ``app.state.components`` for request handlers to read. Nothing runs
    after the ``yield``, so no teardown is performed here.
    """
    recorder = PrometheusRetrievalMetricsRecorder()
    set_retrieval_metrics_recorder(recorder)

    components = init_components()
    app.state.components = components

    yield
# The ASGI application object; ``lifespan`` supplies the startup hook that
# populates ``app.state.components``.
app = FastAPI(
    lifespan=lifespan,
    version="0.1.0",
    title="Lalandre RAG Service",
    description="RAG Query Processing Service",
)

# Wrap every HTTP request with the shared instrumentation middleware, which is
# built around this service's ``observe_http_request`` callback.
app.middleware("http")(make_http_instrumentation_middleware(observe_http_request))

# Mount the feature routers in a fixed order: query, stream, search,
# conversations.
for _router_module in (query, stream, search, conversations):
    app.include_router(_router_module.router)
[docs]
@app.get("/health", response_model=HealthResponse)
async def health_check(request: Request) -> HealthResponse:
    """Report service status and whether components have been initialised.

    The response always carries ``status="healthy"``;
    ``components_initialized`` is true only when ``app.state.components``
    exists and is not ``None``.
    """
    state = request.app.state
    ready = getattr(state, "components", None) is not None
    return HealthResponse(
        status="healthy",
        service="rag-service",
        components_initialized=ready,
    )
[docs]
@app.get("/metrics")
async def metrics(request: Request) -> PlainTextResponse:
    """Serve the current Prometheus metrics as a plain-text response.

    ``refresh_backend_health`` is called first, but only when
    ``app.state.components`` holds a ``RagComponents`` instance; otherwise
    the metrics are rendered without refreshing.
    """
    rag_components = getattr(request.app.state, "components", None)
    if isinstance(rag_components, RagComponents):
        refresh_backend_health(rag_components)

    payload = generate_latest()
    return PlainTextResponse(payload, media_type=CONTENT_TYPE_LATEST)
[docs]
@app.get("/config")
async def get_config_info():
    """Expose a read-only snapshot of the generation and search settings.

    Returns a dict with two sections: ``"generation"`` (provider, model,
    temperature, max_tokens, timeout_seconds) and ``"search"``
    (rerank_enabled, rerank_model, default_mode, default_granularity), each
    read from the object returned by ``get_config()``.
    """
    cfg = get_config()

    generation_cfg = cfg.generation
    generation_view = {
        "provider": generation_cfg.provider,
        "model": generation_cfg.model_name,
        "temperature": generation_cfg.temperature,
        "max_tokens": generation_cfg.max_tokens,
        "timeout_seconds": generation_cfg.timeout_seconds,
    }

    search_cfg = cfg.search
    search_view = {
        "rerank_enabled": search_cfg.rerank_enabled,
        "rerank_model": search_cfg.rerank_model,
        "default_mode": search_cfg.default_mode,
        "default_granularity": search_cfg.default_granularity,
    }

    return {"generation": generation_view, "search": search_view}
if __name__ == "__main__":
    # Direct-execution entry point: serve ``app`` with uvicorn, bound to
    # host 0.0.0.0 on port 8001.
    uvicorn.run(
        app,
        port=8001,
        host="0.0.0.0",
    )