# Source code for lalandre_rag.retrieval.search_config

"""
Resolved search configuration.

Centralizes the verbose config-resolution logic that was inlined in
RetrievalService.__init__.
"""

import logging
from dataclasses import dataclass
from typing import Any, Optional

from lalandre_core.config import get_config

logger = logging.getLogger(__name__)


def _require(*, value: Any, field_name: str) -> Any:
    """Raise if a required search config value is missing or blank."""
    if value is None:
        logger.error(
            "Missing required search config: search.%s. Set it in app_config.yaml.",
            field_name,
        )
        raise ValueError(f"Missing required search config: search.{field_name}")
    if isinstance(value, str) and not value.strip():
        logger.error(
            "Empty required search config: search.%s. Set it in app_config.yaml.",
            field_name,
        )
        raise ValueError(f"Empty required search config: search.{field_name}")
    return value


@dataclass(frozen=True)
class ResolvedSearchConfig:
    """Frozen snapshot of every search parameter after resolution.

    Instances are normally built through :meth:`from_overrides`, which merges
    the ``search`` section of the application config with caller-supplied
    overrides. The numeric bounds noted on the fields below are applied by
    that factory, not by the dataclass constructor itself.
    """

    search_language: str
    candidate_multiplier: float
    min_candidates: int  # factory clamps to >= 1
    max_candidates: int  # factory clamps to >= min_candidates
    hnsw_ef: Optional[int]  # None when set nowhere; otherwise factory clamps to >= 1
    exact_search: bool
    per_collection_oversampling: float  # factory clamps to >= 1.0
    query_expansion_enabled: bool
    query_expansion_max_variants: int  # factory clamps to >= 1
    query_expansion_min_query_chars: int  # factory clamps to >= 0
    lexical_weight: float
    semantic_weight: float
    fusion_method: str
    dynamic_fusion_enabled: bool
    lexical_boost_factor: float  # read straight from config; no override exists
    lexical_boost_max: float  # read straight from config; no override exists
    result_cache_ttl: int  # factory turns a falsy value (None, 0) into 0

    @classmethod
    def from_overrides(
        cls,
        *,
        search_language: Optional[str] = None,
        candidate_multiplier: Optional[float] = None,
        min_candidates: Optional[int] = None,
        max_candidates: Optional[int] = None,
        hnsw_ef: Optional[int] = None,
        exact_search: Optional[bool] = None,
        semantic_per_collection_oversampling: Optional[float] = None,
        query_expansion_enabled: Optional[bool] = None,
        query_expansion_max_variants: Optional[int] = None,
        query_expansion_min_query_chars: Optional[int] = None,
        lexical_weight: Optional[float] = None,
        semantic_weight: Optional[float] = None,
        fusion_method: Optional[str] = None,
        dynamic_fusion_enabled: Optional[bool] = None,
        result_cache_ttl: Optional[int] = None,
    ) -> "ResolvedSearchConfig":
        """Build a config from ``get_config().search`` plus explicit overrides.

        Every keyword argument defaults to ``None``, meaning "use the value
        from the ``search`` config section". Any non-``None`` argument takes
        precedence over the config value, including falsy ones such as ``0``
        or ``False``.

        The language, candidate multiplier, min/max candidates, per-collection
        oversampling, query-expansion variant/char limits, lexical/semantic
        weights and fusion method are mandatory: the chosen value is passed
        through ``_require``. The remaining settings fall back to the config
        value without that check.

        Returns:
            A new frozen ``ResolvedSearchConfig``.

        Raises:
            ValueError: From ``_require`` when a mandatory setting is ``None``
                or a blank string in both the override and the config.
        """
        search_cfg = get_config().search

        def _pick(override: Any, fallback: Any, name: str) -> Any:
            # An explicit override wins; whichever value is chosen must be present.
            chosen = fallback if override is None else override
            return _require(value=chosen, field_name=name)

        language = _pick(search_language, search_cfg.fulltext_language, "fulltext_language")
        multiplier = float(
            _pick(candidate_multiplier, search_cfg.candidate_multiplier, "candidate_multiplier")
        )

        # Candidate window: at least one candidate, and the ceiling never
        # drops below the floor.
        candidate_floor = int(_pick(min_candidates, search_cfg.min_candidates, "min_candidates"))
        candidate_floor = max(candidate_floor, 1)
        candidate_ceiling = int(_pick(max_candidates, search_cfg.max_candidates, "max_candidates"))
        candidate_ceiling = max(candidate_ceiling, candidate_floor)

        # hnsw_ef is optional: it stays None only when neither source sets it.
        if hnsw_ef is not None:
            ef_value = max(int(hnsw_ef), 1)
        elif search_cfg.hnsw_ef is not None:
            ef_value = max(int(search_cfg.hnsw_ef), 1)
        else:
            ef_value = None

        if exact_search is None:
            use_exact = bool(search_cfg.exact_search)
        else:
            use_exact = bool(exact_search)

        oversampling = float(
            _pick(
                semantic_per_collection_oversampling,
                search_cfg.semantic_per_collection_oversampling,
                "semantic_per_collection_oversampling",
            )
        )
        oversampling = max(oversampling, 1.0)

        if query_expansion_enabled is None:
            expansion_on = bool(search_cfg.query_expansion_enabled)
        else:
            expansion_on = bool(query_expansion_enabled)

        variant_limit = int(
            _pick(
                query_expansion_max_variants,
                search_cfg.query_expansion_max_variants,
                "query_expansion_max_variants",
            )
        )
        variant_limit = max(variant_limit, 1)

        min_query_chars = int(
            _pick(
                query_expansion_min_query_chars,
                search_cfg.query_expansion_min_query_chars,
                "query_expansion_min_query_chars",
            )
        )
        min_query_chars = max(min_query_chars, 0)

        lexical = float(_pick(lexical_weight, search_cfg.lexical_weight, "lexical_weight"))
        semantic = float(_pick(semantic_weight, search_cfg.semantic_weight, "semantic_weight"))
        fusion = str(_pick(fusion_method, search_cfg.fusion_method, "fusion_method"))

        if dynamic_fusion_enabled is None:
            dynamic = search_cfg.dynamic_fusion_enabled
        else:
            dynamic = dynamic_fusion_enabled

        if result_cache_ttl is None:
            ttl = search_cfg.result_cache_ttl_seconds
        else:
            ttl = result_cache_ttl

        return cls(
            search_language=str(language),
            candidate_multiplier=multiplier,
            min_candidates=candidate_floor,
            max_candidates=candidate_ceiling,
            hnsw_ef=ef_value,
            exact_search=use_exact,
            per_collection_oversampling=oversampling,
            query_expansion_enabled=expansion_on,
            query_expansion_max_variants=variant_limit,
            query_expansion_min_query_chars=min_query_chars,
            lexical_weight=lexical,
            semantic_weight=semantic,
            fusion_method=fusion,
            dynamic_fusion_enabled=bool(dynamic),
            lexical_boost_factor=search_cfg.lexical_boost_factor,
            lexical_boost_max=search_cfg.lexical_boost_max,
            result_cache_ttl=ttl or 0,
        )