"""
Resolved search configuration.
Centralizes the verbose config-resolution logic that was inlined in
RetrievalService.__init__.
"""
import logging
from dataclasses import dataclass
from typing import Any, Optional
from lalandre_core.config import get_config
logger = logging.getLogger(__name__)
def _require(*, value: Any, field_name: str) -> Any:
    """Return ``value`` unchanged once it is confirmed to be set.

    A value is rejected when it is ``None`` or a string made up solely of
    whitespace. Each rejection is logged at error level before the
    exception is raised.

    Args:
        value: The resolved setting to validate.
        field_name: Key under ``search`` used in the log and error messages.

    Returns:
        ``value`` itself, untouched (strings are not stripped).

    Raises:
        ValueError: If ``value`` is ``None`` or a blank string.
    """
    blank_text = isinstance(value, str) and not value.strip()
    if value is not None and not blank_text:
        return value

    if value is None:
        logger.error(
            "Missing required search config: search.%s. Set it in app_config.yaml.",
            field_name,
        )
        raise ValueError(f"Missing required search config: search.{field_name}")

    # Only the blank-string case can reach this point.
    logger.error(
        "Empty required search config: search.%s. Set it in app_config.yaml.",
        field_name,
    )
    raise ValueError(f"Empty required search config: search.{field_name}")
[docs]
@dataclass(frozen=True)
class ResolvedSearchConfig:
    """Immutable bundle of the search settings in effect for a caller.

    Build instances with :meth:`from_overrides`, which layers explicit
    arguments over the ``search`` section returned by ``get_config()``.
    """

    # Full-text language and candidate-pool sizing.
    search_language: str
    candidate_multiplier: float
    min_candidates: int
    max_candidates: int
    # Vector-search knobs; ``hnsw_ef`` stays ``None`` when nothing sets it.
    hnsw_ef: Optional[int]
    exact_search: bool
    per_collection_oversampling: float
    # Query expansion.
    query_expansion_enabled: bool
    query_expansion_max_variants: int
    query_expansion_min_query_chars: int
    # Lexical/semantic fusion.
    lexical_weight: float
    semantic_weight: float
    fusion_method: str
    dynamic_fusion_enabled: bool
    # Always read from config; ``from_overrides`` offers no override for them.
    lexical_boost_factor: float
    lexical_boost_max: float
    # Cache lifetime; ``from_overrides`` substitutes 0 for a falsy value.
    result_cache_ttl: int

    @classmethod
    def from_overrides(
        cls,
        *,
        search_language: Optional[str] = None,
        candidate_multiplier: Optional[float] = None,
        min_candidates: Optional[int] = None,
        max_candidates: Optional[int] = None,
        hnsw_ef: Optional[int] = None,
        exact_search: Optional[bool] = None,
        semantic_per_collection_oversampling: Optional[float] = None,
        query_expansion_enabled: Optional[bool] = None,
        query_expansion_max_variants: Optional[int] = None,
        query_expansion_min_query_chars: Optional[int] = None,
        lexical_weight: Optional[float] = None,
        semantic_weight: Optional[float] = None,
        fusion_method: Optional[str] = None,
        dynamic_fusion_enabled: Optional[bool] = None,
        result_cache_ttl: Optional[int] = None,
    ) -> "ResolvedSearchConfig":
        """Build a config from ``get_config().search`` plus explicit overrides.

        An argument left as ``None`` falls back to the matching attribute of
        the ``search`` config section. Required settings are validated with
        ``_require``; the optional ones (``hnsw_ef``, ``exact_search``,
        ``query_expansion_enabled``, ``dynamic_fusion_enabled``,
        ``result_cache_ttl``) are not.

        Normalisation applied after resolution:

        * ``min_candidates``, ``query_expansion_max_variants`` and a non-None
          ``hnsw_ef`` are raised to at least 1.
        * ``max_candidates`` is raised to at least ``min_candidates``.
        * ``per_collection_oversampling`` is raised to at least 1.0.
        * ``query_expansion_min_query_chars`` is raised to at least 0.
        * a falsy ``result_cache_ttl`` becomes 0.

        Returns:
            A frozen ``ResolvedSearchConfig``.

        Raises:
            ValueError: If a required setting is ``None`` or a blank string
                after resolution (raised by ``_require``).
        """
        cfg = get_config().search

        def _needed(override: Any, fallback: Any, key: str) -> Any:
            # The explicit argument wins; the merged value must pass _require.
            chosen = fallback if override is None else override
            return _require(value=chosen, field_name=key)

        # Statement order below is deliberate: it fixes which missing
        # setting is reported first.
        language = _needed(search_language, cfg.fulltext_language, "fulltext_language")
        multiplier = float(
            _needed(candidate_multiplier, cfg.candidate_multiplier, "candidate_multiplier")
        )

        candidate_floor = int(_needed(min_candidates, cfg.min_candidates, "min_candidates"))
        if candidate_floor < 1:
            candidate_floor = 1

        candidate_ceiling = int(_needed(max_candidates, cfg.max_candidates, "max_candidates"))
        if candidate_ceiling < candidate_floor:
            candidate_ceiling = candidate_floor

        # Optional: only a present value is coerced and clamped.
        ef_source = hnsw_ef if hnsw_ef is not None else cfg.hnsw_ef
        ef_search = None if ef_source is None else max(int(ef_source), 1)

        if exact_search is None:
            use_exact = bool(cfg.exact_search)
        else:
            use_exact = bool(exact_search)

        oversampling = float(
            _needed(
                semantic_per_collection_oversampling,
                cfg.semantic_per_collection_oversampling,
                "semantic_per_collection_oversampling",
            )
        )
        if oversampling < 1.0:
            oversampling = 1.0

        if query_expansion_enabled is None:
            expansion_on = bool(cfg.query_expansion_enabled)
        else:
            expansion_on = bool(query_expansion_enabled)

        variant_cap = int(
            _needed(
                query_expansion_max_variants,
                cfg.query_expansion_max_variants,
                "query_expansion_max_variants",
            )
        )
        if variant_cap < 1:
            variant_cap = 1

        min_query_chars = int(
            _needed(
                query_expansion_min_query_chars,
                cfg.query_expansion_min_query_chars,
                "query_expansion_min_query_chars",
            )
        )
        if min_query_chars < 0:
            min_query_chars = 0

        weight_lexical = float(_needed(lexical_weight, cfg.lexical_weight, "lexical_weight"))
        weight_semantic = float(_needed(semantic_weight, cfg.semantic_weight, "semantic_weight"))
        fusion = str(_needed(fusion_method, cfg.fusion_method, "fusion_method"))

        if dynamic_fusion_enabled is None:
            dynamic_on = cfg.dynamic_fusion_enabled
        else:
            dynamic_on = dynamic_fusion_enabled

        if result_cache_ttl is None:
            ttl = cfg.result_cache_ttl_seconds
        else:
            ttl = result_cache_ttl

        return cls(
            search_language=str(language),
            candidate_multiplier=multiplier,
            min_candidates=candidate_floor,
            max_candidates=candidate_ceiling,
            hnsw_ef=ef_search,
            exact_search=use_exact,
            per_collection_oversampling=oversampling,
            query_expansion_enabled=expansion_on,
            query_expansion_max_variants=variant_cap,
            query_expansion_min_query_chars=min_query_chars,
            lexical_weight=weight_lexical,
            semantic_weight=weight_semantic,
            fusion_method=fusion,
            dynamic_fusion_enabled=bool(dynamic_on),
            lexical_boost_factor=cfg.lexical_boost_factor,
            lexical_boost_max=cfg.lexical_boost_max,
            result_cache_ttl=ttl or 0,
        )