Source code for lalandre_rag.retrieval.query_utils
"""
Query pre-processing utilities for retrieval.
Small, pure helpers for preparing user queries before they hit the search
backends (BM25, semantic, etc.).
"""
import logging
from lalandre_core.config import get_config
# Module-level logger named after this module; used below to emit a debug
# record whenever a query is shortened.
logger = logging.getLogger(__name__)
[docs]
def truncate_lexical_query(query: str, max_chars: int | None = None) -> str:
"""Truncate *query* for BM25-based search modes.
Preserves full words by cutting at the last whitespace boundary.
Returns the original query unchanged if it is short enough.
If *max_chars* is not provided, uses ``search.max_lexical_query_chars``
from the central config.
"""
if max_chars is None:
max_chars = get_config().search.max_lexical_query_chars
if len(query) <= max_chars:
return query
truncated = query[:max_chars]
last_space = truncated.rfind(" ")
result = truncated[:last_space].rstrip() if last_space > 0 else truncated
logger.debug("Query truncated: %d → %d chars", len(query), len(result))
return result