# Source code for lalandre_rag.modes.llm_mode

"""
LLM Only Mode
Pure LLM generation without retrieval
"""

import time
from typing import Any, Dict, Iterator

from langchain_core.output_parsers import StrOutputParser

from ..prompts import render_llm_only_prompt
from ..response import create_llm_only_response


class LLMMode:
    """MODE 2: Pure LLM (100% generation).

    Produces answers from the model's own knowledge alone; no document
    retrieval is involved, so responses carry no sources.
    """

    def __init__(self, llm: Any):
        """Store the LLM client used for generation.

        Args:
            llm: LLM client
        """
        self.llm = llm

    def query(self, question: str, include_warning: bool = True) -> Dict[str, Any]:
        """Answer ``question`` using only LLM knowledge (no retrieval).

        Args:
            question: User question
            include_warning: Include warning about no document grounding

        Returns:
            Dictionary with LLM answer (no sources)
        """
        started = time.perf_counter()
        chain = self.llm | StrOutputParser()
        answer = chain.invoke(render_llm_only_prompt(question=question))
        elapsed_ms = (time.perf_counter() - started) * 1000.0

        response = create_llm_only_response(
            query=question,
            answer=answer,
            include_warning=include_warning,
        )
        # Generation is the only phase in this mode, so it is also the total.
        response["metadata"]["phase_timings_ms"] = {
            "generation_ms": round(elapsed_ms, 1),
            "total_ms": round(elapsed_ms, 1),
        }
        return response

    def stream_query(self, question: str) -> Iterator[str]:
        """Stream LLM answer token by token."""
        prompt = render_llm_only_prompt(question=question)
        yield from (self.llm | StrOutputParser()).stream(prompt)