Source code for extraction_worker.bootstrap

"""
Bootstrap for extraction worker.
"""

from dataclasses import dataclass

from extraction_worker.graph import RelationGraphService
from lalandre_core.config import get_config
from lalandre_core.http.llm_client import JSONHTTPLLMClient
from lalandre_core.utils import APIKeyPool
from lalandre_db_neo4j import Neo4jRepository
from lalandre_db_postgres import PostgresRepository
from lalandre_rag.summaries import (
    ActSummaryService,
    CanonicalSummaryGenerator,
)


@dataclass
class ExtractionComponents:
    """Container for the extraction worker's long-lived dependencies.

    Instances are assembled by ``init_components`` and hold the objects the
    worker keeps for its whole lifetime. Field order is part of the
    generated ``__init__`` signature and must not be changed.
    """

    # PostgreSQL repository.
    pg_repo: PostgresRepository
    # Neo4j repository.
    neo4j_repo: Neo4jRepository
    # Relation-graph service; ``init_components`` builds it on top of both
    # repositories above.
    relation_graph: RelationGraphService
    # Act-summary service; ``init_components`` builds it on top of ``pg_repo``
    # and a ``CanonicalSummaryGenerator``.
    act_summary_service: ActSummaryService
def _build_summary_llm_client() -> JSONHTTPLLMClient | None:
    """Create the JSON LLM client used for act summaries, if a key exists.

    Returns:
        A ``JSONHTTPLLMClient`` configured from the ``extraction`` section of
        the application config, or ``None`` when
        ``APIKeyPool.from_env("MISTRAL_API_KEY", ...)`` raises ``ValueError``
        (presumably meaning no usable key is configured -- confirm against
        ``APIKeyPool``).
    """
    try:
        # NOTE(review): the meaning of start_index=6 is not visible from this
        # module -- confirm against APIKeyPool.from_env before changing it.
        api_keys = APIKeyPool.from_env("MISTRAL_API_KEY", start_index=6)
    except ValueError:
        # No key pool available: signal "no LLM client" instead of failing.
        return None

    # Config is only loaded once a key pool is known to exist.
    llm_settings = get_config().extraction

    return JSONHTTPLLMClient(
        provider=llm_settings.llm_provider,
        model=llm_settings.llm_model,
        base_url=llm_settings.llm_base_url,
        timeout_seconds=llm_settings.llm_timeout_seconds,
        api_key=api_keys.next_key(),
        max_output_tokens=512,
        temperature=0.1,
        # French prompt: "You summarize legal acts. Return only valid JSON."
        system_prompt="Tu résumes des actes juridiques. Retourne uniquement du JSON valide.",
    )
def init_components() -> ExtractionComponents:
    """Build and wire every dependency the extraction worker needs.

    Loads the application config, creates the PostgreSQL and Neo4j
    repositories, then builds the relation-graph service and the act-summary
    service on top of them.

    Returns:
        An ``ExtractionComponents`` holding the repositories and services.

    Raises:
        ValueError: If ``gateway.job_extract_min_confidence`` is ``None`` in
            the loaded config.
    """
    settings = get_config()

    # Fail fast, before any repository is created, when the threshold is unset.
    confidence_floor = settings.gateway.job_extract_min_confidence
    if confidence_floor is None:
        raise ValueError("gateway.job_extract_min_confidence must be configured")

    postgres = PostgresRepository(settings.database.connection_string)
    graph_store = Neo4jRepository(settings.graph)

    graph_service = RelationGraphService(
        pg_repo=postgres,
        neo4j_repo=graph_store,
        min_confidence=confidence_floor,
        max_chunk_size=settings.chunking.extraction_max_chunk_chars,
    )

    # The same model id is handed to both the generator and the service.
    model_id = ActSummaryService.build_runtime_model_id()

    # May be None (see _build_summary_llm_client); passed through unchanged.
    llm_client = _build_summary_llm_client()
    generator = CanonicalSummaryGenerator(
        llm_client=llm_client,
        model_id=model_id,
    )
    summary_service = ActSummaryService(
        pg_repo=postgres,
        generator=generator,
        model_id=model_id,
    )

    return ExtractionComponents(
        pg_repo=postgres,
        neo4j_repo=graph_store,
        relation_graph=graph_service,
        act_summary_service=summary_service,
    )