Source code for extraction_worker.bootstrap
"""
Bootstrap for extraction worker.
"""
from dataclasses import dataclass
from extraction_worker.graph import RelationGraphService
from lalandre_core.config import get_config
from lalandre_core.http.llm_client import JSONHTTPLLMClient
from lalandre_core.utils import APIKeyPool
from lalandre_db_neo4j import Neo4jRepository
from lalandre_db_postgres import PostgresRepository
from lalandre_rag.summaries import (
ActSummaryService,
CanonicalSummaryGenerator,
)
def _build_summary_llm_client() -> JSONHTTPLLMClient | None:
    """Create the JSON LLM client handed to the summary generator.

    Returns:
        A ``JSONHTTPLLMClient`` built from the ``extraction`` section of the
        configuration, or ``None`` when ``APIKeyPool.from_env`` raises
        ``ValueError`` for ``MISTRAL_API_KEY`` (presumably because no usable
        key is present in the environment -- confirm against ``APIKeyPool``).
    """
    try:
        # NOTE(review): the meaning of start_index=6 is defined by APIKeyPool
        # and is not visible from this module.
        pool = APIKeyPool.from_env("MISTRAL_API_KEY", start_index=6)
    except ValueError:
        return None

    llm_settings = get_config().extraction

    # Connection settings come from configuration; the output budget,
    # temperature and system prompt are fixed for this client.
    client_options = {
        "provider": llm_settings.llm_provider,
        "model": llm_settings.llm_model,
        "base_url": llm_settings.llm_base_url,
        "timeout_seconds": llm_settings.llm_timeout_seconds,
        "api_key": pool.next_key(),
        "max_output_tokens": 512,
        "temperature": 0.1,
        "system_prompt": "Tu résumes des actes juridiques. Retourne uniquement du JSON valide.",
    }
    return JSONHTTPLLMClient(**client_options)
[docs]
def init_components() -> ExtractionComponents:
    """Build and wire the components used by the extraction worker.

    Creates the Postgres and Neo4j repositories, the relation graph service
    and the act summary service, then bundles them together.

    Returns:
        An ``ExtractionComponents`` holding the two repositories, the
        relation graph service and the act summary service.

    Raises:
        ValueError: If ``gateway.job_extract_min_confidence`` is ``None`` in
            the loaded configuration.
    """
    settings = get_config()

    # Validate the threshold before any repository is constructed.
    confidence_floor = settings.gateway.job_extract_min_confidence
    if confidence_floor is None:
        raise ValueError("gateway.job_extract_min_confidence must be configured")

    postgres = PostgresRepository(settings.database.connection_string)
    neo4j = Neo4jRepository(settings.graph)

    graph_service = RelationGraphService(
        pg_repo=postgres,
        neo4j_repo=neo4j,
        min_confidence=confidence_floor,
        max_chunk_size=settings.chunking.extraction_max_chunk_chars,
    )

    # The same model id is given to both the generator and the service.
    runtime_model_id = ActSummaryService.build_runtime_model_id()
    summaries = ActSummaryService(
        pg_repo=postgres,
        generator=CanonicalSummaryGenerator(
            # May be None when no summary LLM client could be built.
            llm_client=_build_summary_llm_client(),
            model_id=runtime_model_id,
        ),
        model_id=runtime_model_id,
    )

    return ExtractionComponents(
        pg_repo=postgres,
        neo4j_repo=neo4j,
        relation_graph=graph_service,
        act_summary_service=summaries,
    )