# Source code for embedding_worker.bootstrap

"""
Bootstrap for embedding worker
"""

from dataclasses import dataclass

from lalandre_core.config import get_config
from lalandre_core.embedding_presets import resolve_worker_embedding_preset
from lalandre_core.repositories.common import PayloadBuilder
from lalandre_core.utils.api_key_pool import APIKeyPool
from lalandre_db_postgres import PostgresRepository
from lalandre_db_qdrant.repository import QdrantRepository
from lalandre_embedding import EmbeddingService


@dataclass
class EmbeddingComponents:
    """Bundle of the long-lived objects the embedding worker depends on.

    ``init_components`` builds one instance of this container and returns
    it, so the rest of the worker can receive all of its dependencies as a
    single value.
    """

    # Relational store, opened from ``config.database.connection_string``.
    pg_repo: PostgresRepository
    # Vector store derived from ``config.vector.collection_chunks``.
    qdrant_chunks: QdrantRepository
    # Vector store derived from ``config.vector.collection_acts``.
    qdrant_acts: QdrantRepository
    # Embedding backend configured from the worker-bound preset.
    embedding_service: EmbeddingService
    # Helper used to build payloads (constructed with no arguments).
    payload_builder: PayloadBuilder
    # Identifier of the embedding preset the worker was bootstrapped with.
    preset_id: str
def init_components() -> EmbeddingComponents:
    """Initialize embedding worker components.

    Loads the global configuration and the worker-bound embedding preset,
    then builds, in order: the Postgres repository, an optional API key
    pool, the embedding service, and the two Qdrant repositories (chunks
    and acts). ``create_collection()`` is called on both Qdrant
    repositories before returning.

    If ``APIKeyPool.from_env`` raises ``ValueError`` the worker still
    starts: the failure is logged as a warning and the embedding service
    is created with ``key_pool=None``.

    Returns:
        EmbeddingComponents: the fully wired dependencies, including the
        ``preset_id`` of the preset that was resolved.
    """
    # Function-scope import keeps this block self-contained without
    # touching the module's import section.
    import logging

    logger = logging.getLogger(__name__)

    config = get_config()
    preset = resolve_worker_embedding_preset()

    # Initialize repositories
    pg_repo = PostgresRepository(config.database.connection_string)

    # Workers use keys 6-10 to avoid rate-limit contention with RAG (keys 1-5)
    try:
        worker_key_pool = APIKeyPool.from_env("MISTRAL_API_KEY", start_index=6)
    except ValueError as exc:
        # Best-effort: keep running without a pool, but make the fallback
        # visible instead of swallowing the error silently.
        logger.warning(
            "Could not build worker API key pool from MISTRAL_API_KEY "
            "(start_index=6): %s; continuing without a key pool",
            exc,
        )
        worker_key_pool = None

    # Initialize embedding service for the worker-bound preset
    embedding_service = EmbeddingService(
        provider=preset.provider,
        model_name=preset.model_name,
        device=preset.device,
        key_pool=worker_key_pool,
    )

    # Create Qdrant repos with auto-collection naming
    qdrant_chunks = QdrantRepository.from_embedding_service_with_auto_collection(
        embedding_service=embedding_service,
        base_collection_name=config.vector.collection_chunks,
    )
    qdrant_acts = QdrantRepository.from_embedding_service_with_auto_collection(
        embedding_service=embedding_service,
        base_collection_name=config.vector.collection_acts,
    )

    # Ensure collections exist
    qdrant_chunks.create_collection()
    qdrant_acts.create_collection()

    payload_builder = PayloadBuilder()

    return EmbeddingComponents(
        pg_repo=pg_repo,
        qdrant_chunks=qdrant_chunks,
        qdrant_acts=qdrant_acts,
        embedding_service=embedding_service,
        payload_builder=payload_builder,
        preset_id=preset.preset_id,
    )