Source code for embedding_worker.bootstrap
"""
Bootstrap for embedding worker
"""
from dataclasses import dataclass
from lalandre_core.config import get_config
from lalandre_core.embedding_presets import resolve_worker_embedding_preset
from lalandre_core.repositories.common import PayloadBuilder
from lalandre_core.utils.api_key_pool import APIKeyPool
from lalandre_db_postgres import PostgresRepository
from lalandre_db_qdrant.repository import QdrantRepository
from lalandre_embedding import EmbeddingService
[docs]
@dataclass
class EmbeddingComponents:
    """Bundle of long-lived dependencies shared by the embedding worker.

    An instance is assembled once by ``init_components`` and groups the
    repositories and services the worker uses.
    """

    # Postgres repository, built from ``config.database.connection_string``.
    pg_repo: PostgresRepository
    # Qdrant repository derived from ``config.vector.collection_chunks``.
    qdrant_chunks: QdrantRepository
    # Qdrant repository derived from ``config.vector.collection_acts``.
    qdrant_acts: QdrantRepository
    # Embedding service configured from the worker-bound preset.
    embedding_service: EmbeddingService
    # Payload builder instance (constructed with no arguments).
    payload_builder: PayloadBuilder
    # ``preset_id`` of the preset the embedding service was configured from.
    preset_id: str
[docs]
def init_components() -> EmbeddingComponents:
    """Initialize the embedding worker's components.

    Reads the application config and the worker embedding preset, then
    builds, in order: the Postgres repository, an optional API key pool,
    the embedding service, and the two Qdrant repositories (chunks and
    acts). ``create_collection()`` is called on both Qdrant repositories
    before returning.

    Returns:
        EmbeddingComponents: the constructed dependencies plus the
        ``preset_id`` of the resolved preset.

    Raises:
        Only a ``ValueError`` from ``APIKeyPool.from_env`` is handled here
        (the worker then runs with ``key_pool=None``). Any other exception
        raised by the constructors or by ``create_collection()`` propagates
        to the caller.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    config = get_config()
    preset = resolve_worker_embedding_preset()

    # Initialize repositories
    pg_repo = PostgresRepository(config.database.connection_string)

    # Workers use keys 6-10 to avoid rate-limit contention with RAG (keys 1-5)
    try:
        worker_key_pool = APIKeyPool.from_env("MISTRAL_API_KEY", start_index=6)
    except ValueError as exc:
        # Best-effort fallback is kept, but it is no longer silent: without
        # this warning nothing indicates that the dedicated worker key range
        # was not picked up.
        logging.getLogger(__name__).warning(
            "Could not build worker API key pool from MISTRAL_API_KEY "
            "(start_index=6): %s; continuing with key_pool=None",
            exc,
        )
        worker_key_pool = None

    # Initialize embedding service for the worker-bound preset
    embedding_service = EmbeddingService(
        provider=preset.provider,
        model_name=preset.model_name,
        device=preset.device,
        key_pool=worker_key_pool,
    )

    # Create Qdrant repos with auto-collection naming; both share the same
    # embedding service and differ only in their base collection name.
    qdrant_chunks = QdrantRepository.from_embedding_service_with_auto_collection(
        embedding_service=embedding_service,
        base_collection_name=config.vector.collection_chunks,
    )
    qdrant_acts = QdrantRepository.from_embedding_service_with_auto_collection(
        embedding_service=embedding_service,
        base_collection_name=config.vector.collection_acts,
    )

    # Ensure collections exist
    qdrant_chunks.create_collection()
    qdrant_acts.create_collection()

    payload_builder = PayloadBuilder()

    return EmbeddingComponents(
        pg_repo=pg_repo,
        qdrant_chunks=qdrant_chunks,
        qdrant_acts=qdrant_acts,
        embedding_service=embedding_service,
        payload_builder=payload_builder,
        preset_id=preset.preset_id,
    )