"""
Build Qdrant payloads from JSON schemas.
"""
from typing import Any, Callable, Dict, Optional
from ...utils import format_date, to_timestamp
from .schema_loader import PayloadSchemaLoader
# Type alias for one subject entry: a dict with string keys and arbitrary values.
# Used as the element type of the ``subjects`` argument of
# ``PayloadBuilder.build_act_payload``.
SubjectPayload = dict[str, Any]
class PayloadBuilder:
    """Schema-driven payload builder.

    Each ``build_*`` method collects its inputs into a context dict and hands
    that context, a schema name, and this builder's transformer callables to
    ``loader.build_payload_from_schema``. Whatever the loader returns is
    returned to the caller unchanged.

    Attributes:
        loader: Object providing ``build_payload_from_schema``.
        transformers: Mapping of transformer name to callable; passed to the
            loader on every build.
    """

    def __init__(self, loader: Optional[PayloadSchemaLoader] = None):
        """Create a builder.

        Args:
            loader: Schema loader to delegate to. Any falsy value (including
                ``None``) causes a default ``PayloadSchemaLoader()`` to be
                created instead.
        """
        self.loader = loader or PayloadSchemaLoader()
        self.transformers: Dict[str, Callable[[Any], Any]] = {
            "_format_date": format_date,
            "_to_timestamp": to_timestamp,
        }

    def _build_from_schema(self, schema_name: str, context: Dict[str, Any]) -> Dict[str, Any]:
        """Delegate to the loader for ``schema_name`` using this builder's transformers."""
        return self.loader.build_payload_from_schema(
            schema_name=schema_name, context=context, transformers=self.transformers
        )

    def build_subdivision_payload(
        self,
        subdivision_data: Dict[str, Any],
        act_data: Dict[str, Any],
        version_data: Optional[Dict[str, Any]] = None,
        metadata: Optional[Dict[str, str]] = None,
    ) -> Dict[str, Any]:
        """Build payload for subdivision embeddings.

        Args:
            subdivision_data: Exposed to the schema as ``subdivision_data``.
            act_data: Exposed to the schema as ``act_data``.
            version_data: Exposed to the schema as ``version_data``; passed
                through as ``None`` when omitted.
            metadata: Exposed to the schema as ``metadata``; ``None`` is
                replaced by a fresh empty dict.

        Returns:
            The loader's result for ``"subdivision_payload_schema"``.
        """
        # Only None is replaced: an explicitly passed (even empty) dict is
        # forwarded as the very same object.
        resolved_metadata: Dict[str, str] = {} if metadata is None else metadata
        context: Dict[str, Any] = {
            "subdivision_data": subdivision_data,
            "act_data": act_data,
            "version_data": version_data,
            "metadata": resolved_metadata,
        }
        return self._build_from_schema("subdivision_payload_schema", context)

    def build_chunk_payload(
        self, chunk_data: Dict[str, Any], subdivision_data: Dict[str, Any], act_data: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Build payload for chunk embeddings.

        Args:
            chunk_data: Exposed to the schema as ``chunk_data``.
            subdivision_data: Exposed to the schema as ``subdivision_data``.
            act_data: Exposed to the schema as ``act_data``.

        Returns:
            The loader's result for ``"chunk_payload_schema"``.
        """
        context: Dict[str, Any] = {
            "chunk_data": chunk_data,
            "subdivision_data": subdivision_data,
            "act_data": act_data,
        }
        return self._build_from_schema("chunk_payload_schema", context)

    def build_act_payload(
        self,
        act_data: Dict[str, Any],
        full_text: str,
        subjects: Optional[list[SubjectPayload]] = None,
        metadata: Optional[Dict[str, str]] = None,
    ) -> Dict[str, Any]:
        """Build payload for whole-act embeddings (one vector per act).

        Args:
            act_data: Exposed to the schema as ``act_data``.
            full_text: Exposed to the schema as ``full_text``.
            subjects: Exposed to the schema as ``subjects``; ``None`` is
                replaced by a fresh empty list.
            metadata: Exposed to the schema as ``metadata``; ``None`` is
                replaced by a fresh empty dict.

        Returns:
            The loader's result for ``"act_payload_schema"``.
        """
        # Fresh containers per call, so no mutable default is ever shared.
        resolved_subjects: list[SubjectPayload] = [] if subjects is None else subjects
        resolved_metadata: Dict[str, str] = {} if metadata is None else metadata
        context: Dict[str, Any] = {
            "act_data": act_data,
            "full_text": full_text,
            "subjects": resolved_subjects,
            "metadata": resolved_metadata,
        }
        return self._build_from_schema("act_payload_schema", context)