Source code for lalandre_extraction.llm.models

"""Pydantic models for structured LLM extraction output."""

from __future__ import annotations

from typing import Any, List

from lalandre_core.models.types import RelationType
from pydantic import BaseModel, Field, field_validator

# RelationType members gathered for the extraction models (immutable set).
_EXTRACTION_RELATION_TYPES: frozenset[RelationType] = frozenset(
    (
        RelationType.AMENDS,
        RelationType.REPEALS,
        RelationType.REPLACES,
        RelationType.IMPLEMENTS,
        RelationType.CITES,
        RelationType.DEROGATES,
        RelationType.SUPPLEMENTS,
        RelationType.CORRECTS,
    )
)

# The ``.value`` of every member above, exposed as an immutable set.
ALLOWED_RELATION_TYPES: frozenset[str] = frozenset(
    member.value for member in _EXTRACTION_RELATION_TYPES
)


class ExtractedRelationItem(BaseModel):
    """Single relation extracted by the LLM.

    Each field has a ``mode="before"`` validator that turns the raw value
    into a whitespace-trimmed string and maps any non-string input to ``""``.
    Keys in the payload that are not declared here are ignored.
    """

    model_config = {"extra": "ignore"}

    # Target of the relation as emitted by the model. ``clean_text`` reduces
    # blank or non-string input to "", which then fails ``min_length=1``.
    target_reference: str = Field(min_length=1)
    # Trimmed, lower-cased relation label (see ``normalize_relation_type``).
    relation_type: str
    # Trimmed evidence text; may end up empty.
    text_evidence: str
    # Optional rationale; defaults to an empty string.
    relation_rationale: str = ""

    @field_validator("relation_type", mode="before")
    @classmethod
    def normalize_relation_type(cls, v: Any) -> str:
        """Normalize the extracted relation type to a lowercase string.

        Args:
            v: Raw value received for ``relation_type``.

        Returns:
            ``v`` stripped and lower-cased when it is a string, otherwise
            an empty string.
        """
        if not isinstance(v, str):
            return ""
        # Use the str methods directly instead of ``str(v)``: for ``str``
        # subclasses such as ``(str, Enum)`` members, ``str(v)`` yields
        # "ClassName.MEMBER" rather than the underlying value.
        return v.strip().lower()

    @field_validator("target_reference", "text_evidence", mode="before")
    @classmethod
    def clean_text(cls, v: Any) -> str:
        """Strip mandatory text fields emitted by the extraction model.

        Args:
            v: Raw value received for the field.

        Returns:
            ``v`` without surrounding whitespace when it is a string,
            otherwise an empty string.
        """
        if not isinstance(v, str):
            return ""
        # Same reasoning as ``normalize_relation_type``: avoid ``str(v)`` so
        # str subclasses contribute their actual text.
        return v.strip()

    @field_validator("relation_rationale", mode="before")
    @classmethod
    def clean_rationale(cls, v: Any) -> str:
        """Normalize optional rationale text and default to an empty string.

        Args:
            v: Raw value received for ``relation_rationale``.

        Returns:
            The stripped text when ``v`` is a string; ``""`` for blank or
            non-string input.
        """
        if not isinstance(v, str):
            return ""
        return v.strip()
class ExtractionOutput(BaseModel):
    """Structured output from the extraction agent.

    Holds the list of extracted relations together with a non-negative
    counter of output-validation retries. Undeclared keys are ignored.
    """

    model_config = {"extra": "ignore"}

    # Extracted relations; a fresh empty list is created when omitted.
    relations: List[ExtractedRelationItem] = Field(
        default_factory=list,
    )
    # Retry counter; defaults to 0 and is constrained to be >= 0.
    output_validation_retries: int = Field(
        default=0,
        ge=0,
    )