Source code for lalandre_extraction.llm.models
"""Pydantic models for structured LLM extraction output."""
from __future__ import annotations
from typing import Any, List
from lalandre_core.models.types import RelationType
from pydantic import BaseModel, Field, field_validator
# Subset of RelationType members gathered for this module.
# NOTE(review): the name suggests these are the only relation kinds the
# extraction step should produce -- confirm against the callers.
_EXTRACTION_RELATION_TYPES: frozenset[RelationType] = frozenset(
    (
        RelationType.AMENDS,
        RelationType.REPEALS,
        RelationType.REPLACES,
        RelationType.IMPLEMENTS,
        RelationType.CITES,
        RelationType.DEROGATES,
        RelationType.SUPPLEMENTS,
        RelationType.CORRECTS,
    )
)

# The `.value` of every member above, collected once at import time so that
# membership checks can work on raw values instead of enum members.
ALLOWED_RELATION_TYPES: frozenset[str] = frozenset(
    {member.value for member in _EXTRACTION_RELATION_TYPES}
)
[docs]
class ExtractedRelationItem(BaseModel):
    """One relation record as emitted by the LLM.

    Every field is cleaned by a ``mode="before"`` validator, i.e. on the raw
    input value before Pydantic applies the field's own type and constraint
    checks. Keys that are not declared on the model are ignored.
    """

    model_config = {"extra": "ignore"}

    # Must be non-empty after cleaning (min_length=1).
    target_reference: str = Field(min_length=1)
    # Stored stripped and lower-cased; not checked against any allow-list here.
    relation_type: str
    # Stored stripped; an empty string is accepted.
    text_evidence: str
    # Optional; falls back to an empty string.
    relation_rationale: str = ""

    @field_validator("relation_type", mode="before")
    @classmethod
    def normalize_relation_type(cls, v: Any) -> str:
        """Return the relation type stripped and lower-cased.

        Any value that is not a ``str`` is replaced by an empty string.
        """
        if not isinstance(v, str):
            return ""
        return str(v).strip().lower()

    @field_validator("target_reference", "text_evidence", mode="before")
    @classmethod
    def clean_text(cls, v: Any) -> str:
        """Return the value with surrounding whitespace removed.

        Any value that is not a ``str`` is replaced by an empty string.
        """
        if not isinstance(v, str):
            return ""
        return str(v).strip()

    @field_validator("relation_rationale", mode="before")
    @classmethod
    def clean_rationale(cls, v: Any) -> str:
        """Return the stripped rationale text.

        Non-string and whitespace-only values both yield an empty string.
        """
        return v.strip() if isinstance(v, str) else ""
[docs]
class ExtractionOutput(BaseModel):
    """Top-level structured result returned by the extraction agent.

    Keys that are not declared on the model are ignored.
    """

    model_config = {"extra": "ignore"}

    # Extracted relation records; a fresh empty list when the key is absent.
    relations: List[ExtractedRelationItem] = Field(default_factory=list)
    # Non-negative counter, 0 by default.
    # NOTE(review): presumably the number of times output validation had to
    # be retried -- confirm where this is populated.
    output_validation_retries: int = Field(default=0, ge=0)