Source code for lalandre_db_postgres.models

"""
PostgreSQL SQLAlchemy models
"""

from datetime import date, datetime
from enum import Enum
from typing import Any, Dict, Optional

from lalandre_core.models.types import ActType, LanguageCode, RelationType, SubdivisionType, VersionType
from sqlalchemy import (
    Boolean,
    CheckConstraint,
    Date,
    DateTime,
    Float,
    ForeignKey,
    Integer,
    String,
    Text,
    UniqueConstraint,
)
from sqlalchemy import Enum as SQLEnum
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.dialects.postgresql import UUID as PG_UUID
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
from sqlalchemy.sql import func


[docs] class Base(DeclarativeBase): """Base class for all SQLAlchemy declarative models.""" pass
def _enum_values(enum_cls: type[Enum]) -> list[str]: return [str(member.value) for member in enum_cls]
[docs] class ActsSQL(Base): """Persisted legislative act.""" __tablename__ = "acts" id: Mapped[int] = mapped_column(Integer, primary_key=True) celex: Mapped[str] = mapped_column(String(50), unique=True) eli: Mapped[Optional[str]] = mapped_column(String(255), unique=True, nullable=True) act_type: Mapped[str] = mapped_column( SQLEnum(ActType, name="act_type", values_callable=_enum_values), ) title: Mapped[str] = mapped_column(Text) language: Mapped[str] = mapped_column( SQLEnum(LanguageCode, name="language_code", values_callable=_enum_values), ) adoption_date: Mapped[Optional[date]] = mapped_column(Date, nullable=True) force_date: Mapped[Optional[date]] = mapped_column(Date, nullable=True) end_date: Mapped[Optional[date]] = mapped_column(Date, nullable=True) official_journal_reference: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) sector: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) level: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) form_number: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) url_eurlex: Mapped[Optional[str]] = mapped_column(Text, nullable=True) created_at: Mapped[Optional[datetime]] = mapped_column(DateTime, server_default=func.now(), nullable=True) updated_at: Mapped[Optional[datetime]] = mapped_column( DateTime, server_default=func.now(), onupdate=func.now(), nullable=True ) last_synced_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) content_hash: Mapped[Optional[str]] = mapped_column(String(64), nullable=True) sync_status: Mapped[Optional[str]] = mapped_column(String(20), default="pending", nullable=True) extracted_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) extraction_status: Mapped[Optional[str]] = mapped_column(String(20), default="pending", nullable=True) # Relationships relations_source = relationship( "ActRelationsSQL", foreign_keys="ActRelationsSQL.source_act_id", back_populates="source_act" ) relations_target = relationship( "ActRelationsSQL", foreign_keys="ActRelationsSQL.target_act_id", back_populates="target_act" ) metadata_entries = relationship("ActMetadataSQL", back_populates="act") subjects = relationship("ActSubjectsSQL", back_populates="act") versions = relationship("VersionsSQL", back_populates="act") subdivisions = relationship("SubdivisionsSQL", back_populates="act") summaries = relationship("ActSummarySQL", back_populates="act", cascade="all, delete-orphan")
[docs] class ActSummarySQL(Base): """Canonical or derived summary generated for an act.""" __tablename__ = "act_summaries" id: Mapped[int] = mapped_column(Integer, primary_key=True) act_id: Mapped[int] = mapped_column(Integer, ForeignKey("acts.id", ondelete="CASCADE")) language: Mapped[str] = mapped_column( SQLEnum(LanguageCode, name="language_code", values_callable=_enum_values), ) summary_kind: Mapped[str] = mapped_column(String(32), default="canonical") status: Mapped[str] = mapped_column(String(20), default="pending") summary_text: Mapped[Optional[str]] = mapped_column(Text, nullable=True) content_hash: Mapped[Optional[str]] = mapped_column(String(64), nullable=True) source_version_id: Mapped[Optional[int]] = mapped_column( Integer, ForeignKey("versions.id", ondelete="SET NULL"), nullable=True ) prompt_version: Mapped[Optional[str]] = mapped_column(String(64), nullable=True) model_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) generated_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) last_attempt_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) error_text: Mapped[Optional[str]] = mapped_column(Text, nullable=True) trace_jsonb: Mapped[Dict[str, Any]] = mapped_column(JSONB, default=dict) created_at: Mapped[Optional[datetime]] = mapped_column(DateTime, server_default=func.now(), nullable=True) updated_at: Mapped[Optional[datetime]] = mapped_column( DateTime, server_default=func.now(), onupdate=func.now(), nullable=True ) act = relationship("ActsSQL", back_populates="summaries") source_version = relationship("VersionsSQL") __table_args__ = ( UniqueConstraint("act_id", "language", "summary_kind", name="unique_act_summary_kind"), CheckConstraint("summary_kind IN ('canonical')", name="check_act_summary_kind"), CheckConstraint("status IN ('pending', 'ready', 'failed')", name="check_act_summary_status"), )
[docs] class ActRelationsSQL(Base): """Directed relationship extracted between two acts.""" __tablename__ = "act_relations" id: Mapped[int] = mapped_column(Integer, primary_key=True) source_act_id: Mapped[int] = mapped_column(Integer, ForeignKey("acts.id", ondelete="CASCADE")) target_act_id: Mapped[Optional[int]] = mapped_column( Integer, ForeignKey("acts.id", ondelete="CASCADE"), nullable=True ) target_celex: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) relation_type: Mapped[str] = mapped_column( SQLEnum(RelationType, name="relation_type", values_callable=_enum_values), ) source_subdivision_id: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey("subdivisions.id"), nullable=True) target_subdivision_id: Mapped[Optional[int]] = mapped_column(Integer, ForeignKey("subdivisions.id"), nullable=True) effect_date: Mapped[Optional[date]] = mapped_column(Date, nullable=True) description: Mapped[Optional[str]] = mapped_column(Text, nullable=True) evidence: Mapped[Optional[str]] = mapped_column(Text, nullable=True) rationale: Mapped[Optional[str]] = mapped_column(Text, nullable=True) resolution_method: Mapped[Optional[str]] = mapped_column(String(30), nullable=True) resolution_score: Mapped[Optional[float]] = mapped_column(Float, nullable=True) target_reference: Mapped[Optional[str]] = mapped_column(Text, nullable=True) confidence: Mapped[Optional[float]] = mapped_column(Float, nullable=True) source: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) validated: Mapped[Optional[bool]] = mapped_column(Boolean, default=False, nullable=True) synced_to_neo4j_at: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) is_resolved: Mapped[Optional[bool]] = mapped_column(Boolean, default=True, nullable=True) created_at: Mapped[Optional[datetime]] = mapped_column(DateTime, server_default=func.now(), nullable=True) # Relationships source_act = relationship( "ActsSQL", foreign_keys="ActRelationsSQL.source_act_id", back_populates="relations_source" ) target_act = relationship( "ActsSQL", foreign_keys="ActRelationsSQL.target_act_id", back_populates="relations_target" )
[docs] class ActMetadataSQL(Base): """Key-value metadata attached to an act.""" __tablename__ = "act_metadata" id: Mapped[int] = mapped_column(Integer, primary_key=True) act_id: Mapped[int] = mapped_column(Integer, ForeignKey("acts.id", ondelete="CASCADE")) key: Mapped[str] = mapped_column(String(100)) value: Mapped[str] = mapped_column(Text) created_at: Mapped[Optional[datetime]] = mapped_column(DateTime, server_default=func.now(), nullable=True) # Relationships act = relationship("ActsSQL", back_populates="metadata_entries") __table_args__ = (UniqueConstraint("act_id", "key", name="unique_act_metadata"),)
[docs] class ActSubjectsSQL(Base): """Association table between acts and subject matters.""" __tablename__ = "act_subjects" act_id: Mapped[int] = mapped_column(Integer, ForeignKey("acts.id", ondelete="CASCADE"), primary_key=True) subject_id: Mapped[int] = mapped_column( Integer, ForeignKey("subject_matters.id", ondelete="CASCADE"), primary_key=True ) # Relationships act = relationship("ActsSQL", back_populates="subjects") subject = relationship("SubjectMattersSQL", back_populates="acts")
[docs] class SubjectMattersSQL(Base): """EuroVoc-like subject taxonomy entry.""" __tablename__ = "subject_matters" id: Mapped[int] = mapped_column(Integer, primary_key=True) eurovoc_code: Mapped[str] = mapped_column(String(20), unique=True) label_en: Mapped[str] = mapped_column(Text) label_fr: Mapped[Optional[str]] = mapped_column(Text, nullable=True) parent_code: Mapped[Optional[str]] = mapped_column( String(20), ForeignKey("subject_matters.eurovoc_code"), nullable=True ) # Relationships acts = relationship("ActSubjectsSQL", back_populates="subject") children = relationship("SubjectMattersSQL", backref="parent", remote_side="SubjectMattersSQL.eurovoc_code")
[docs] class VersionsSQL(Base): """Version row describing one published state of an act.""" __tablename__ = "versions" id: Mapped[int] = mapped_column(Integer, primary_key=True) act_id: Mapped[int] = mapped_column(Integer, ForeignKey("acts.id", ondelete="CASCADE")) version_number: Mapped[int] = mapped_column(Integer) version_type: Mapped[str] = mapped_column( SQLEnum(VersionType, name="version_type", values_callable=_enum_values), ) version_date: Mapped[date] = mapped_column(Date) source_url: Mapped[Optional[str]] = mapped_column(Text, nullable=True) is_current: Mapped[Optional[bool]] = mapped_column(Boolean, default=False, nullable=True) created_at: Mapped[Optional[datetime]] = mapped_column(DateTime, server_default=func.now(), nullable=True) # Relationships act = relationship("ActsSQL", back_populates="versions") subdivisions = relationship("SubdivisionsSQL", back_populates="version") __table_args__ = ( UniqueConstraint("act_id", "version_number", name="unique_act_version"), CheckConstraint("version_number > 0", name="check_version_number_positive"), )
[docs] class SubdivisionsSQL(Base): """Hierarchical subdivision belonging to an act version.""" __tablename__ = "subdivisions" id: Mapped[int] = mapped_column(Integer, primary_key=True) act_id: Mapped[int] = mapped_column(Integer, ForeignKey("acts.id", ondelete="CASCADE")) version_id: Mapped[Optional[int]] = mapped_column( Integer, ForeignKey("versions.id", ondelete="CASCADE"), nullable=True ) parent_id: Mapped[Optional[int]] = mapped_column( Integer, ForeignKey("subdivisions.id", ondelete="CASCADE"), nullable=True ) subdivision_type: Mapped[str] = mapped_column( SQLEnum(SubdivisionType, name="subdivision_type", values_callable=_enum_values), ) number: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) title: Mapped[Optional[str]] = mapped_column(Text, nullable=True) content: Mapped[str] = mapped_column(Text) sequence_order: Mapped[int] = mapped_column(Integer) hierarchy_path: Mapped[str] = mapped_column(Text) depth: Mapped[int] = mapped_column(Integer, default=0) created_at: Mapped[Optional[datetime]] = mapped_column(DateTime, server_default=func.now(), nullable=True) content_hash: Mapped[Optional[str]] = mapped_column(String(64), nullable=True) # Relationships act = relationship("ActsSQL", back_populates="subdivisions") version = relationship("VersionsSQL", back_populates="subdivisions") children = relationship("SubdivisionsSQL", backref="parent", remote_side="SubdivisionsSQL.id") chunks = relationship("ChunksSQL", back_populates="subdivision", cascade="all, delete-orphan") __table_args__ = ( CheckConstraint("depth >= 0", name="check_depth_non_negative"), CheckConstraint("sequence_order > 0", name="check_sequence_order_positive"), )
[docs] class ChunksSQL(Base): """Chunks of subdivisions for fine-grained retrieval""" __tablename__ = "chunks" id: Mapped[int] = mapped_column(Integer, primary_key=True) subdivision_id: Mapped[int] = mapped_column(Integer, ForeignKey("subdivisions.id", ondelete="CASCADE")) chunk_index: Mapped[int] = mapped_column(Integer) content: Mapped[str] = mapped_column(Text) char_start: Mapped[int] = mapped_column(Integer) char_end: Mapped[int] = mapped_column(Integer) token_count: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) chunk_metadata: Mapped[Optional[Dict[str, Any]]] = mapped_column(JSONB, nullable=True) created_at: Mapped[Optional[datetime]] = mapped_column(DateTime, server_default=func.now(), nullable=True) # Relationships subdivision = relationship("SubdivisionsSQL", back_populates="chunks") __table_args__ = ( UniqueConstraint("subdivision_id", "chunk_index", name="unique_subdivision_chunk"), CheckConstraint("chunk_index >= 0", name="check_chunk_index_non_negative"), CheckConstraint("char_end > char_start", name="check_char_positions_valid"), CheckConstraint("length(content) > 0", name="check_content_not_empty"), )
[docs] class ConversationSQL(Base): """Multi-turn conversation session.""" __tablename__ = "conversations" id: Mapped[str] = mapped_column(PG_UUID(as_uuid=False), primary_key=True) user_id: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) title: Mapped[str] = mapped_column(Text) created_at: Mapped[Optional[datetime]] = mapped_column(DateTime, server_default=func.now(), nullable=True) updated_at: Mapped[Optional[datetime]] = mapped_column( DateTime, server_default=func.now(), onupdate=func.now(), nullable=True ) messages = relationship( "ConversationMessageSQL", back_populates="conversation", order_by="ConversationMessageSQL.created_at", cascade="all, delete-orphan", )
[docs] class ConversationMessageSQL(Base): """Single message within a conversation (human or assistant).""" __tablename__ = "conversation_messages" id: Mapped[str] = mapped_column(PG_UUID(as_uuid=False), primary_key=True) conversation_id: Mapped[str] = mapped_column( PG_UUID(as_uuid=False), ForeignKey("conversations.id", ondelete="CASCADE"), ) role: Mapped[str] = mapped_column(String(20)) content: Mapped[str] = mapped_column(Text) query_id: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) mode: Mapped[Optional[str]] = mapped_column(String(30), nullable=True) metadata_: Mapped[Optional[Dict[str, Any]]] = mapped_column("metadata", JSONB, nullable=True) created_at: Mapped[Optional[datetime]] = mapped_column(DateTime, server_default=func.now(), nullable=True) conversation = relationship("ConversationSQL", back_populates="messages")
[docs] class EmbeddingStateSQL(Base): """Tracks embedding status per object and model to avoid unnecessary re-embedding""" __tablename__ = "embedding_state" id: Mapped[int] = mapped_column(Integer, primary_key=True) object_type: Mapped[str] = mapped_column(String(20)) object_id: Mapped[int] = mapped_column(Integer) provider: Mapped[str] = mapped_column(String(50)) model_name: Mapped[str] = mapped_column(String(255)) vector_size: Mapped[int] = mapped_column(Integer) content_hash: Mapped[str] = mapped_column(String(64)) embedded_at: Mapped[Optional[datetime]] = mapped_column( DateTime, server_default=func.now(), onupdate=func.now(), nullable=True ) __table_args__ = ( UniqueConstraint( "object_type", "object_id", "provider", "model_name", "vector_size", name="unique_embedding_state" ), CheckConstraint("object_type IN ('subdivision', 'chunk', 'act')", name="check_embedding_state_object_type"), )
# Keep unused imports used in type annotations __all__ = [ "Base", "ActsSQL", "ActRelationsSQL", "ActMetadataSQL", "ActSubjectsSQL", "SubjectMattersSQL", "VersionsSQL", "SubdivisionsSQL", "ChunksSQL", "ConversationSQL", "ConversationMessageSQL", "EmbeddingStateSQL", ]