Source code for scripts.backfill_regulatory_level

#!/usr/bin/env python3
"""
Backfill Lamfalussy level for all acts in the database.

Uses the level field (1=L1, 2=L2, 3=L3) with inference from act metadata.

Usage:
    python scripts/backfill_regulatory_level.py [--dry-run]
"""

import argparse
import sys
from collections import Counter
from pathlib import Path

# Ensure project root is on sys.path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from lalandre_core.config import get_config  # noqa: E402
from lalandre_core.utils.regulatory_level import (  # noqa: E402
    LEVEL_LABELS,
    infer_regulatory_level,
)
from lalandre_db_postgres.repository import PostgresRepository  # noqa: E402
from sqlalchemy import text  # noqa: E402


def main() -> None:
    """Infer and persist missing Lamfalussy levels for stored acts.

    Selects every row of ``acts`` whose ``level`` is NULL, passes its
    metadata to ``infer_regulatory_level``, prints a per-label count, and
    (unless ``--dry-run`` is given) writes each non-None inferred level back
    to the row and commits.

    Command-line flags:
        --dry-run: print the classification stats and exit without updating.
    """
    parser = argparse.ArgumentParser(description="Backfill Lamfalussy level on acts table")
    parser.add_argument("--dry-run", action="store_true", help="Print stats without updating")
    args = parser.parse_args()

    repo = PostgresRepository(get_config().database.connection_string)
    try:
        # Acquire the session inside the outer try so the repository is
        # closed even when session creation itself fails.
        session = repo.get_session()
        try:
            rows = session.execute(
                text("SELECT id, celex, act_type, title, form_number FROM acts WHERE level IS NULL")
            ).fetchall()

            print(f"Acts with NULL level: {len(rows)}")
            if not rows:
                print("Nothing to do.")
                return

            stats: Counter[str] = Counter()
            # One parameter dict per act to update, ready for executemany.
            updates: list[dict[str, int]] = []

            for act_id, celex, act_type, title, form_number in rows:
                level = infer_regulatory_level(
                    celex=celex,
                    act_type=act_type or "",
                    title=title,
                    form_number=form_number,
                )
                # Use the same "is None" test for the stats and for the
                # update decision so the printed counts match what is written.
                if level is None:
                    stats["None"] += 1
                    continue
                stats[LEVEL_LABELS.get(level, "None")] += 1
                updates.append({"level": level, "id": act_id})

            print("\nClassification results:")
            for key in ("L1", "L2", "L3", "None"):
                print(f" {key}: {stats.get(key, 0)}")

            if args.dry_run:
                print("\n--dry-run: no updates applied.")
                return

            if not updates:
                print("\nNo acts to update (all classified as None).")
                return

            # Passing a list of parameter dicts runs the statement in
            # executemany mode instead of one Python-level call per row.
            # NOTE(review): assumes the session is a SQLAlchemy Session — confirm.
            session.execute(
                text("UPDATE acts SET level = :level WHERE id = :id"),
                updates,
            )
            session.commit()
            print(f"\nUpdated {len(updates)} acts.")
        finally:
            session.close()
    finally:
        repo.close()
# Run the backfill only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()