Source code for scripts.backfill_regulatory_level
#!/usr/bin/env python3
"""
Backfill the Lamfalussy level for acts in the database whose level is NULL.

Fills the ``level`` column (1=L1, 2=L2, 3=L3) with a value inferred from act metadata.

Usage:
    python scripts/backfill_regulatory_level.py [--dry-run]
"""
import argparse
import sys
from collections import Counter
from pathlib import Path
# Ensure project root is on sys.path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from lalandre_core.config import get_config # noqa: E402
from lalandre_core.utils.regulatory_level import ( # noqa: E402
LEVEL_LABELS,
infer_regulatory_level,
)
from lalandre_db_postgres.repository import PostgresRepository # noqa: E402
from sqlalchemy import text # noqa: E402
[docs]
def main() -> None:
    """Infer and persist missing Lamfalussy levels for stored acts.

    Selects every row of ``acts`` whose ``level`` is NULL, infers a level from
    the act's CELEX number, type, title and form number, and prints how many
    acts fell into each label. Unless ``--dry-run`` is passed, the inferred
    levels are written back in a single batched UPDATE and committed. Acts for
    which no level could be inferred are left untouched.

    Command-line flags:
        --dry-run: print the classification statistics without updating.
    """
    parser = argparse.ArgumentParser(description="Backfill Lamfalussy level on acts table")
    parser.add_argument("--dry-run", action="store_true", help="Print stats without updating")
    args = parser.parse_args()

    repo = PostgresRepository(get_config().database.connection_string)
    # Nested try/finally: the repository is closed even when opening or
    # closing the session itself fails.
    try:
        session = repo.get_session()
        try:
            rows = session.execute(
                text("SELECT id, celex, act_type, title, form_number FROM acts WHERE level IS NULL")
            ).fetchall()
            print(f"Acts with NULL level: {len(rows)}")
            if not rows:
                print("Nothing to do.")
                return

            stats, updates = _classify_rows(rows)

            print("\nClassification results:")
            for key in ("L1", "L2", "L3", "None"):
                print(f" {key}: {stats.get(key, 0)}")

            if args.dry_run:
                print("\n--dry-run: no updates applied.")
                return

            if not updates:
                print("\nNo acts to update (all classified as None).")
                return

            # One execute() call with a list of parameter sets (executemany)
            # instead of one call per act.
            session.execute(
                text("UPDATE acts SET level = :level WHERE id = :id"),
                updates,
            )
            session.commit()
            print(f"\nUpdated {len(updates)} acts.")
        finally:
            session.close()
    finally:
        repo.close()


def _classify_rows(rows) -> "tuple[Counter[str], list[dict[str, int]]]":
    """Infer a level for each act row.

    Args:
        rows: iterable of ``(id, celex, act_type, title, form_number)`` tuples,
            in the column order of the SELECT issued by ``main``.

    Returns:
        A pair ``(stats, updates)``: ``stats`` counts acts per label ("None"
        when no label applies), and ``updates`` holds one
        ``{"level": ..., "id": ...}`` parameter dict per act whose inferred
        level is not None.
    """
    stats: Counter[str] = Counter()
    updates: list[dict[str, int]] = []
    for act_id, celex, act_type, title, form_number in rows:
        level = infer_regulatory_level(
            celex=celex,
            act_type=act_type or "",  # pass an empty string when act_type is NULL/empty
            title=title,
            form_number=form_number,
        )
        label = LEVEL_LABELS.get(level, "None") if level else "None"
        stats[label] += 1
        if level is not None:
            updates.append({"level": level, "id": act_id})
    return stats, updates
# Run the backfill only when executed as a script, not when imported.
if __name__ == "__main__":
    main()