"""Canonical summary generation: LLM-based and deterministic fallback."""
from __future__ import annotations
import logging
from typing import Any, Dict, List, Optional, Sequence
from lalandre_core.config import get_config
from lalandre_core.http.llm_client import JSONHTTPLLMClient
from .agent import run_summary_agent
from .models import CANONICAL_SUMMARY_PROMPT_VERSION, SummaryTraceRecorder
logger = logging.getLogger(__name__)
# [docs]  (Sphinx source-link residue from HTML extraction; not part of the module)
class CanonicalSummaryGenerator:
    """Generate stable, reusable act summaries from structured act content.

    The generator first tries an LLM (through ``run_summary_agent``) and falls
    back to a deterministic, template-based summary when no client is
    configured or when the LLM path raises.
    """

    # Model identifier reported whenever the summary did not come from an LLM.
    _FALLBACK_MODEL_ID = "deterministic:fallback"

    def __init__(
        self,
        *,
        llm_client: Optional[JSONHTTPLLMClient],
        prompt_version: str = CANONICAL_SUMMARY_PROMPT_VERSION,
        model_id: Optional[str] = None,
    ) -> None:
        """Store the client and read the context budget from configuration.

        Args:
            llm_client: Client whose ``generate`` callable is handed to the
                summary agent; ``None`` forces the deterministic path.
            prompt_version: Version tag recorded in every generation trace.
            model_id: Model name passed to the agent and reported in results;
                defaults to the deterministic fallback identifier.
        """
        self.llm_client = llm_client
        self.prompt_version = prompt_version
        self.model_id = model_id or self._FALLBACK_MODEL_ID
        self.max_context_chars = int(get_config().generation.summarize_max_context_chars)

    def generate(
        self,
        *,
        act: Any,
        version: Any,
        subdivisions: Sequence[Any],
    ) -> Dict[str, Any]:
        """Generate a canonical summary with LLM-first and deterministic fallback.

        Args:
            act: Object exposing (optionally) ``title``, ``celex``,
                ``act_type`` and ``language`` attributes.
            version: Object exposing (optionally) ``version_number``,
                ``version_type`` and ``version_date``; may be ``None``.
            subdivisions: Objects exposing (optionally) ``subdivision_type``,
                ``number``, ``title`` and ``content``.

        Returns:
            A dict with the keys ``summary_text``, ``trace`` and ``model_id``.
        """
        context = self._build_structured_context(act=act, version=version, subdivisions=subdivisions)
        if self.llm_client is None:
            return self._deterministic_result(
                mode="deterministic",
                act=act,
                version=version,
                subdivisions=subdivisions,
                context=context,
            )

        prompt = self._build_prompt(act=act, version=version, context=context)
        try:
            output, _retries = run_summary_agent(
                prompt=prompt,
                generate_text=self.llm_client.generate,
                model_name=self.model_id,
            )
            return {
                "summary_text": output.summary,
                "trace": SummaryTraceRecorder.generation(
                    mode="llm",
                    context_chars=len(context),
                    subdivisions_used=len(subdivisions),
                    model_id=self.model_id,
                    prompt_version=self.prompt_version,
                ),
                "model_id": self.model_id,
            }
        except Exception as exc:
            # Deliberately broad: any failure of the LLM path must degrade to
            # the deterministic summary instead of failing the caller. The
            # traceback is kept in the log so the root cause is not lost.
            logger.warning(
                "Canonical act summary generation failed for %s: %s",
                getattr(act, "celex", "?"),
                exc,
                exc_info=True,
            )
            return self._deterministic_result(
                mode="deterministic_fallback",
                act=act,
                version=version,
                subdivisions=subdivisions,
                context=context,
            )

    def _deterministic_result(
        self,
        *,
        mode: str,
        act: Any,
        version: Any,
        subdivisions: Sequence[Any],
        context: str,
    ) -> Dict[str, Any]:
        """Build the result dict for a summary produced without the LLM."""
        return {
            "summary_text": self._build_deterministic_summary(act=act, version=version, subdivisions=subdivisions),
            "trace": SummaryTraceRecorder.generation(
                mode=mode,
                context_chars=len(context),
                subdivisions_used=len(subdivisions),
                model_id=self._FALLBACK_MODEL_ID,
                prompt_version=self.prompt_version,
            ),
            "model_id": self._FALLBACK_MODEL_ID,
        }

    @staticmethod
    def _text(obj: Any, name: str, default: str = "") -> str:
        """Return ``obj.<name>`` as stripped text, or ``default`` if missing, ``None`` or blank.

        Guards against a present-but-``None`` attribute being rendered as the
        literal string ``"None"`` in prompts and summaries.
        """
        value = getattr(obj, name, None)
        if value is None:
            return default
        return str(value).strip() or default

    def _build_prompt(self, *, act: Any, version: Any, context: str) -> str:
        """Assemble the (French) instruction prompt sent to the summary agent."""
        if version is None:
            version_label = "version non précisée"
        else:
            version_label = f"version {self._text(version, 'version_number', '?')}"
            version_date = self._text(version, "version_date")
            if version_date:
                # Only mention the date when there is one, avoiding a dangling "du ".
                version_label += f" du {version_date}"
        return (
            "Tu résumes un acte juridique pour un panneau de bibliothèque documentaire.\n"
            "Retourne uniquement un objet JSON de la forme "
            '{"summary":"..."}.\n'
            "Règles:\n"
            "- 4 à 6 phrases maximum.\n"
            "- Ton factuel, sans citations, sans markdown, sans liste.\n"
            "- Décris l'objet du texte, sa portée et sa structure si visible.\n"
            "- N'invente aucun élément absent du contenu fourni.\n\n"
            f"Acte: {self._text(act, 'title')}\n"
            f"CELEX: {self._text(act, 'celex')}\n"
            f"Type: {self._text(act, 'act_type')}\n"
            f"Langue: {self._text(act, 'language')}\n"
            f"Version: {version_label}\n\n"
            "Contenu structuré:\n"
            f"{context}"
        )

    def _build_structured_context(
        self,
        *,
        act: Any,
        version: Any,
        subdivisions: Sequence[Any],
    ) -> str:
        """Render the act header plus as many subdivision blocks as the budget allows.

        The metadata header is always emitted in full. Subdivision blocks are
        then appended while the total stays within ``self.max_context_chars``
        (join separators included). The first block is truncated to fit if
        necessary; later blocks are only added whole.
        """
        header_lines = [
            f"Titre: {self._text(act, 'title')}",
            f"CELEX: {self._text(act, 'celex')}",
            f"Type: {self._text(act, 'act_type')}",
            f"Langue: {self._text(act, 'language')}",
        ]
        if version is not None:
            version_line = f"Version courante: {self._text(version, 'version_number', '?')}"
            version_type = self._text(version, "version_type")
            if version_type:
                version_line += f" ({version_type})"
            header_lines.append(version_line)

        header = "\n".join(header_lines)
        blocks: List[str] = [header]
        remaining = self.max_context_chars - len(header)
        for subdivision in subdivisions:
            # Every appended block also costs one "\n" separator in the final join.
            room = remaining - 1
            if room <= 0:
                # Also covers a header that already exceeds the budget: slicing
                # with a negative bound would keep most of the block.
                break
            label_parts = [self._text(subdivision, "subdivision_type").lower()]
            number = getattr(subdivision, "number", None)
            title = getattr(subdivision, "title", None)
            if isinstance(number, str) and number.strip():
                label_parts.append(number.strip())
            if isinstance(title, str) and title.strip():
                label_parts.append(title.strip())
            label = " ".join(part for part in label_parts if part).strip()
            # Collapse all whitespace runs so the block stays compact.
            content = " ".join(self._text(subdivision, "content").split())
            if not content:
                continue
            block = f"\n[{label or 'contenu'}]\n{content}"
            if len(block) > room:
                if len(blocks) > 1:
                    break
                # Always give the model some body text: truncate the first block.
                block = block[:room]
            blocks.append(block)
            remaining -= len(block) + 1
        return "\n".join(blocks).strip()

    def _build_deterministic_summary(
        self,
        *,
        act: Any,
        version: Any,
        subdivisions: Sequence[Any],
    ) -> str:
        """Compose a template-based French summary from act metadata and structure."""
        act_type = self._text(act, "act_type", "texte")
        title = self._text(act, "title") or self._text(act, "celex", "Cet acte")
        language = self._text(act, "language")

        version_bits: List[str] = []
        if version is not None:
            version_number = getattr(version, "version_number", None)
            version_type = getattr(version, "version_type", None)
            if version_number is not None:
                version_bits.append(f"version {version_number}")
            if version_type:
                version_bits.append(str(version_type))

        article_count = 0
        section_titles: List[str] = []
        for subdivision in subdivisions:
            subtype = str(getattr(subdivision, "subdivision_type", "") or "").lower()
            if subtype == "article":
                article_count += 1
            title_value = getattr(subdivision, "title", None)
            if isinstance(title_value, str) and title_value.strip():
                section_titles.append(title_value.strip())
            if len(section_titles) >= 3:
                # Scanning stops once three titles are collected, so the article
                # count is a lower bound; the sentence below says "au moins".
                break

        opening = f"{title} est un {act_type}"
        if language:
            opening += f" en langue {language}"
        sentences = [f"{opening}."]
        if version_bits:
            sentences.append(f"La synthèse disponible repose sur la {' '.join(version_bits)} du texte.")
        if article_count > 0:
            article_suffix = "s" if article_count > 1 else ""
            sentences.append(f"Le document structuré couvre au moins {article_count} article{article_suffix}.")
        if section_titles:
            sentences.append(f"Les premières sections identifiables portent notamment sur {', '.join(section_titles)}.")
        if not section_titles and subdivisions:
            # No titled section: quote the opening of the first subdivision instead.
            first = " ".join(self._text(subdivisions[0], "content").split())
            if first:
                sentences.append(f"Le texte ouvre sur : {first[:220].rstrip()}...")
        sentences.append(
            "Ce résumé a été produit en mode déterministe de secours et peut être raffiné lors d'un prochain recalcul."
        )
        return " ".join(sentence.strip() for sentence in sentences if sentence.strip())