Source code for lalandre_rag.graph.community

"""
Community-aware context enrichment for Graph RAG (Level 3).

Communities are stored as :Community nodes in Neo4j, linked to Acts via
BELONGS_TO relationships. This module queries Neo4j directly — no JSON
files on disk.

Usage::

    enricher = CommunityContextEnricher(neo4j_repo)
    community_block, community_meta = enricher.build_context(seed_act_ids, max_communities=4)
"""

import json
import logging
from typing import Any, Dict, List, Set

from lalandre_core.config import get_config

logger = logging.getLogger(__name__)


class CommunityContextEnricher:
    """Enrich graph context with community summaries stored in Neo4j.

    The enricher asks the injected repository for the communities attached to
    a set of seed acts (``get_communities_for_acts``) and renders them as a
    size-bounded text block intended for the LLM prompt.
    """

    def __init__(self, neo4j_repo: Any) -> None:
        """Store the repository used for community lookups.

        Args:
            neo4j_repo: Object exposing ``get_communities_for_acts``. A falsy
                value (e.g. ``None``) disables enrichment.
        """
        self._repo = neo4j_repo

    @property
    def available(self) -> bool:
        """Return whether community enrichment can query Neo4j."""
        return self._repo is not None

    @staticmethod
    def _parse_central_acts(raw: Any) -> List[Dict[str, Any]]:
        """Normalize a ``central_acts`` value into a list of act dicts.

        Accepts either a JSON string or an already-decoded list. Anything
        that does not decode to a list yields ``[]``; non-dict entries are
        dropped so formatting never has to deal with unexpected shapes.
        """
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except (json.JSONDecodeError, TypeError):
                return []
        if not isinstance(raw, list):
            # Valid JSON can still be an object, a scalar or null.
            return []
        return [act for act in raw if isinstance(act, dict)]

    @staticmethod
    def _format_central_acts(
        central: List[Dict[str, Any]],
        title_chars: int,
        acts_display: int,
    ) -> str:
        """Render up to ``acts_display`` acts as ``"celex (title)"`` items."""
        rendered: List[str] = []
        for act in central[:acts_display]:
            celex = act.get("celex", "Unknown")
            title = act.get("title", "Unknown")
            if title is None:
                # The key can be present with a null value.
                title = "Unknown"
            rendered.append(f"{celex} ({str(title)[:title_chars]})")
        return ", ".join(rendered)

    def build_context(
        self,
        seed_act_ids: Set[int],
        max_communities: int = 4,
        max_chars: int = 3000,
    ) -> tuple[str, Dict[str, Any]]:
        """Build a community context block for the LLM.

        Args:
            seed_act_ids: Act identifiers whose communities are looked up.
            max_communities: Maximum number of communities to consider, taken
                in the order returned by the repository.
            max_chars: Upper bound on the length of the returned text,
                including the header and the newline separators.

        Returns:
            ``(formatted_text, metadata_dict)``. The text is empty when there
            are no seeds, no repository, no communities, or when not even the
            first community fits in the budget. ``metadata_dict`` always has
            ``"available"``; when text is produced it also has
            ``"communities_matched"``, ``"community_ids"`` and
            ``"community_block_chars"``.
        """
        meta: Dict[str, Any] = {"available": False}
        if not seed_act_ids or not self._repo:
            return "", meta

        communities = self._repo.get_communities_for_acts(list(seed_act_ids))
        if not communities:
            return "", meta
        meta["available"] = True

        selected = communities[:max_communities]
        if not selected:
            # Nothing to render (e.g. max_communities == 0).
            return "", meta

        # Display limits do not change between communities: read them once.
        graph_cfg = get_config().graph
        title_chars = graph_cfg.community_central_act_title_chars
        acts_display = graph_cfg.community_central_acts_display

        header = "--- Regulatory Community Context ---"
        lines: List[str] = [header]
        remaining = max_chars - len(header)
        community_ids_used: List[int] = []

        for comm in selected:
            cid = comm.get("id", -1)
            # A null summary would otherwise be rendered as the text "None".
            summary = comm.get("summary") or ""
            central = self._parse_central_acts(comm.get("central_acts", "[]"))
            central_str = self._format_central_acts(
                central, title_chars, acts_display
            )

            block = f"\n[C{cid}] {summary}\n Central acts: {central_str}"
            # "\n".join() below adds one separator in front of every block,
            # so it has to be charged against the budget as well.
            cost = len(block) + 1
            if cost > remaining:
                break
            lines.append(block)
            remaining -= cost
            community_ids_used.append(cid)

        if len(lines) <= 1:
            return "", meta

        text = "\n".join(lines)
        meta.update(
            {
                "communities_matched": len(community_ids_used),
                "community_ids": community_ids_used,
                "community_block_chars": len(text),
            }
        )
        logger.info(
            "Community enrichment: %d communities, %d chars",
            len(community_ids_used),
            len(text),
        )
        return text, meta