ArchitectureImpactSelector.java
package com.taxonomy.architecture.service;
import com.taxonomy.dto.NodeOrigin;
import com.taxonomy.dto.RequirementElementView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.stream.Collectors;
/**
* Selects the most semantically valuable and concrete nodes for the final
* architecture impact presentation.
*
* <p>Uses a composite scoring formula that prioritises leaf-level nodes with
* high LLM scores, good specificity (depth in the taxonomy), and
* cross-category relation coverage. Generic near-root nodes are suppressed
* when more concrete descendants are available.
*
* <h3>Composite Impact Score</h3>
* <pre>
* impactScore = (llmScore × 0.3)
* + (specificityBonus × 0.25)
* + (crossCategoryBonus × 0.2)
* + (leafConcreteness × 0.15)
* + (humanReadability × 0.1)
* </pre>
*/
@Service
public class ArchitectureImpactSelector {
private static final Logger log = LoggerFactory.getLogger(ArchitectureImpactSelector.class);
/** Maximum number of impact-selected nodes per taxonomy category. */
static final int MAX_IMPACT_PER_CATEGORY = 5;
/** Names considered generic and weak when found at level ≤ 1. */
private static final Set<String> GENERIC_NAMES = Set.of(
"enable", "capabilities", "support", "general", "other",
"manage", "provide", "basic", "core", "common"
);
// ── Composite scoring weights ───────────────────────────────────────────
private static final double W_LLM_SCORE = 0.30;
private static final double W_SPECIFICITY = 0.25;
private static final double W_CROSS_CATEGORY = 0.20;
private static final double W_LEAF_CONCRETENESS = 0.15;
private static final double W_READABILITY = 0.10;
/**
* Selects the most impactful nodes from the full element list and marks
* them with {@link NodeOrigin#IMPACT_PROMOTED}.
*
* @param elements all elements from the architecture view
* @param allScores full LLM score map (nodeCode → 0–100)
* @param crossCategoryCodes set of node codes that participate in cross-category relations
* @return the input list with {@code selectedForImpact} and {@code origin} updated in place
*/
public List<RequirementElementView> selectForImpact(List<RequirementElementView> elements,
Map<String, Integer> allScores,
Set<String> crossCategoryCodes) {
if (elements == null || elements.isEmpty()) {
return elements;
}
// Group by taxonomy category (sheet)
Map<String, List<RequirementElementView>> byCategory = elements.stream()
.filter(e -> e.getTaxonomySheet() != null)
.collect(Collectors.groupingBy(RequirementElementView::getTaxonomySheet,
LinkedHashMap::new, Collectors.toList()));
for (Map.Entry<String, List<RequirementElementView>> entry : byCategory.entrySet()) {
String category = entry.getKey();
List<RequirementElementView> categoryElements = entry.getValue();
// Score each element
List<ScoredElement> scored = new ArrayList<>();
for (RequirementElementView el : categoryElements) {
double impactScore = computeImpactScore(el, allScores, crossCategoryCodes, categoryElements);
scored.add(new ScoredElement(el, impactScore));
}
// Sort by impact score descending
scored.sort(Comparator.comparingDouble(ScoredElement::score).reversed());
// Select top-N, skipping generic weak nodes that have better descendants
int selected = 0;
for (ScoredElement se : scored) {
if (selected >= MAX_IMPACT_PER_CATEGORY) break;
RequirementElementView el = se.element();
if (isTaxonomyScaffolding(el, categoryElements)
|| isGenericWeakNode(el, allScores, categoryElements)
|| isRedundantIntermediate(el, allScores, categoryElements)) {
continue;
}
el.setSelectedForImpact(true);
if (el.getOrigin() == null || el.getOrigin() == NodeOrigin.PROPAGATED
|| el.getOrigin() == NodeOrigin.TRACE_INTERMEDIATE) {
el.setOrigin(NodeOrigin.IMPACT_PROMOTED);
}
el.setSpecificityScore(se.score());
el.setPresenceReason(buildPresenceReason(el, se.score()));
selected++;
}
log.debug("Impact selection for category {}: {} of {} selected",
category, selected, categoryElements.size());
}
return elements;
}
/**
* Computes the composite impact score for a single element.
*/
double computeImpactScore(RequirementElementView element,
Map<String, Integer> allScores,
Set<String> crossCategoryCodes,
List<RequirementElementView> categoryElements) {
String code = element.getNodeCode();
// 1. LLM score component (normalised to 0–1)
double llmComponent = allScores.getOrDefault(code, 0) / 100.0;
// 2. Specificity: deeper nodes score higher (normalised via depth / 5.0, capped at 1)
double specificityComponent = Math.min(element.getTaxonomyDepth() / 5.0, 1.0);
// 3. Cross-category bonus: node participates in cross-category relations
double crossCategoryComponent = crossCategoryCodes.contains(code) ? 1.0 : 0.0;
// 4. Leaf concreteness: has a dash (leaf code) vs. root code
double leafComponent = code.contains("-") ? 1.0 : 0.2;
// 5. Human readability: longer, non-generic names are more readable
double readabilityComponent = computeReadability(element.getTitle());
return (llmComponent * W_LLM_SCORE)
+ (specificityComponent * W_SPECIFICITY)
+ (crossCategoryComponent * W_CROSS_CATEGORY)
+ (leafComponent * W_LEAF_CONCRETENESS)
+ (readabilityComponent * W_READABILITY);
}
/**
* Returns {@code true} if the node is at level ≤ 1 with a generic name
* and a more concrete descendant with ≥ 50% of its score exists.
*/
boolean isGenericWeakNode(RequirementElementView node,
Map<String, Integer> allScores,
List<RequirementElementView> categoryElements) {
if (node.getTaxonomyDepth() > 1) return false;
String name = node.getTitle();
if (name == null || !GENERIC_NAMES.contains(name.toLowerCase().trim())) {
return false;
}
int nodeScore = allScores.getOrDefault(node.getNodeCode(), 0);
int halfScore = nodeScore / 2;
// Check if any deeper node in the same category has a score ≥ half of this node's score
return categoryElements.stream()
.filter(e -> e.getTaxonomyDepth() > node.getTaxonomyDepth())
.anyMatch(e -> allScores.getOrDefault(e.getNodeCode(), 0) >= halfScore);
}
/**
* Returns {@code true} if the node is a taxonomy scaffolding node (depth ≤ 1)
* and more concrete descendants (depth > 1) exist in the same category.
*
* <p>Scaffolding nodes are the virtual root (e.g. "CP", depth 0) and the
* first-level container (e.g. "CP-1000", depth 1). They are structural
* grouping artifacts that do not carry distinct architectural meaning and should
* be suppressed from the impact presentation when deeper, more specific nodes
* are available in the same taxonomy category.
*/
boolean isTaxonomyScaffolding(RequirementElementView node,
List<RequirementElementView> categoryElements) {
if (node.getTaxonomyDepth() > 1) return false;
return categoryElements.stream()
.anyMatch(e -> e.getTaxonomyDepth() > 1);
}
/**
* Returns {@code true} if the node is an intermediate node that has exactly
* one strong child (≥ 50 % of the parent's score) in the same category.
* Such intermediates add no information beyond the child and should be suppressed
* so the child can represent the area directly.
*/
boolean isRedundantIntermediate(RequirementElementView node,
Map<String, Integer> allScores,
List<RequirementElementView> categoryElements) {
if (node.getTaxonomyDepth() <= 1) return false; // handled by scaffolding check
String nodeCode = node.getNodeCode();
int nodeScore = allScores.getOrDefault(nodeCode, 0);
int halfScore = Math.max(nodeScore / 2, 1);
// Count children (deeper nodes whose hierarchyPath passes through this node)
List<RequirementElementView> strongChildren = categoryElements.stream()
.filter(e -> e.getTaxonomyDepth() > node.getTaxonomyDepth())
.filter(e -> isChildOf(e, nodeCode))
.filter(e -> allScores.getOrDefault(e.getNodeCode(), 0) >= halfScore)
.toList();
// Suppress when exactly one strong child exists — the child alone represents the area
return strongChildren.size() == 1;
}
/**
* Returns {@code true} if {@code candidate} appears to be a descendant of
* the node identified by {@code parentCode}, based on its hierarchy path.
*/
private static boolean isChildOf(RequirementElementView candidate, String parentCode) {
String path = candidate.getHierarchyPath();
if (path != null && path.contains(parentCode)) return true;
// Fallback: code-prefix heuristic (e.g. "BP-1490" is a child of "BP-1327"
// when both share the same two-letter prefix) — not sufficient alone, but
// combined with the depth check it avoids false positives across categories.
return false;
}
/**
* Builds a short human-readable reason why this node was selected for impact.
*/
private static String buildPresenceReason(RequirementElementView el, double impactScore) {
StringBuilder sb = new StringBuilder();
sb.append(el.getNodeCode());
if (el.getTitle() != null) {
sb.append(" (").append(el.getTitle()).append(")");
}
sb.append(": ");
if (el.getOrigin() != null) {
sb.append(el.getOrigin().name().toLowerCase().replace('_', ' '));
}
sb.append(", score ").append(el.getDirectLlmScore());
sb.append(", impact ").append(String.format("%.2f", impactScore));
if (el.getTaxonomyDepth() > 0) {
sb.append(", depth ").append(el.getTaxonomyDepth());
}
return sb.toString();
}
private static double computeReadability(String title) {
if (title == null || title.isBlank()) return 0.0;
String lower = title.toLowerCase().trim();
if (GENERIC_NAMES.contains(lower)) return 0.2;
// Longer titles tend to be more descriptive
return Math.min(title.length() / 40.0, 1.0);
}
private record ScoredElement(RequirementElementView element, double score) {}
}