// AnalysisRelationGenerator.java
package com.taxonomy.analysis.service;
import com.taxonomy.dto.RelationHypothesisDto;
import com.taxonomy.model.RelationType;
import com.taxonomy.catalog.model.TaxonomyNode;
import com.taxonomy.catalog.repository.TaxonomyNodeRepository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.util.*;
import com.taxonomy.relations.service.RelationCompatibilityMatrix;
/**
 * Generates provisional (not-yet-persisted) relation hypotheses from analysis
 * scores, using the {@link RelationCompatibilityMatrix} rules.
 *
 * <p>This service runs after LLM scoring and produces a list of
 * {@link RelationHypothesisDto} that can be shown in the UI and optionally
 * accepted into the knowledge graph by the user.
 */
@Service
public class AnalysisRelationGenerator {

    private static final Logger log = LoggerFactory.getLogger(AnalysisRelationGenerator.class);

    /** Minimum score (0–100) for a node to be considered as a relation endpoint. */
    static final int MIN_SCORE = 50;

    /**
     * Normalisation factor for computing confidence from two 0–100 scores.
     * confidence = (scoreA × scoreB) / 10 000 → 0.0–1.0 range.
     */
    static final double CONFIDENCE_NORMALIZATION_FACTOR = 10_000.0;

    private final RelationCompatibilityMatrix compatibilityMatrix;
    private final TaxonomyNodeRepository nodeRepository;

    public AnalysisRelationGenerator(RelationCompatibilityMatrix compatibilityMatrix,
                                     TaxonomyNodeRepository nodeRepository) {
        this.compatibilityMatrix = compatibilityMatrix;
        this.nodeRepository = nodeRepository;
    }

    /**
     * Generates provisional relations from scored nodes.
     *
     * <p>Algorithm:
     * <ol>
     *   <li>Select all nodes with score ≥ {@value #MIN_SCORE} that can be resolved
     *       through the repository and have a non-null taxonomy root</li>
     *   <li>Group them by taxonomy root and pick the top-scoring node of each root
     *       (the first one encountered wins a tie)</li>
     *   <li>For each source root and each {@link RelationType} for which the
     *       {@link RelationCompatibilityMatrix} reports a non-empty set of allowed
     *       target roots, pair the top node of the source root with the top node
     *       of every allowed target root that has qualifying nodes</li>
     *   <li>Compute confidence from {@code (scoreA × scoreB) / 10000}</li>
     *   <li>Keep the highest-confidence entry per source+target+type triple and
     *       return the DTOs sorted by confidence descending</li>
     * </ol>
     *
     * <p>Entries with a {@code null} code or a {@code null} score are ignored.
     *
     * @param scores map of nodeCode → integer score (0–100) from LLM analysis;
     *               may be {@code null} or empty
     * @return list of provisional relation hypotheses, sorted by confidence descending;
     *         empty when fewer than two taxonomy roots have qualifying nodes
     */
    @Transactional(readOnly = true)
    public List<RelationHypothesisDto> generate(Map<String, Integer> scores) {
        if (scores == null || scores.isEmpty()) {
            return Collections.emptyList();
        }

        // 1. Collect qualifying nodes (score >= MIN_SCORE), grouped by taxonomy root
        Map<String, List<ScoredNode>> nodesByRoot = groupQualifyingNodesByRoot(scores);
        if (nodesByRoot.size() < 2) {
            log.debug("Fewer than 2 taxonomy roots with qualifying nodes; no provisional relations generated.");
            return Collections.emptyList();
        }

        // 2. Generate candidates using compatibility matrix
        List<RelationHypothesisDto> hypotheses = buildHypotheses(selectBestPerRoot(nodesByRoot));

        // 3. Deduplicate: keep highest confidence per source+target+type triple
        List<RelationHypothesisDto> result = deduplicate(hypotheses);

        // 4. Sort by confidence descending (List.sort is stable, so ties keep generation order)
        result.sort(Comparator.comparingDouble(RelationHypothesisDto::getConfidence).reversed());

        log.info("Generated {} provisional relation hypotheses from {} qualifying nodes across {} roots",
                result.size(), nodesByRoot.values().stream().mapToInt(List::size).sum(), nodesByRoot.size());
        return result;
    }

    /** Resolves every qualifying score entry to a node and groups the results by taxonomy root, in encounter order. */
    private Map<String, List<ScoredNode>> groupQualifyingNodesByRoot(Map<String, Integer> scores) {
        Map<String, List<ScoredNode>> nodesByRoot = new LinkedHashMap<>();
        for (Map.Entry<String, Integer> entry : scores.entrySet()) {
            String code = entry.getKey();
            Integer score = entry.getValue();
            // A null score would throw on unboxing, and a null code cannot identify a node.
            if (code == null || score == null || score < MIN_SCORE) {
                continue;
            }
            Optional<TaxonomyNode> nodeOpt = nodeRepository.findByCode(code);
            if (nodeOpt.isEmpty()) {
                continue;
            }
            TaxonomyNode node = nodeOpt.get();
            String root = node.getTaxonomyRoot();
            if (root == null) {
                continue;
            }
            nodesByRoot.computeIfAbsent(root, k -> new ArrayList<>())
                    .add(new ScoredNode(code, node.getNameEn(), root, score));
        }
        return nodesByRoot;
    }

    /** Picks the top-scoring node of each root once, preserving root order; ties go to the first node seen. */
    private static Map<String, ScoredNode> selectBestPerRoot(Map<String, List<ScoredNode>> nodesByRoot) {
        Map<String, ScoredNode> bestByRoot = new LinkedHashMap<>();
        for (Map.Entry<String, List<ScoredNode>> entry : nodesByRoot.entrySet()) {
            entry.getValue().stream()
                    .max(Comparator.comparingInt(ScoredNode::score))
                    .ifPresent(best -> bestByRoot.put(entry.getKey(), best));
        }
        return bestByRoot;
    }

    /** Pairs the best node of each source root with the best node of every allowed target root, per relation type. */
    private List<RelationHypothesisDto> buildHypotheses(Map<String, ScoredNode> bestByRoot) {
        List<RelationHypothesisDto> hypotheses = new ArrayList<>();
        for (Map.Entry<String, ScoredNode> sourceEntry : bestByRoot.entrySet()) {
            String sourceRoot = sourceEntry.getKey();
            ScoredNode bestSource = sourceEntry.getValue();
            for (RelationType relationType : RelationType.values()) {
                Set<String> allowedTargets = compatibilityMatrix.allowedTargetRoots(sourceRoot, relationType);
                if (allowedTargets.isEmpty()) {
                    // Either no restrictions or no rules — skip for auto-generation
                    // to avoid noise from RELATED_TO
                    continue;
                }
                for (String targetRoot : allowedTargets) {
                    ScoredNode bestTarget = bestByRoot.get(targetRoot);
                    if (bestTarget == null) {
                        // No qualifying node in this target root
                        continue;
                    }
                    // Avoid self-relations
                    if (bestSource.code().equals(bestTarget.code())) {
                        continue;
                    }
                    hypotheses.add(toHypothesis(bestSource, bestTarget, relationType));
                }
            }
        }
        return hypotheses;
    }

    /** Builds the DTO, including confidence and human-readable reasoning, for one source/target/type combination. */
    private static RelationHypothesisDto toHypothesis(ScoredNode source, ScoredNode target,
                                                      RelationType relationType) {
        // Multiply in double so out-of-range scores cannot overflow int before the division.
        double confidence = ((double) source.score() * target.score()) / CONFIDENCE_NORMALIZATION_FACTOR;
        String reasoning = String.format(
                "%s (%s, score %d) %s %s (%s, score %d) — inferred from compatibility matrix",
                source.name(), source.root(), source.score(),
                relationType.name(),
                target.name(), target.root(), target.score());
        return new RelationHypothesisDto(
                source.code(), source.name(),
                target.code(), target.name(),
                relationType.name(),
                confidence,
                reasoning);
    }

    /** Keeps the highest-confidence hypothesis per source+target+type triple, preserving first-seen order. */
    private static List<RelationHypothesisDto> deduplicate(List<RelationHypothesisDto> hypotheses) {
        Map<String, RelationHypothesisDto> deduped = new LinkedHashMap<>();
        for (RelationHypothesisDto h : hypotheses) {
            String key = h.getSourceCode() + "→" + h.getTargetCode() + ":" + h.getRelationType();
            deduped.merge(key, h, (existing, candidate) ->
                    candidate.getConfidence() > existing.getConfidence() ? candidate : existing);
        }
        return new ArrayList<>(deduped.values());
    }

    /** Internal holder for a scored node with its metadata. */
    private record ScoredNode(String code, String name, String root, int score) {}
}