RankFusionUtil.java
package com.taxonomy.relations.service;
import com.taxonomy.dto.TaxonomyNodeDto;
import java.util.*;
/**
* Reciprocal Rank Fusion (RRF) utility for combining multiple ranked result lists.
*
* <p>RRF merges ranked lists from different retrieval systems (e.g., full-text Lucene and
* semantic KNN) without requiring score calibration. Each document receives a score of
* {@code 1 / (k + rank)} from each list in which it appears, where {@code k} is a smoothing
* constant (typically 60). The final score is the sum across all lists.
*
* <p>This mirrors the {@code reciprocalRankFusion()} helper described in the
* {@code GitDatabaseQueryService.hybridSearch()} plan from the sandbox project
* (sandbox-jgit-storage-hibernate).
*
* @see <a href="https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf">
* Cormack et al., "Reciprocal Rank Fusion outperforms Condorcet and individual Rank Learning
* Methods", SIGIR 2009</a>
*/
public final class RankFusionUtil {
/** RRF smoothing constant. 60 is the standard value from the original paper. */
static final int K = 60;
private RankFusionUtil() {}
/**
* Merges two ranked lists of taxonomy node DTOs using Reciprocal Rank Fusion.
*
* @param list1 first ranked list (e.g. semantic KNN results)
* @param list2 second ranked list (e.g. full-text Lucene results)
* @param topK maximum number of results to return
* @return merged and re-ranked list with at most {@code topK} entries
*/
public static List<TaxonomyNodeDto> fuse(List<TaxonomyNodeDto> list1,
List<TaxonomyNodeDto> list2,
int topK) {
Map<String, Double> scoreMap = new LinkedHashMap<>();
Map<String, TaxonomyNodeDto> dtoMap = new LinkedHashMap<>();
addRankScores(list1, scoreMap, dtoMap);
addRankScores(list2, scoreMap, dtoMap);
return scoreMap.entrySet().stream()
.sorted(Map.Entry.<String, Double>comparingByValue().reversed())
.limit(topK)
.map(e -> dtoMap.get(e.getKey()))
.toList();
}
private static void addRankScores(List<TaxonomyNodeDto> list,
Map<String, Double> scoreMap,
Map<String, TaxonomyNodeDto> dtoMap) {
for (int i = 0; i < list.size(); i++) {
TaxonomyNodeDto dto = list.get(i);
String code = dto.getCode();
double rrfScore = 1.0 / (K + i + 1);
scoreMap.merge(code, rrfScore, Double::sum);
dtoMap.putIfAbsent(code, dto);
}
}
}