RankFusionUtil.java

/*******************************************************************************
 * Copyright (c) 2026 Carsten Hammer.
 *
 * This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License 2.0
 * which accompanies this distribution, and is available at
 * https://www.eclipse.org/legal/epl-2.0/
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 * Contributors:
 *     Carsten Hammer
 *******************************************************************************/
package org.eclipse.jgit.storage.hibernate.search;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.eclipse.jgit.storage.hibernate.entity.JavaBlobIndex;

/**
 * Utility for combining results from multiple search sources using Reciprocal
 * Rank Fusion (RRF).
 * <p>
 * RRF assigns scores based on rank position rather than raw scores, making it
 * effective for combining results from heterogeneous sources (e.g., full-text
 * search and vector similarity search) that produce scores on different
 * scales.
 * </p>
 * <p>
 * The RRF formula is: {@code score(d) = Σ 1 / (k + rank(d))} where {@code k}
 * is a constant (default 60) and the sum is over all result lists.
 * </p>
 *
 * @see <a href="https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf">
 *      Reciprocal Rank Fusion outperforms Condorcet and individual Rank
 *      Learning Methods (Cormack et al., 2009)</a>
 */
public class RankFusionUtil {

	/** Default RRF constant k (from the original paper). */
	private static final int DEFAULT_K = 60;

	private RankFusionUtil() {
		// utility class
	}

	/**
	 * Combine two result lists using Reciprocal Rank Fusion.
	 * <p>
	 * Results appearing in both lists receive a higher fused score. The
	 * returned list is ordered by descending RRF score and truncated to
	 * {@code topK} results.
	 * </p>
	 *
	 * @param semanticResults
	 *            results from semantic (vector) search, ordered by relevance
	 * @param fulltextResults
	 *            results from full-text search, ordered by relevance
	 * @param topK
	 *            maximum number of results to return
	 * @return fused results ordered by combined RRF score
	 */
	@SuppressWarnings("boxing")
	public static List<JavaBlobIndex> reciprocalRankFusion(
			List<JavaBlobIndex> semanticResults,
			List<JavaBlobIndex> fulltextResults, int topK) {
		Map<Long, ScoredEntry> scoreMap = new LinkedHashMap<>();

		// Score semantic results
		for (int rank = 0; rank < semanticResults.size(); rank++) {
			JavaBlobIndex entry = semanticResults.get(rank);
			Long id = entry.getId();
			if (id == null) {
				continue;
			}
			double rrfScore = 1.0 / (DEFAULT_K + rank + 1);
			scoreMap.computeIfAbsent(id, k -> new ScoredEntry(entry))
					.addScore(rrfScore);
		}

		// Score full-text results
		for (int rank = 0; rank < fulltextResults.size(); rank++) {
			JavaBlobIndex entry = fulltextResults.get(rank);
			Long id = entry.getId();
			if (id == null) {
				continue;
			}
			double rrfScore = 1.0 / (DEFAULT_K + rank + 1);
			scoreMap.computeIfAbsent(id, k -> new ScoredEntry(entry))
					.addScore(rrfScore);
		}

		// Sort by combined score and return top K
		List<ScoredEntry> sorted = new ArrayList<>(scoreMap.values());
		sorted.sort(Comparator
				.comparingDouble(ScoredEntry::getScore).reversed());

		List<JavaBlobIndex> result = new ArrayList<>();
		for (int i = 0; i < Math.min(topK, sorted.size()); i++) {
			result.add(sorted.get(i).getEntry());
		}
		return result;
	}

	/**
	 * Internal holder for an entry and its accumulated RRF score.
	 */
	private static class ScoredEntry {
		private final JavaBlobIndex entry;

		private double score;

		ScoredEntry(JavaBlobIndex entry) {
			this.entry = entry;
		}

		void addScore(double additionalScore) {
			this.score += additionalScore;
		}

		double getScore() {
			return score;
		}

		JavaBlobIndex getEntry() {
			return entry;
		}
	}
}