RuleGrouper.java

/*******************************************************************************
 * Copyright (c) 2026 Carsten Hammer.
 *
 * This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License 2.0
 * which accompanies this distribution, and is available at
 * https://www.eclipse.org/legal/epl-2.0/
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 * Contributors:
 *     Carsten Hammer
 *******************************************************************************/
package org.sandbox.jdt.triggerpattern.mining.analysis;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/**
 * Groups similar {@link InferredRule} instances that represent the same
 * refactoring pattern detected across multiple occurrences.
 *
 * <p>Two rules are considered similar when they share the same source and
 * replacement patterns (after normalizing placeholder names).</p>
 *
 * @since 1.2.6
 */
public class RuleGrouper {

	/** Confidence boost per additional occurrence when aggregating grouped rules. */
	private static final double OCCURRENCE_BOOST_PER_INSTANCE = 0.02;

	/** Maximum total boost from multiple occurrences. */
	private static final double MAX_OCCURRENCE_BOOST = 0.1;

	/** Absolute upper bound for any confidence value. */
	private static final double MAX_CONFIDENCE = 1.0;

	/**
	 * Groups a list of inferred rules by their normalized pattern signature.
	 *
	 * @param rules the rules to group
	 * @return a list of {@link RuleGroup} instances
	 */
	public List<RuleGroup> groupSimilar(List<InferredRule> rules) {
		Map<String, List<InferredRule>> groups = new LinkedHashMap<>();

		for (InferredRule rule : rules) {
			String key = normalizeKey(rule);
			groups.computeIfAbsent(key, k -> new ArrayList<>()).add(rule);
		}

		List<RuleGroup> result = new ArrayList<>();
		for (List<InferredRule> group : groups.values()) {
			InferredRule representative = group.get(0);
			double aggregated = aggregateConfidence(group);
			result.add(new RuleGroup(representative, List.copyOf(group),
					group.size(), aggregated));
		}
		return result;
	}

	private String normalizeKey(InferredRule rule) {
		// Normalize by replacing concrete placeholder names with positional markers
		String src = rule.sourcePattern();
		String repl = rule.replacementPattern();

		Map<String, Integer> placeholderIndices = new LinkedHashMap<>();
		for (int i = 0; i < rule.placeholderNames().size(); i++) {
			String name = rule.placeholderNames().get(i);
			placeholderIndices.putIfAbsent(name, Integer.valueOf(i));
		}

		for (Map.Entry<String, Integer> entry : placeholderIndices.entrySet()) {
			String positional = "$_" + entry.getValue(); //$NON-NLS-1$
			src = src.replace(entry.getKey(), positional);
			repl = repl.replace(entry.getKey(), positional);
		}
		return src + " => " + repl + " :: " + rule.kind(); //$NON-NLS-1$ //$NON-NLS-2$
	}

	private double aggregateConfidence(List<InferredRule> group) {
		double maxConfidence = group.stream()
				.mapToDouble(InferredRule::confidence).max().orElse(0.0);
		// Boost confidence slightly for each additional occurrence
		double boost = Math.min(MAX_OCCURRENCE_BOOST, OCCURRENCE_BOOST_PER_INSTANCE * (group.size() - 1));
		return Math.min(MAX_CONFIDENCE, maxConfidence + boost);
	}
}