// KnownRulesStore.java
/*******************************************************************************
* Copyright (c) 2025 Carsten Hammer.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Carsten Hammer
*******************************************************************************/
package org.sandbox.mining.core.config;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.sandbox.jdt.triggerpattern.llm.CommitEvaluation;
/**
 * Persistent store for known mining rules ({@code known-rules.json}).
 *
 * <p>Tracks all rules ever discovered and validated across mining runs.
 * GREEN+VALID evaluations are registered after each run so the LLM can
 * be informed about already-known rules and avoid proposing duplicates.</p>
 *
 * <p>The store is mutable and performs no synchronization of its own.</p>
 *
 * @since 1.3.2
 */
public class KnownRulesStore {

    /** Status of a known rule. */
    public enum RuleStatus {
        /** Discovered by mining but not yet implemented as a .sandbox-hint file. */
        DISCOVERED,
        /** Implemented as a .sandbox-hint rule. */
        IMPLEMENTED,
        /** Manually rejected by a reviewer. */
        REJECTED,
        /** Requires a DSL language extension before it can be implemented. */
        NEEDS_DSL_EXTENSION
    }

    /**
     * A single known rule entry.
     *
     * <p>Plain mutable bean; instances are (de)serialized by Gson, which is why
     * a no-arg constructor is provided.</p>
     */
    public static class KnownRule {
        /** Identifier derived from category and summary; not guaranteed to be unique. */
        private String id;
        /** Category reported by the evaluation. */
        private String category;
        /** The DSL rule text. */
        private String dslRule;
        /** Short human-readable description of the rule. */
        private String summary;
        /** Discovery date as produced by {@code LocalDate.toString()} (ISO-8601 date). */
        private String discoveredAt;
        /** Number of the mining run in which the rule was registered. */
        private int discoveredInRun;
        /** Hash of the commit the rule was derived from; used for de-duplication. */
        private String sourceCommit;
        /** Current life-cycle status of the rule. */
        private RuleStatus status;
        /** Target hint file reported by the evaluation; may be {@code null}. */
        private String hintFile;

        /** No-arg constructor for Gson. */
        public KnownRule() {
        }

        /**
         * Creates a fully populated rule entry.
         *
         * @param id              rule identifier
         * @param category        rule category
         * @param dslRule         DSL rule text
         * @param summary         short description
         * @param discoveredAt    discovery date as text
         * @param discoveredInRun mining run number
         * @param sourceCommit    hash of the originating commit
         * @param status          life-cycle status
         * @param hintFile        target hint file, may be {@code null}
         */
        public KnownRule(String id, String category, String dslRule, String summary,
                String discoveredAt, int discoveredInRun, String sourceCommit,
                RuleStatus status, String hintFile) {
            this.id = id;
            this.category = category;
            this.dslRule = dslRule;
            this.summary = summary;
            this.discoveredAt = discoveredAt;
            this.discoveredInRun = discoveredInRun;
            this.sourceCommit = sourceCommit;
            this.status = status;
            this.hintFile = hintFile;
        }

        public String getId() { return id; }
        public void setId(String id) { this.id = id; }
        public String getCategory() { return category; }
        public void setCategory(String category) { this.category = category; }
        public String getDslRule() { return dslRule; }
        public void setDslRule(String dslRule) { this.dslRule = dslRule; }
        public String getSummary() { return summary; }
        public void setSummary(String summary) { this.summary = summary; }
        public String getDiscoveredAt() { return discoveredAt; }
        public void setDiscoveredAt(String discoveredAt) { this.discoveredAt = discoveredAt; }
        public int getDiscoveredInRun() { return discoveredInRun; }
        public void setDiscoveredInRun(int discoveredInRun) { this.discoveredInRun = discoveredInRun; }
        public String getSourceCommit() { return sourceCommit; }
        public void setSourceCommit(String sourceCommit) { this.sourceCommit = sourceCommit; }
        public RuleStatus getStatus() { return status; }
        public void setStatus(RuleStatus status) { this.status = status; }
        public String getHintFile() { return hintFile; }
        public void setHintFile(String hintFile) { this.hintFile = hintFile; }
    }

    /**
     * Root JSON structure for known-rules.json.
     */
    static class KnownRulesData {
        int version = 1;
        List<KnownRule> rules = new ArrayList<>();
    }

    private static final Gson GSON = new GsonBuilder().setPrettyPrinting().create();

    /** Matches every run of characters that may not appear in the summary part of a rule id. */
    private static final Pattern NON_SLUG_CHARS = Pattern.compile("[^a-z0-9]+"); //$NON-NLS-1$

    /** Maximum length of the summary part of a rule id. */
    private static final int MAX_SUMMARY_SLUG_LENGTH = 40;

    private final KnownRulesData data;

    /** Creates a new empty store. */
    public KnownRulesStore() {
        this.data = new KnownRulesData();
    }

    private KnownRulesStore(KnownRulesData data) {
        this.data = data;
    }

    /**
     * Loads the store from a JSON file, or returns a new empty store if the file
     * does not exist.
     *
     * <p>Loading is best-effort: if the file exists but cannot be read or parsed,
     * a warning is printed to {@code System.err} and an empty store is returned
     * instead of propagating the failure. Callers should be aware that saving
     * such an empty store to the same path replaces the unreadable file.</p>
     *
     * @param path path to known-rules.json
     * @return the loaded store, never {@code null}
     * @throws IOException declared for API compatibility; read failures are
     *                     currently reported as a warning rather than thrown
     */
    public static KnownRulesStore load(Path path) throws IOException {
        if (!Files.exists(path)) {
            return new KnownRulesStore();
        }
        try {
            String json = Files.readString(path, StandardCharsets.UTF_8);
            KnownRulesData loaded = GSON.fromJson(json, KnownRulesData.class);
            return new KnownRulesStore(sanitize(loaded));
        } catch (IOException | RuntimeException e) {
            System.err.println("Warning: could not load known-rules.json: " + e.getMessage()); //$NON-NLS-1$
            return new KnownRulesStore();
        }
    }

    /**
     * Normalizes freshly deserialized data so the rest of the class can rely on
     * a non-null, mutable rule list without null entries. A document such as
     * {@code {"rules": null}} or {@code {"rules": [null]}} would otherwise
     * surface later as a {@link NullPointerException}.
     */
    private static KnownRulesData sanitize(KnownRulesData loaded) {
        if (loaded == null) {
            // An empty or literal "null" document deserializes to null.
            return new KnownRulesData();
        }
        List<KnownRule> cleaned = new ArrayList<>();
        if (loaded.rules != null) {
            for (KnownRule rule : loaded.rules) {
                if (rule != null) {
                    cleaned.add(rule);
                }
            }
        }
        loaded.rules = cleaned;
        return loaded;
    }

    /**
     * Saves the store to a JSON file atomically.
     *
     * <p>The JSON is first written to a sibling file with a {@code .tmp} suffix
     * and then moved over the target. If writing or moving fails, the temporary
     * file is removed again before the failure is propagated.</p>
     *
     * @param path path to known-rules.json
     * @throws IOException if the file cannot be written
     */
    public void save(Path path) throws IOException {
        Path parent = path.getParent();
        if (parent != null) {
            Files.createDirectories(parent);
        }
        Path fileName = path.getFileName();
        if (fileName == null) {
            throw new IOException("Cannot save known rules to a root path"); //$NON-NLS-1$
        }
        Path tmpFile = path.resolveSibling(fileName + ".tmp"); //$NON-NLS-1$
        String json = GSON.toJson(data);
        try {
            Files.writeString(tmpFile, json, StandardCharsets.UTF_8);
            Files.move(tmpFile, path, StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
        } catch (IOException | RuntimeException e) {
            // Do not leave a stale temporary file next to the real one.
            try {
                Files.deleteIfExists(tmpFile);
            } catch (IOException cleanupFailure) {
                e.addSuppressed(cleanupFailure);
            }
            throw e;
        }
    }

    /**
     * Returns all known rules.
     *
     * @return unmodifiable snapshot of the known rules
     */
    public List<KnownRule> getRules() {
        return List.copyOf(data.rules);
    }

    /**
     * Returns the number of known rules.
     *
     * @return count
     */
    public int size() {
        return data.rules.size();
    }

    /**
     * Checks whether a rule with the given commit hash already exists.
     *
     * @param commitHash the commit hash; {@code null} never matches
     * @return true if a rule from this commit is already known
     */
    public boolean containsCommit(String commitHash) {
        if (commitHash == null) {
            return false;
        }
        for (KnownRule rule : data.rules) {
            if (commitHash.equals(rule.sourceCommit)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Registers GREEN+VALID evaluations from a mining run as known rules.
     * Evaluations whose commit hash is already present are skipped, as are
     * evaluations without a commit hash because they cannot be de-duplicated.
     *
     * @param evaluations all evaluations from the current run; {@code null} is
     *                    treated like an empty list
     * @param runNumber   the mining run number
     * @return the number of newly registered rules
     */
    public int registerFromEvaluations(List<CommitEvaluation> evaluations, int runNumber) {
        if (evaluations == null || evaluations.isEmpty()) {
            return 0;
        }
        // Build the commit index once instead of rescanning all rules per evaluation.
        Map<String, String> knownCommits = getCommitHashIndex();
        // One date for the whole batch, even if the call straddles midnight.
        String today = LocalDate.now().toString();
        int added = 0;
        for (CommitEvaluation eval : evaluations) {
            if (!isGreenAndValid(eval)) {
                continue;
            }
            String commitHash = eval.commitHash();
            if (commitHash == null || knownCommits.containsKey(commitHash)) {
                continue;
            }
            String ruleId = buildRuleId(eval);
            data.rules.add(new KnownRule(
                    ruleId,
                    eval.category(),
                    eval.dslRule(),
                    eval.summary(),
                    today,
                    runNumber,
                    commitHash,
                    RuleStatus.DISCOVERED,
                    eval.targetHintFile()));
            // Also guards against the same commit appearing twice in this batch.
            knownCommits.put(commitHash, ruleId);
            added++;
        }
        return added;
    }

    /**
     * Formats the known rules as a concise text summary suitable for LLM context.
     *
     * <p>The result is a JSON array with one object per rule containing the
     * {@code id}, {@code category}, {@code summary} and {@code dslRule} fields;
     * {@code null} field values are emitted as empty strings.</p>
     *
     * @return summary string, or empty string if no rules are known
     */
    public String formatForPrompt() {
        if (data.rules.isEmpty()) {
            return ""; //$NON-NLS-1$
        }
        StringBuilder sb = new StringBuilder();
        sb.append('[');
        for (int i = 0; i < data.rules.size(); i++) {
            KnownRule r = data.rules.get(i);
            if (i > 0) {
                sb.append(',');
            }
            sb.append("\n {\"id\":\"").append(escape(r.id)) //$NON-NLS-1$
                    .append("\",\"category\":\"").append(escape(r.category)) //$NON-NLS-1$
                    .append("\",\"summary\":\"").append(escape(r.summary)) //$NON-NLS-1$
                    .append("\",\"dslRule\":\"").append(escape(r.dslRule)) //$NON-NLS-1$
                    .append("\"}"); //$NON-NLS-1$
        }
        sb.append("\n]"); //$NON-NLS-1$
        return sb.toString();
    }

    /**
     * Returns a map of commit hashes to their rule IDs for quick lookup.
     * Rules without a source commit are omitted. The returned map is a fresh
     * copy in rule order; modifying it does not affect the store.
     *
     * @return map of commitHash → ruleId
     */
    public Map<String, String> getCommitHashIndex() {
        Map<String, String> index = new LinkedHashMap<>();
        for (KnownRule rule : data.rules) {
            if (rule.sourceCommit != null) {
                index.put(rule.sourceCommit, rule.id);
            }
        }
        return index;
    }

    /**
     * Tells whether an evaluation qualifies for registration: it must be
     * relevant, rated GREEN, carry a non-blank DSL rule and have the validation
     * result {@code "VALID"}.
     */
    private static boolean isGreenAndValid(CommitEvaluation eval) {
        if (eval == null || !eval.relevant()) {
            return false;
        }
        if (eval.trafficLight() != CommitEvaluation.TrafficLight.GREEN) {
            return false;
        }
        String dslRule = eval.dslRule();
        if (dslRule == null || dslRule.isBlank()) {
            return false;
        }
        return "VALID".equals(eval.dslValidationResult()); //$NON-NLS-1$
    }

    /**
     * Builds a rule id of the form {@code <category>-<summary-slug>}.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so that ids do not depend on the
     * default locale of the machine running the miner. The id is derived from
     * text only and is therefore not guaranteed to be unique.</p>
     */
    private static String buildRuleId(CommitEvaluation eval) {
        String rawCategory = eval.category();
        String category = rawCategory != null
                ? rawCategory.toLowerCase(Locale.ROOT).replace(' ', '-')
                : "unknown"; //$NON-NLS-1$
        return category + "-" + slugify(eval.summary()); //$NON-NLS-1$
    }

    /**
     * Turns a free-text summary into a lower-case, dash-separated slug of at
     * most {@value #MAX_SUMMARY_SLUG_LENGTH} characters without leading or
     * trailing dashes. Falls back to {@code "rule"} when nothing usable remains.
     */
    private static String slugify(String summary) {
        if (summary == null) {
            return "rule"; //$NON-NLS-1$
        }
        String slug = NON_SLUG_CHARS.matcher(summary.toLowerCase(Locale.ROOT)).replaceAll("-"); //$NON-NLS-1$
        // Runs are collapsed by the pattern, so at most one dash can lead or trail.
        if (slug.startsWith("-")) { //$NON-NLS-1$
            slug = slug.substring(1);
        }
        if (slug.length() > MAX_SUMMARY_SLUG_LENGTH) {
            slug = slug.substring(0, MAX_SUMMARY_SLUG_LENGTH);
        }
        if (slug.endsWith("-")) { //$NON-NLS-1$
            slug = slug.substring(0, slug.length() - 1);
        }
        return slug.isEmpty() ? "rule" : slug; //$NON-NLS-1$
    }

    /**
     * Escapes a value for use inside a double-quoted JSON string.
     *
     * <p>Besides backslash and double quote, every control character below
     * U+0020 is escaped, because JSON does not allow them unescaped inside
     * strings. Characters without a short escape are written in the
     * four-hex-digit unicode escape form.</p>
     *
     * @param s the raw value, may be {@code null}
     * @return the escaped value, or an empty string for {@code null}
     */
    private static String escape(String s) {
        if (s == null) {
            return ""; //$NON-NLS-1$
        }
        StringBuilder out = new StringBuilder(s.length() + 16);
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            switch (c) {
            case '\\':
                out.append("\\\\"); //$NON-NLS-1$
                break;
            case '"':
                out.append("\\\""); //$NON-NLS-1$
                break;
            case '\n':
                out.append("\\n"); //$NON-NLS-1$
                break;
            case '\r':
                out.append("\\r"); //$NON-NLS-1$
                break;
            case '\t':
                out.append("\\t"); //$NON-NLS-1$
                break;
            case '\b':
                out.append("\\b"); //$NON-NLS-1$
                break;
            case '\f':
                out.append("\\f"); //$NON-NLS-1$
                break;
            default:
                if (c < 0x20) {
                    out.append(String.format("\\u%04x", (int) c)); //$NON-NLS-1$
                } else {
                    out.append(c);
                }
                break;
            }
        }
        return out.toString();
    }
}