// HintFileUpdater.java
/****************************************************************************
* Copyright (c) 2025 Carsten Hammer.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Carsten Hammer
*****************************************************************************/
package org.sandbox.mining.core.comparison;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.sandbox.jdt.triggerpattern.internal.DslValidator;
import org.sandbox.jdt.triggerpattern.llm.CommitEvaluation;
/**
* Applies validated DSL rules discovered during gap analysis
* to {@code .sandbox-hint} files.
*
* <p>Rules are <b>appended</b> to existing hint files rather than overwriting
* them. Duplicate detection prevents the same rule from being added twice.</p>
*/
public class HintFileUpdater {
private final DslValidator validator;
/**
 * Constructs an updater that vets candidate rules with the supplied validator.
 *
 * <p>The validator is consulted by {@link #applyGaps(List, Path)} before a
 * rule is written.</p>
 *
 * @param validator the DSL validator used to check rules
 */
public HintFileUpdater(final DslValidator validator) {
    this.validator = validator;
}
/**
 * Writes the reference DSL rules carried by gap entries into hint files.
 *
 * <p>Only gaps whose category is {@code MISSING_DSL_RULE},
 * {@code INVALID_DSL_RULE}, {@code DSL_SYNTAX} or {@code GUARD_WISSEN} are
 * considered. A gap is skipped when its reference value is {@code null} or
 * blank, or when the validator reports it as not valid. Each accepted rule is
 * written to a file in {@code outputDir} named after the sanitized commit
 * hash prefix plus {@code .sandbox-hint}. If that file already exists the
 * rule is appended, unless an equivalent rule is already present.</p>
 *
 * @param gaps gap entries that may contain reference DSL rules
 * @param outputDir directory receiving the hint files; created if missing
 * @return one path per rule that was actually written, in processing order;
 *         a path is repeated when several rules were written to the same file
 * @throws IOException if the directory cannot be created or a hint file
 *         cannot be read or written
 */
public List<Path> applyGaps(List<GapEntry> gaps, Path outputDir) throws IOException {
    Files.createDirectories(outputDir);
    List<Path> written = new ArrayList<>();
    for (GapEntry entry : gaps) {
        var category = entry.category();
        boolean carriesRule = category == GapCategory.MISSING_DSL_RULE
                || category == GapCategory.INVALID_DSL_RULE
                || category == GapCategory.DSL_SYNTAX
                || category == GapCategory.GUARD_WISSEN;
        if (!carriesRule) {
            continue;
        }
        String candidate = entry.referenceValue();
        boolean hasText = candidate != null && !candidate.isBlank();
        // Only rules that have text and pass validation reach the file system
        if (hasText && validator.validate(candidate).valid()) {
            String targetName = sanitizeFileName(entry.commitHash()) + ".sandbox-hint"; //$NON-NLS-1$
            Path target = outputDir.resolve(targetName);
            if (appendRuleIfNotDuplicate(target, candidate)) {
                written.add(target);
            }
        }
    }
    return written;
}
/**
 * Writes {@code .sandbox-hint} files for evaluations that are GREEN with a
 * VALID DSL rule. If the target file already exists, rules are appended
 * (not overwritten). Duplicate rules are detected and skipped.
 *
 * <p>An evaluation is skipped unless its traffic light is {@code GREEN}, its
 * DSL validation result equals {@code "VALID"} and its rule is neither
 * {@code null} nor blank. The validator held by this instance is not
 * consulted here.</p>
 *
 * <p>The file name is taken from the evaluation's target hint file when one
 * is given (the {@code .sandbox-hint} suffix is added if missing); only the
 * last path element is used so the file always lands directly inside
 * {@code outputDir}. When no usable target name is available, including the
 * case where the name cannot be parsed as a path at all, the name is derived
 * from the sanitized commit hash prefix instead.</p>
 *
 * @param evaluations all commit evaluations from the mining run
 * @param outputDir directory where hint files will be written; created if missing
 * @return one path per rule that was actually written, in processing order;
 *         a path is repeated when several rules were written to the same file
 * @throws IOException if the directory cannot be created or a hint file
 *         cannot be read or written
 */
public List<Path> writeHintFiles(List<CommitEvaluation> evaluations, Path outputDir) throws IOException {
    final String suffix = ".sandbox-hint"; //$NON-NLS-1$
    Files.createDirectories(outputDir);
    List<Path> created = new ArrayList<>();
    for (CommitEvaluation eval : evaluations) {
        if (eval.trafficLight() != CommitEvaluation.TrafficLight.GREEN) {
            continue;
        }
        if (!"VALID".equals(eval.dslValidationResult())) { //$NON-NLS-1$
            continue;
        }
        String rule = eval.dslRule();
        if (rule == null || rule.isBlank()) {
            continue;
        }
        // Safe default: contains only [a-zA-Z0-9_-] plus the suffix
        String fileName = sanitizeFileName(eval.commitHash()) + suffix;
        String target = eval.targetHintFile();
        if (target != null && !target.isBlank()) {
            String candidate = target.endsWith(suffix) ? target : target + suffix;
            try {
                // Keep only the last path element to prevent path traversal
                // (targetHintFile may come from LLM output)
                Path baseName = Path.of(candidate).getFileName();
                String name = baseName != null ? baseName.toString() : ""; //$NON-NLS-1$
                if (!name.isEmpty() && !"..".equals(name)) { //$NON-NLS-1$
                    fileName = name;
                }
            } catch (InvalidPathException ignored) {
                // The suggested name contains characters the file system
                // rejects; keep the commit-hash based name rather than
                // aborting the whole run on one bad suggestion.
            }
        }
        Path hintFile = outputDir.resolve(fileName);
        if (appendRuleIfNotDuplicate(hintFile, rule)) {
            created.add(hintFile);
        }
    }
    return created;
}
/**
 * Appends a rule to the given hint file if it does not already contain
 * an equivalent rule. If the file does not exist, it is created.
 *
 * <p>Equivalence is decided on normalized rule signatures (see
 * {@link #extractRuleSignatures(String)}). The new text is skipped only when
 * it yields at least one signature and <em>all</em> of its signatures are
 * already present in the file. A text containing several rules of which only
 * some are present is appended as a whole, and a text that yields no
 * signature is always appended.</p>
 *
 * <p>A file that is missing, empty or whitespace-only is simply (over)written
 * with the new rule, so the result never starts with a separator.</p>
 *
 * @param hintFile the target hint file
 * @param newRule the new rule text to append
 * @return {@code true} if the rule was written (file created or updated),
 *         {@code false} if the rule was a duplicate and skipped
 * @throws IOException if file I/O fails
 */
static boolean appendRuleIfNotDuplicate(Path hintFile, String newRule) throws IOException {
    String existingContent = Files.exists(hintFile)
            ? Files.readString(hintFile, StandardCharsets.UTF_8)
            : ""; //$NON-NLS-1$
    if (existingContent.isBlank()) {
        // Nothing worth preserving: write the rule as the whole file content
        // instead of prefixing it with blank separator lines.
        Files.writeString(hintFile, newRule, StandardCharsets.UTF_8);
        return true;
    }
    Set<String> existingSignatures = extractRuleSignatures(existingContent);
    Set<String> newSignatures = extractRuleSignatures(newRule);
    // Duplicate only if every signature of the new text is already present
    if (!newSignatures.isEmpty() && existingSignatures.containsAll(newSignatures)) {
        return false;
    }
    // Leave exactly one blank line between the old content and the new rule
    // when the old content ends with a single line break or none
    String separator = existingContent.endsWith("\n") ? "\n" : "\n\n"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
    Files.writeString(hintFile, existingContent + separator + newRule, StandardCharsets.UTF_8);
    return true;
}
/**
 * Collects the normalized signatures of all rules found in hint file content.
 *
 * <p>The content is cut at every {@code ;;} terminator and each piece is
 * passed through {@link #normalizeRule(String)}; pieces that normalize to an
 * empty string are ignored.</p>
 *
 * @param content the hint file content; may be {@code null} or blank
 * @return a new mutable set of normalized rule signatures, empty when the
 *         content is {@code null}, blank, or contains no rule text
 */
static Set<String> extractRuleSignatures(String content) {
    Set<String> result = new HashSet<>();
    boolean hasContent = content != null && !content.isBlank();
    if (hasContent) {
        for (String rawBlock : content.split(";;")) { //$NON-NLS-1$
            String signature = normalizeRule(rawBlock);
            if (signature.isEmpty()) {
                continue;
            }
            result.add(signature);
        }
    }
    return result;
}
/**
 * Normalizes a single rule block by stripping comments, metadata
 * directives, blank lines and leading/trailing whitespace of each line.
 * This produces a canonical form that can be used for duplicate comparison.
 *
 * <p>Dropped lines are: empty lines, lines starting with {@code //}, lines
 * starting with {@code <!} (metadata directives), lines starting with
 * {@code *}, and every line of a block comment that opens with
 * {@code /*} at the start of a line, up to and including the line holding
 * the closing delimiter, regardless of whether the inner lines start with
 * {@code *}. The remaining lines are trimmed and joined with a single
 * {@code \n}. Whitespace inside a line is left untouched.</p>
 *
 * @param ruleBlock a single rule block (without the {@code ;;} terminator);
 *        may be {@code null}
 * @return the normalized rule text, or an empty string if nothing remains
 */
static String normalizeRule(String ruleBlock) {
    if (ruleBlock == null) {
        return ""; //$NON-NLS-1$
    }
    StringBuilder sb = new StringBuilder();
    boolean inBlockComment = false;
    for (String line : ruleBlock.split("\n")) { //$NON-NLS-1$
        String trimmed = line.trim();
        if (inBlockComment) {
            // Still inside a multi-line comment: drop the line and leave
            // comment mode once the closing delimiter has been seen
            if (trimmed.contains("*/")) { //$NON-NLS-1$
                inBlockComment = false;
            }
            continue;
        }
        // Skip empty lines
        if (trimmed.isEmpty()) {
            continue;
        }
        // Skip line comments
        if (trimmed.startsWith("//")) { //$NON-NLS-1$
            continue;
        }
        // Skip metadata directives like <!id: ...> or <!description: ...>
        if (trimmed.startsWith("<!")) { //$NON-NLS-1$
            continue;
        }
        // Skip a block comment opener; search for the closer from index 2 so
        // that the '*' of the opener is not mistaken for part of "*/"
        if (trimmed.startsWith("/*")) { //$NON-NLS-1$
            inBlockComment = trimmed.indexOf("*/", 2) < 0; //$NON-NLS-1$
            continue;
        }
        // Skip stray comment continuation lines
        if (trimmed.startsWith("*")) { //$NON-NLS-1$
            continue;
        }
        if (sb.length() > 0) {
            sb.append('\n');
        }
        sb.append(trimmed);
    }
    return sb.toString();
}
/**
 * Derives a file-name-safe stem from a commit hash.
 *
 * <p>At most the first seven characters of the hash are used, and every
 * character outside {@code [a-zA-Z0-9_-]} is replaced by an underscore.</p>
 *
 * @param commitHash the commit hash; may be {@code null} or blank
 * @return the sanitized prefix, or {@code "unknown"} when the hash is
 *         {@code null} or blank
 */
public static String sanitizeFileName(String commitHash) {
    boolean missing = commitHash == null || commitHash.isBlank();
    if (missing) {
        return "unknown"; //$NON-NLS-1$
    }
    int prefixLength = Math.min(7, commitHash.length());
    String prefix = commitHash.substring(0, prefixLength);
    return prefix.replaceAll("[^a-zA-Z0-9_-]", "_"); //$NON-NLS-1$ //$NON-NLS-2$
}
}