SourceScanner.java
/*******************************************************************************
* Copyright (c) 2025 Carsten Hammer.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Carsten Hammer
******************************************************************************/
package org.sandbox.mining.scanner;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import org.eclipse.jdt.core.dom.CompilationUnit;
import org.sandbox.jdt.triggerpattern.api.BatchTransformationProcessor;
import org.sandbox.jdt.triggerpattern.api.BatchTransformationProcessor.TransformationResult;
import org.sandbox.jdt.triggerpattern.api.HintFile;
import org.sandbox.mining.report.MiningReport;
/**
 * Scans directories for Java source files and runs transformation rules against them.
 *
 * <p>File discovery is capped at a configurable number of files; see
 * {@link #findJavaFiles(Path, List)}.
 */
public class SourceScanner {

    /** Parser used to turn the text of each Java file into a {@link CompilationUnit}. */
    private final StandaloneAstParser parser;

    /** Upper bound on the number of Java files collected by {@link #findJavaFiles(Path, List)}. */
    private final int maxFiles;

    /**
     * Creates a scanner with a fresh {@link StandaloneAstParser} and a limit of 5000 files.
     */
    public SourceScanner() {
        this(new StandaloneAstParser(), 5000);
    }

    /**
     * Creates a scanner with an explicit parser and file limit.
     *
     * @param parser   the parser used to build an AST from each source file
     * @param maxFiles the maximum number of Java files to collect per scan
     */
    public SourceScanner(StandaloneAstParser parser, int maxFiles) {
        this.parser = parser;
        this.maxFiles = maxFiles;
    }

    /**
     * Finds all Java files in the given directory tree.
     *
     * <p>Each entry of {@code subPaths} is resolved against {@code rootDir}; entries
     * that do not resolve to a directory are ignored. Files that cannot be visited
     * are skipped. Collection stops once {@code maxFiles} files have been found.
     * A file that is reachable through several overlapping sub-paths (for example
     * {@code src} and {@code src/main}) is returned only once.
     *
     * @param rootDir  the root directory to scan
     * @param subPaths optional sub-paths to restrict scanning to; if {@code null} or
     *                 empty, the whole of {@code rootDir} is scanned
     * @return a new, modifiable list of distinct Java file paths in discovery order
     * @throws IOException if directory traversal fails
     */
    public List<Path> findJavaFiles(Path rootDir, List<String> subPaths) throws IOException {
        List<Path> searchRoots = new ArrayList<>();
        if (subPaths == null || subPaths.isEmpty()) {
            searchRoots.add(rootDir);
        } else {
            for (String sub : subPaths) {
                searchRoots.add(rootDir.resolve(sub));
            }
        }

        // Insertion-ordered set: keeps discovery order while dropping files that are
        // reached a second time through an overlapping search root.
        Set<Path> javaFiles = new LinkedHashSet<>();
        for (Path searchRoot : searchRoots) {
            if (javaFiles.size() >= maxFiles) {
                // Cap already reached; walking further roots could not add anything.
                break;
            }
            if (!Files.isDirectory(searchRoot)) {
                continue;
            }
            Files.walkFileTree(searchRoot, new SimpleFileVisitor<>() {
                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
                    if (javaFiles.size() >= maxFiles) {
                        return FileVisitResult.TERMINATE;
                    }
                    if (file.toString().endsWith(".java")) {
                        javaFiles.add(file);
                    }
                    return FileVisitResult.CONTINUE;
                }

                @Override
                public FileVisitResult visitFileFailed(Path file, IOException exc) {
                    // Best effort: an unreadable entry must not abort the whole walk.
                    return FileVisitResult.CONTINUE;
                }
            });
        }
        return new ArrayList<>(javaFiles);
    }

    /**
     * Scans all Java files using the given hint files and produces a mining report.
     *
     * <p>The number of discovered files is recorded once for {@code repoName}. Then,
     * for every hint file, each discovered Java file is read as UTF-8, parsed and
     * processed, and every resulting match is added to the report. Note that each
     * file is therefore read and parsed once per hint file.
     *
     * @param repoName  name of the repository being scanned
     * @param rootDir   the root directory of the cloned repository; reported file
     *                  paths are relative to it
     * @param subPaths  optional sub-paths to restrict scanning to
     * @param hintFiles list of parsed hint files to apply
     * @return the mining report with all matches
     * @throws IOException if directory traversal or file reading fails
     */
    public MiningReport scan(String repoName, Path rootDir, List<String> subPaths, List<HintFile> hintFiles)
            throws IOException {
        MiningReport report = new MiningReport();
        List<Path> javaFiles = findJavaFiles(rootDir, subPaths);
        report.addFileCount(repoName, javaFiles.size());

        for (HintFile hintFile : hintFiles) {
            BatchTransformationProcessor processor = new BatchTransformationProcessor(hintFile);
            // Display name of the hint file; "unknown" when it carries no id.
            String hintFileName = hintFile.getId() != null ? hintFile.getId() : "unknown";

            for (Path javaFile : javaFiles) {
                String source = Files.readString(javaFile, StandardCharsets.UTF_8);
                CompilationUnit cu = parser.parse(source);
                List<TransformationResult> results = processor.process(cu);
                if (results.isEmpty()) {
                    continue;
                }

                // Same for every match in this file, so compute it once.
                String relativePath = rootDir.relativize(javaFile).toString();
                for (TransformationResult result : results) {
                    int line = cu.getLineNumber(result.match().getOffset());
                    // Fall back to the hint file name when the rule has no description.
                    String ruleName = result.rule().getDescription() != null
                            ? result.rule().getDescription()
                            : hintFileName;
                    report.addMatch(repoName, hintFileName, ruleName, relativePath, line,
                            result.matchedText(),
                            result.hasReplacement() ? result.replacement() : null);
                }
            }
        }
        return report;
    }
}