StatisticsCollector.java
/*******************************************************************************
* Copyright (c) 2025 Carsten Hammer.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Carsten Hammer
*******************************************************************************/
package org.sandbox.mining.core.report;
import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.sandbox.jdt.triggerpattern.llm.CommitEvaluation;
/**
* Tracks statistical counts for the mining process.
*
* <p>Records counts of total processed commits, relevant/irrelevant commits,
* duplicates, traffic light distribution, per-repository statistics,
* and daily progress tracking.</p>
*/
public class StatisticsCollector {
private int totalProcessed;
private int relevant;
private int irrelevant;
private int duplicates;
private int green;
private int yellow;
private int red;
private final Map<String, Integer> irrelevantReasons = new LinkedHashMap<>();
private final Map<String, RepoStatistics> perRepository = new LinkedHashMap<>();
private final List<DailyProgress> dailyProgress = new ArrayList<>();
private RunMetadata runMetadata;
private TimeWindow timeWindow;
/**
* Per-repository statistics.
*/
public static class RepoStatistics {
private int totalProcessed;
private int relevant;
private int green;
private int yellow;
private int red;
public void record(CommitEvaluation evaluation) {
totalProcessed++;
if (evaluation.relevant()) {
relevant++;
}
switch (evaluation.trafficLight()) {
case GREEN -> green++;
case YELLOW -> yellow++;
case RED -> red++;
case NOT_APPLICABLE -> { /* no counter */ }
}
}
public int getTotalProcessed() { return totalProcessed; }
public int getRelevant() { return relevant; }
public int getGreen() { return green; }
public int getYellow() { return yellow; }
public int getRed() { return red; }
}
/**
* Daily progress entry.
*/
public static class DailyProgress {
private final String date;
private int processed;
private int relevant;
public DailyProgress(String date) {
this.date = date;
}
public String getDate() { return date; }
public int getProcessed() { return processed; }
public int getRelevant() { return relevant; }
}
/**
* Records a single evaluation in the statistics.
*
* @param evaluation the evaluation to record
*/
public void record(CommitEvaluation evaluation) {
totalProcessed++;
if (evaluation.relevant()) {
relevant++;
} else {
irrelevant++;
if (evaluation.irrelevantReason() != null) {
irrelevantReasons.merge(evaluation.irrelevantReason(), 1, Integer::sum);
}
}
if (evaluation.isDuplicate()) {
duplicates++;
}
switch (evaluation.trafficLight()) {
case GREEN -> green++;
case YELLOW -> yellow++;
case RED -> red++;
case NOT_APPLICABLE -> { /* no counter */ }
}
// Per-repository tracking
if (evaluation.repoUrl() != null) {
perRepository.computeIfAbsent(evaluation.repoUrl(), k -> new RepoStatistics())
.record(evaluation);
}
// Daily progress tracking — use the evaluation's date if available, else today
String today = (evaluation.evaluatedAt() != null)
? LocalDate.ofInstant(evaluation.evaluatedAt(), ZoneOffset.UTC).toString()
: LocalDate.now().toString();
DailyProgress todayProgress = dailyProgress.stream()
.filter(d -> d.getDate().equals(today))
.findFirst()
.orElseGet(() -> {
DailyProgress dp = new DailyProgress(today);
dailyProgress.add(dp);
return dp;
});
todayProgress.processed++;
if (evaluation.relevant()) {
todayProgress.relevant++;
}
}
public int getTotalProcessed() {
return totalProcessed;
}
public int getRelevant() {
return relevant;
}
public int getIrrelevant() {
return irrelevant;
}
public int getDuplicates() {
return duplicates;
}
public int getGreen() {
return green;
}
public int getYellow() {
return yellow;
}
public int getRed() {
return red;
}
public Map<String, Integer> getIrrelevantReasons() {
return Map.copyOf(irrelevantReasons);
}
public Map<String, RepoStatistics> getPerRepository() {
return Map.copyOf(perRepository);
}
public List<DailyProgress> getDailyProgress() {
return List.copyOf(dailyProgress);
}
public RunMetadata getRunMetadata() {
return runMetadata;
}
public void setRunMetadata(RunMetadata runMetadata) {
this.runMetadata = runMetadata;
}
/**
* Rebuilds a {@link StatisticsCollector} from a list of evaluations.
*
* @param evaluations the evaluations to rebuild from
* @return a new {@link StatisticsCollector} reflecting the given evaluations
*/
public static StatisticsCollector rebuildFrom(List<CommitEvaluation> evaluations) {
StatisticsCollector stats = new StatisticsCollector();
for (CommitEvaluation eval : evaluations) {
stats.record(eval);
}
return stats;
}
/**
* Convenience method to record run metadata from raw values.
*/
public void recordRunMetadata(String startedAt, String completedAt, long durationSeconds,
String llmProvider, String llmModel, int batchSize, int commitsPerRequest,
int apiCallsMade, int deferredCommits, int permanentlySkipped) {
RunMetadata meta = new RunMetadata();
meta.startedAt = startedAt;
meta.completedAt = completedAt;
meta.durationSeconds = durationSeconds;
meta.llmProvider = llmProvider;
meta.llmModel = llmModel;
meta.batchSize = batchSize;
meta.commitsPerRequest = commitsPerRequest;
meta.apiCallsMade = apiCallsMade;
meta.deferredCommits = deferredCommits;
meta.permanentlySkipped = permanentlySkipped;
this.runMetadata = meta;
}
public TimeWindow getTimeWindow() {
return timeWindow;
}
/**
* Computes the time window (earliest and latest evaluatedAt) from a list of evaluations.
*/
public void computeTimeWindow(List<CommitEvaluation> evaluations) {
if (evaluations == null || evaluations.isEmpty()) {
return;
}
Instant earliest = null;
Instant latest = null;
Instant earliestCommit = null;
Instant latestCommit = null;
for (CommitEvaluation eval : evaluations) {
Instant at = eval.evaluatedAt();
if (at != null) {
if (earliest == null || at.isBefore(earliest)) {
earliest = at;
}
if (latest == null || at.isAfter(latest)) {
latest = at;
}
}
Instant cd = eval.commitDate();
if (cd != null) {
if (earliestCommit == null || cd.isBefore(earliestCommit)) {
earliestCommit = cd;
}
if (latestCommit == null || cd.isAfter(latestCommit)) {
latestCommit = cd;
}
}
}
if (earliest != null) {
this.timeWindow = new TimeWindow(earliest.toString(), latest.toString(),
earliestCommit != null ? earliestCommit.toString() : null,
latestCommit != null ? latestCommit.toString() : null);
}
}
/**
* Run-level metadata captured at the end of a mining run.
*/
public static class RunMetadata {
private String startedAt;
private String completedAt;
private long durationSeconds;
private String llmProvider;
private String llmModel;
private int batchSize;
private int commitsPerRequest;
private int apiCallsMade;
private int deferredCommits;
private int permanentlySkipped;
public String getStartedAt() { return startedAt; }
public String getCompletedAt() { return completedAt; }
public long getDurationSeconds() { return durationSeconds; }
public String getLlmProvider() { return llmProvider; }
public String getLlmModel() { return llmModel; }
public int getBatchSize() { return batchSize; }
public int getCommitsPerRequest() { return commitsPerRequest; }
public int getApiCallsMade() { return apiCallsMade; }
public int getDeferredCommits() { return deferredCommits; }
public int getPermanentlySkipped() { return permanentlySkipped; }
}
/**
* Time window of evaluated commits.
*/
public static class TimeWindow {
private final String earliestEvaluation;
private final String latestEvaluation;
private final String earliestCommitDate;
private final String latestCommitDate;
public TimeWindow(String earliestEvaluation, String latestEvaluation,
String earliestCommitDate, String latestCommitDate) {
this.earliestEvaluation = earliestEvaluation;
this.latestEvaluation = latestEvaluation;
this.earliestCommitDate = earliestCommitDate;
this.latestCommitDate = latestCommitDate;
}
public String getEarliestEvaluation() { return earliestEvaluation; }
public String getLatestEvaluation() { return latestEvaluation; }
public String getEarliestCommitDate() { return earliestCommitDate; }
public String getLatestCommitDate() { return latestCommitDate; }
}
}