// JGitHistoryProvider.java

/*******************************************************************************
 * Copyright (c) 2026 Carsten Hammer.
 *
 * This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License 2.0
 * which accompanies this distribution, and is available at
 * https://www.eclipse.org/legal/epl-2.0/
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 * Contributors:
 *     Carsten Hammer
 *******************************************************************************/
package org.sandbox.jdt.triggerpattern.mining.git;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.List;

import org.eclipse.jgit.api.Git;
import org.eclipse.jgit.api.LogCommand;
import org.eclipse.jgit.api.errors.GitAPIException;
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.diff.DiffFormatter;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevTree;
import org.eclipse.jgit.treewalk.AbstractTreeIterator;
import org.eclipse.jgit.treewalk.CanonicalTreeParser;
import org.eclipse.jgit.treewalk.EmptyTreeIterator;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.eclipse.jgit.treewalk.filter.PathSuffixFilter;
import org.sandbox.jdt.triggerpattern.mining.analysis.CommitInfo;
import org.sandbox.jdt.triggerpattern.mining.analysis.DiffHunk;
import org.sandbox.jdt.triggerpattern.mining.analysis.FileDiff;

/**
 * {@link GitHistoryProvider} implementation that uses JGit to read Git history.
 *
 * <p>This provider works with any local Git repository and does not require
 * the {@code git} command-line tool. It reuses the JGit library that is already
 * available in the Eclipse runtime (via EGit) and in {@code sandbox_common_core}.</p>
 *
 * <p>This is the Eclipse Git Bridge (Phase 1 of Issue #727): it provides the
 * same {@link GitHistoryProvider} interface as {@link CommandLineGitProvider}
 * but uses JGit directly, which is more reliable and portable.</p>
 *
 * <p>Implementation notes:</p>
 * <ul>
 * <li>Only paths ending in {@code .java} are considered.</li>
 * <li>A commit is always compared against its <em>first</em> parent; further
 * parents of merge commits are not inspected.</li>
 * <li>Every call opens the repository and closes it again before returning;
 * the provider itself holds no state.</li>
 * </ul>
 *
 * @since 1.2.6
 */
public class JGitHistoryProvider implements GitHistoryProvider {

	/** Path suffix selecting the files this provider reports on. */
	private static final String JAVA_SUFFIX = ".java"; //$NON-NLS-1$

	/** Marker that opens and closes the range section of a unified-diff hunk header. */
	private static final String HUNK_MARKER = "@@"; //$NON-NLS-1$

	/**
	 * Reads up to {@code maxCommits} commits via {@code git log} semantics,
	 * starting at the repository's current {@code HEAD}.
	 *
	 * @param repositoryPath path handed to {@link Git#open(java.io.File)}
	 * @param maxCommits     limit passed to {@link LogCommand#setMaxCount(int)}
	 * @return the commits in the order delivered by the log command; each entry
	 *         carries the number of {@code .java} paths that differ from the
	 *         commit's first parent
	 * @throws GitProviderException if the repository cannot be opened or read
	 */
	@Override
	public List<CommitInfo> getHistory(Path repositoryPath, int maxCommits) {
		List<CommitInfo> commits = new ArrayList<>();
		try (Git git = Git.open(repositoryPath.toFile())) {
			Repository repository = git.getRepository();
			LogCommand log = git.log().setMaxCount(maxCommits);
			for (RevCommit commit : log.call()) {
				int fileCount = countChangedFiles(repository, commit);
				commits.add(toCommitInfo(commit, fileCount));
			}
		} catch (IOException | GitAPIException e) {
			throw new GitProviderException("Failed to read Git history from " //$NON-NLS-1$
					+ repositoryPath, e);
		}
		return commits;
	}

	/**
	 * Collects the modified {@code .java} files of a commit together with their
	 * full content before and after the change and the parsed diff hunks.
	 *
	 * <p>Only entries of type {@link DiffEntry.ChangeType#MODIFY} are reported;
	 * added, deleted, renamed and copied files are skipped. A commit without a
	 * parent therefore yields an empty list, as does a {@code commitId} that
	 * cannot be resolved.</p>
	 *
	 * @param repositoryPath path handed to {@link Git#open(java.io.File)}
	 * @param commitId       revision expression identifying the commit
	 * @return the file diffs relative to the first parent, possibly empty
	 * @throws GitProviderException if reading the repository fails
	 */
	@Override
	public List<FileDiff> getDiffs(Path repositoryPath, String commitId) {
		List<FileDiff> diffs = new ArrayList<>();
		try (Git git = Git.open(repositoryPath.toFile())) {
			Repository repository = git.getRepository();
			RevCommit commit = resolveCommit(repository, commitId);
			// A root commit has nothing to compare against, so it cannot
			// contain MODIFY entries; skip the tree scan altogether.
			if (commit == null || commit.getParentCount() == 0) {
				return diffs;
			}

			// commit.getParent(0) is only a header-less stub whose getTree()
			// is null. Parse it once and use the parsed instance for both the
			// tree comparison and the "before" content lookup.
			RevCommit parent = repository.parseCommit(commit.getParent(0));

			try (DiffFormatter formatter = new DiffFormatter(new ByteArrayOutputStream())) {
				formatter.setRepository(repository);
				formatter.setPathFilter(PathSuffixFilter.create(JAVA_SUFFIX));
				List<DiffEntry> entries = formatter.scan(
						prepareTreeParser(repository, parent),
						prepareTreeParser(repository, commit));

				for (DiffEntry entry : entries) {
					if (entry.getChangeType() != DiffEntry.ChangeType.MODIFY) {
						continue;
					}
					String filePath = entry.getNewPath();
					String contentBefore = getFileContentAtCommit(repository, parent, filePath);
					String contentAfter = getFileContentAtCommit(repository, commit, filePath);
					if (contentBefore == null || contentAfter == null) {
						continue;
					}
					List<DiffHunk> hunks = extractHunks(repository, entry);
					diffs.add(new FileDiff(filePath, contentBefore, contentAfter, hunks));
				}
			}
		} catch (IOException e) {
			throw new GitProviderException("Failed to get diffs for commit " //$NON-NLS-1$
					+ commitId, e);
		}
		return diffs;
	}

	/**
	 * Returns the content of a single file as it exists in the given commit.
	 *
	 * @param repositoryPath path handed to {@link Git#open(java.io.File)}
	 * @param commitId       revision expression identifying the commit
	 * @param filePath       repository-relative path of the file
	 * @return the file content decoded as UTF-8, or {@code null} if the commit
	 *         cannot be resolved, the path does not name a file in that commit,
	 *         or an I/O error occurs while reading
	 */
	@Override
	public String getFileContent(Path repositoryPath, String commitId, String filePath) {
		try (Git git = Git.open(repositoryPath.toFile())) {
			Repository repository = git.getRepository();
			RevCommit commit = resolveCommit(repository, commitId);
			if (commit == null) {
				return null;
			}
			return getFileContentAtCommit(repository, commit, filePath);
		} catch (IOException ignored) {
			// Best-effort lookup: an unreadable repository or object is
			// reported to the caller the same way as "no such content".
			return null;
		}
	}

	// ---- internal helpers ----

	/**
	 * Converts a parsed JGit commit into the mining model's {@link CommitInfo}.
	 * The commit time is rendered in the JVM's default time zone.
	 */
	private static CommitInfo toCommitInfo(RevCommit commit, int fileCount) {
		LocalDateTime timestamp = LocalDateTime.ofInstant(
				Instant.ofEpochSecond(commit.getCommitTime()),
				ZoneId.systemDefault());
		return new CommitInfo(
				commit.getName(),
				commit.abbreviate(7).name(),
				commit.getShortMessage(),
				commit.getAuthorIdent().getName(),
				timestamp,
				fileCount);
	}

	/**
	 * Resolves a revision expression to a fully parsed commit.
	 *
	 * @return the parsed commit, or {@code null} if the expression does not
	 *         resolve to any object
	 * @throws IOException if the object cannot be read or is not a commit
	 */
	private static RevCommit resolveCommit(Repository repository, String commitId)
			throws IOException {
		ObjectId objectId = repository.resolve(commitId);
		return objectId == null ? null : repository.parseCommit(objectId);
	}

	/**
	 * Counts the {@code .java} paths that differ between a commit and its first
	 * parent (or the empty tree for a root commit). All change types count.
	 */
	private static int countChangedFiles(Repository repository, RevCommit commit)
			throws IOException {
		try (DiffFormatter formatter = new DiffFormatter(
				new ByteArrayOutputStream())) {
			formatter.setRepository(repository);
			formatter.setPathFilter(PathSuffixFilter.create(JAVA_SUFFIX));
			AbstractTreeIterator oldIter = getParentTreeIterator(repository, commit);
			AbstractTreeIterator newIter = prepareTreeParser(repository, commit);
			return formatter.scan(oldIter, newIter).size();
		}
	}

	/**
	 * Returns a tree iterator for the first parent of {@code commit}, or an
	 * {@link EmptyTreeIterator} when the commit has no parent.
	 */
	private static AbstractTreeIterator getParentTreeIterator(Repository repository,
			RevCommit commit) throws IOException {
		if (commit.getParentCount() == 0) {
			return new EmptyTreeIterator();
		}
		// The parent reference must be parsed before its tree can be accessed.
		RevCommit parent = repository.parseCommit(commit.getParent(0));
		return prepareTreeParser(repository, parent);
	}

	/**
	 * Creates a tree parser positioned on the root tree of a parsed commit.
	 */
	private static CanonicalTreeParser prepareTreeParser(Repository repository,
			RevCommit commit) throws IOException {
		RevTree tree = commit.getTree();
		CanonicalTreeParser parser = new CanonicalTreeParser();
		try (ObjectReader reader = repository.newObjectReader()) {
			parser.reset(reader, tree.getId());
		}
		return parser;
	}

	/**
	 * Loads the blob stored at {@code filePath} in the tree of a parsed commit.
	 *
	 * @return the blob decoded as UTF-8, or {@code null} if the path does not
	 *         exist in the commit or names a directory
	 */
	private static String getFileContentAtCommit(Repository repository,
			RevCommit commit, String filePath) throws IOException {
		RevTree tree = commit.getTree();
		try (TreeWalk treeWalk = TreeWalk.forPath(repository, filePath, tree)) {
			if (treeWalk == null) {
				return null;
			}
			// forPath also matches directories; a tree object is not file content.
			if (treeWalk.isSubtree()) {
				return null;
			}
			ObjectId blobId = treeWalk.getObjectId(0);
			ObjectLoader loader = repository.open(blobId);
			return new String(loader.getBytes(), StandardCharsets.UTF_8);
		}
	}

	/**
	 * Formats a single diff entry as unified diff text and parses its hunks.
	 */
	private static List<DiffHunk> extractHunks(Repository repository,
			DiffEntry entry) throws IOException {
		ByteArrayOutputStream out = new ByteArrayOutputStream();
		try (DiffFormatter hunkFormatter = new DiffFormatter(out)) {
			hunkFormatter.setRepository(repository);
			hunkFormatter.format(entry);
		}
		return parseHunksFromDiff(out.toString(StandardCharsets.UTF_8));
	}

	/**
	 * Parses unified diff output into a list of diff hunks.
	 *
	 * <p>Every line starting with {@code @@} opens a hunk that extends up to the
	 * next {@code @@} line, the next {@code diff } line, or the end of the text.
	 * Within a hunk, {@code -} lines contribute to the "before" text, {@code +}
	 * lines to the "after" text and lines starting with a space to both; the
	 * one-character prefix is removed and each line is terminated with
	 * {@code '\n'}. All other lines (for example
	 * {@code \ No newline at end of file}) are ignored.</p>
	 *
	 * @param diffText the unified diff text, may be {@code null} or empty
	 * @return list of parsed diff hunks, never {@code null}
	 */
	static List<DiffHunk> parseHunksFromDiff(String diffText) {
		List<DiffHunk> hunks = new ArrayList<>();
		if (diffText == null || diffText.isEmpty()) {
			return hunks;
		}

		String[] lines = diffText.split("\n"); //$NON-NLS-1$
		int i = 0;
		while (i < lines.length) {
			String header = lines[i++];
			if (!header.startsWith(HUNK_MARKER)) {
				continue; // file header or other text outside of any hunk
			}
			StringBuilder beforeText = new StringBuilder();
			StringBuilder afterText = new StringBuilder();
			while (i < lines.length && !endsHunk(lines[i])) {
				String line = lines[i++];
				if (line.isEmpty()) {
					continue;
				}
				String content = line.substring(1);
				switch (line.charAt(0)) {
				case '-':
					beforeText.append(content).append('\n');
					break;
				case '+':
					afterText.append(content).append('\n');
					break;
				case ' ':
					beforeText.append(content).append('\n');
					afterText.append(content).append('\n');
					break;
				default:
					break; // not a content line
				}
			}
			int[] ranges = parseHunkHeader(header);
			hunks.add(new DiffHunk(ranges[0], ranges[1], ranges[2], ranges[3],
					beforeText.toString(), afterText.toString()));
		}
		return hunks;
	}

	/** Tells whether a line terminates the hunk that is currently being read. */
	private static boolean endsHunk(String line) {
		return line.startsWith(HUNK_MARKER) || line.startsWith("diff "); //$NON-NLS-1$
	}

	/**
	 * Parses a hunk header of the form
	 * {@code @@ -startLine,count +startLine,count @@}.
	 *
	 * @return {@code {beforeStart, beforeCount, afterStart, afterCount}}; a
	 *         missing closing marker or fewer than two ranges yields
	 *         {@code {1, 0, 1, 0}}, and an individually malformed range yields
	 *         {@code 1, 0} for that side
	 */
	private static int[] parseHunkHeader(String header) {
		int close = header.indexOf(HUNK_MARKER, HUNK_MARKER.length());
		if (close < 0) {
			return new int[] { 1, 0, 1, 0 };
		}
		// Slice right after the opening marker; strip() removes the separating
		// blanks, so degenerate headers such as "@@@@" cannot cause an
		// out-of-range substring.
		String range = header.substring(HUNK_MARKER.length(), close).strip();
		String[] parts = range.split("\\s+"); //$NON-NLS-1$
		if (parts.length < 2) {
			return new int[] { 1, 0, 1, 0 };
		}
		int[] before = parseRange(parts[0], '-');
		int[] after = parseRange(parts[1], '+');
		return new int[] { before[0], before[1], after[0], after[1] };
	}

	/**
	 * Parses one side of a hunk header, e.g. {@code -12,5} or {@code +7}.
	 *
	 * @param token the range including its leading sign
	 * @param sign  the sign the token must start with ({@code '-'} or {@code '+'})
	 * @return {@code {start, count}}; the count defaults to 1 when omitted, and
	 *         {@code {1, 0}} is returned for a malformed token
	 */
	private static int[] parseRange(String token, char sign) {
		if (token.isEmpty() || token.charAt(0) != sign) {
			return new int[] { 1, 0 };
		}
		String body = token.substring(1);
		int comma = body.indexOf(',');
		try {
			int start = Integer.parseInt(comma < 0 ? body : body.substring(0, comma));
			int count = comma < 0 ? 1 : Integer.parseInt(body.substring(comma + 1));
			if (start < 0 || count < 0) {
				return new int[] { 1, 0 };
			}
			return new int[] { start, count };
		} catch (NumberFormatException ignored) {
			// Non-numeric range: fall back to the neutral default.
			return new int[] { 1, 0 };
		}
	}
}