GitDatabaseQueryService.java
/*******************************************************************************
* Copyright (c) 2026 Carsten Hammer.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Carsten Hammer
*******************************************************************************/
package org.eclipse.jgit.storage.hibernate.service;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.revwalk.RevCommit;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.hibernate.entity.FilePathHistory;
import org.eclipse.jgit.storage.hibernate.entity.GitCommitIndex;
import org.eclipse.jgit.storage.hibernate.entity.GitObjectEntity;
import org.eclipse.jgit.storage.hibernate.entity.GitRefEntity;
import org.eclipse.jgit.storage.hibernate.entity.GitReflogEntity;
import org.eclipse.jgit.storage.hibernate.entity.JavaBlobIndex;
import org.eclipse.jgit.storage.hibernate.search.EmbeddingService;
import org.eclipse.jgit.storage.hibernate.search.RankFusionUtil;
import org.eclipse.jgit.treewalk.TreeWalk;
import org.hibernate.Session;
import org.hibernate.SessionFactory;
import org.hibernate.search.mapper.orm.Search;
import org.hibernate.search.mapper.orm.session.SearchSession;
/**
* Extended query service that leverages the relational database and Hibernate
* Search for operations impossible with filesystem-based Git storage.
* <p>
* Full-text search across commit messages and changed paths is powered by
* Hibernate Search with a Lucene backend. Other queries (statistics,
* time-range, reflog, pack analytics) use standard HQL.
*/
public class GitDatabaseQueryService {
private final SessionFactory sessionFactory;
private EmbeddingService embeddingService;
/**
 * Create a new query service backed by the given Hibernate session factory.
 * <p>
 * The factory is stored as-is (no null check is performed here); query
 * methods open their own short-lived sessions from it.
 *
 * @param sessionFactory
 *            the Hibernate session factory used to open sessions
 */
public GitDatabaseQueryService(SessionFactory sessionFactory) {
this.sessionFactory = sessionFactory;
}
/**
 * Set the embedding service used for semantic search.
 * <p>
 * While no service is set (or the service reports itself as unavailable),
 * {@link #semanticSearch(String, String, int)} returns an empty list.
 *
 * @param embeddingService
 *            the embedding service; replaces any previously set instance
 */
public void setEmbeddingService(EmbeddingService embeddingService) {
this.embeddingService = embeddingService;
}
/**
 * Run a full-text search over commit messages and return the first page of
 * hits.
 * <p>
 * Convenience overload of
 * {@link #searchCommitMessages(String, String, int, int)} using offset
 * {@code 0} and a limit of {@code 20}.
 *
 * @param repoName
 *            the repository name
 * @param query
 *            the search query
 * @return at most 20 matching commit index entries
 */
public List<GitCommitIndex> searchCommitMessages(String repoName,
		String query) {
	final int firstHit = 0;
	final int pageSize = 20;
	return searchCommitMessages(repoName, query, firstHit, pageSize);
}
/**
 * Run a paginated full-text search over commit messages.
 * <p>
 * A hit must match both the {@code repositoryName} field and the
 * {@code commitMessage} field. A fresh session is opened for the search and
 * closed before returning.
 *
 * @param repoName
 *            the repository name
 * @param query
 *            the search query
 * @param offset
 *            the index of the first hit to return
 * @param limit
 *            the maximum number of hits to return
 * @return matching commit index entries
 */
public List<GitCommitIndex> searchCommitMessages(String repoName,
		String query, int offset, int limit) {
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(GitCommitIndex.class)
				.where(f -> {
					var inRepository = f.match()
							.field("repositoryName") //$NON-NLS-1$
							.matching(repoName);
					var messageMatches = f.match()
							.field("commitMessage") //$NON-NLS-1$
							.matching(query);
					return f.bool().must(inRepository)
							.must(messageMatches);
				})
				.fetchHits(offset, limit);
	}
}
/**
 * List the repositories that store an object with the given id.
 *
 * @param objectId
 *            the object id (hex string) as stored in
 *            {@code GitObjectEntity.objectId}
 * @return distinct names of the repositories containing this object
 */
public List<String> findRepositoriesContainingObject(String objectId) {
	final String hql = "SELECT DISTINCT o.repositoryName FROM GitObjectEntity o WHERE o.objectId = :oid"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		var lookup = session.createQuery(hql, String.class)
				.setParameter("oid", objectId); //$NON-NLS-1$
		return lookup.getResultList();
	}
}
/**
 * Load the indexed commits of a repository whose commit time lies within a
 * time range.
 * <p>
 * The range is evaluated with an HQL {@code BETWEEN}, so both bounds are
 * inclusive.
 *
 * @param repoName
 *            the repository name
 * @param start
 *            start of the time range (inclusive)
 * @param end
 *            end of the time range (inclusive)
 * @return matching commit index entries
 */
public List<GitCommitIndex> getCommitsBetween(String repoName,
		Instant start, Instant end) {
	final String hql = "FROM GitCommitIndex c WHERE c.repositoryName = :repo AND c.commitTime BETWEEN :start AND :end"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		var rangeQuery = session.createQuery(hql, GitCommitIndex.class)
				.setParameter("repo", repoName) //$NON-NLS-1$
				.setParameter("start", start) //$NON-NLS-1$
				.setParameter("end", end); //$NON-NLS-1$
		return rangeQuery.getResultList();
	}
}
/**
 * Load the refs of a repository that were updated at or after a cutoff.
 * <p>
 * The comparison is {@code updatedAt >= since}, i.e. a ref updated exactly
 * at the cutoff is included.
 *
 * @param repoName
 *            the repository name
 * @param since
 *            the cutoff timestamp (inclusive)
 * @return refs whose {@code updatedAt} is not before the cutoff
 */
public List<GitRefEntity> getRefsModifiedSince(String repoName,
		Instant since) {
	final String hql = "FROM GitRefEntity r WHERE r.repositoryName = :repo AND r.updatedAt >= :since"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		var refQuery = session.createQuery(hql, GitRefEntity.class)
				.setParameter("repo", repoName) //$NON-NLS-1$
				.setParameter("since", since); //$NON-NLS-1$
		return refQuery.getResultList();
	}
}
/**
 * Count the objects of one type stored for a repository.
 *
 * @param repoName
 *            the repository name
 * @param objectType
 *            the object type constant to match against
 *            {@code GitObjectEntity.objectType}
 * @return number of objects of the given type; {@code 0} if the query
 *         yields no value
 */
public long countObjectsByType(String repoName, int objectType) {
	final String hql = "SELECT COUNT(o) FROM GitObjectEntity o WHERE o.repositoryName = :repo AND o.objectType = :type"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		Long matched = session.createQuery(hql, Long.class)
				.setParameter("repo", repoName) //$NON-NLS-1$
				.setParameter("type", objectType) //$NON-NLS-1$
				.uniqueResult();
		if (matched == null) {
			return 0L;
		}
		return matched.longValue();
	}
}
/**
 * Load every object entity stored for a repository.
 * <p>
 * The result is not paginated; all matching rows are returned in one list.
 *
 * @param repoName
 *            the repository name
 * @return all object entities of the repository
 */
public List<GitObjectEntity> getAllObjects(String repoName) {
	final String hql = "FROM GitObjectEntity o WHERE o.repositoryName = :repo"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		var objectQuery = session.createQuery(hql, GitObjectEntity.class)
				.setParameter("repo", repoName); //$NON-NLS-1$
		return objectQuery.getResultList();
	}
}
/**
 * Run a full-text search over the changed paths of commits and return the
 * first page of hits.
 * <p>
 * Convenience overload of
 * {@link #searchByChangedPath(String, String, int, int)} using offset
 * {@code 0} and a limit of {@code 20}.
 *
 * @param repoName
 *            the repository name
 * @param pathPattern
 *            the path pattern to search for
 * @return at most 20 matching commit index entries
 */
public List<GitCommitIndex> searchByChangedPath(String repoName,
		String pathPattern) {
	final int firstHit = 0;
	final int pageSize = 20;
	return searchByChangedPath(repoName, pathPattern, firstHit, pageSize);
}
/**
 * Run a paginated full-text search over the changed paths of commits.
 * <p>
 * A hit must match both the {@code repositoryName} field and the
 * {@code changedPaths} field.
 *
 * @param repoName
 *            the repository name
 * @param pathPattern
 *            the path pattern to search for
 * @param offset
 *            the index of the first hit to return
 * @param limit
 *            the maximum number of hits to return
 * @return matching commit index entries
 */
public List<GitCommitIndex> searchByChangedPath(String repoName,
		String pathPattern, int offset, int limit) {
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(GitCommitIndex.class)
				.where(f -> {
					var inRepository = f.match()
							.field("repositoryName") //$NON-NLS-1$
							.matching(repoName);
					var pathMatches = f.match()
							.field("changedPaths") //$NON-NLS-1$
							.matching(pathPattern);
					return f.bool().must(inRepository)
							.must(pathMatches);
				})
				.fetchHits(offset, limit);
	}
}
/**
 * Compute per-author commit counts for a repository.
 * <p>
 * Commits are grouped by author name and author email; each group becomes
 * one {@link AuthorStats} instance. The list is ordered by commit count,
 * highest first.
 *
 * @param repoName
 *            the repository name
 * @return one {@link AuthorStats} per distinct (name, email) pair, in
 *         descending order of commit count
 */
public List<AuthorStats> getAuthorStatistics(String repoName) {
	final String hql = "SELECT c.authorName, c.authorEmail, COUNT(c) FROM GitCommitIndex c WHERE c.repositoryName = :repo GROUP BY c.authorName, c.authorEmail ORDER BY COUNT(c) DESC"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		List<Object[]> rows = session.createQuery(hql, Object[].class)
				.setParameter("repo", repoName) //$NON-NLS-1$
				.getResultList();
		List<AuthorStats> stats = new ArrayList<>(rows.size());
		// Column order follows the SELECT clause: name, email, count.
		rows.forEach(row -> {
			String name = (String) row[0];
			String email = (String) row[1];
			long commits = (Long) row[2];
			stats.add(new AuthorStats(name, email, commits));
		});
		return stats;
	}
}
/**
 * Load the most recent reflog entries of one ref.
 * <p>
 * Entries are ordered by descending {@code id}, which yields newest-first
 * order provided ids grow with insertion order.
 *
 * @param repoName
 *            the repository name
 * @param refName
 *            the reference name
 * @param max
 *            maximum number of entries to return
 * @return up to {@code max} reflog entities, highest id first
 */
public List<GitReflogEntity> getReflogEntries(String repoName,
		String refName, int max) {
	final String hql = "FROM GitReflogEntity r WHERE r.repositoryName = :repo AND r.refName = :ref ORDER BY r.id DESC"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		var reflogQuery = session.createQuery(hql, GitReflogEntity.class)
				.setParameter("repo", repoName) //$NON-NLS-1$
				.setParameter("ref", refName) //$NON-NLS-1$
				.setMaxResults(max);
		return reflogQuery.getResultList();
	}
}
/**
 * Delete the reflog entries of a repository that are older than a cutoff.
 * <p>
 * The bulk delete runs in its own transaction. If the delete or the commit
 * fails, the transaction is rolled back before the original exception is
 * rethrown, so no half-finished transaction is left on the connection.
 *
 * @param repoName
 *            the repository name
 * @param before
 *            the cutoff timestamp; entries strictly older are deleted
 * @return number of entries deleted
 */
public int purgeReflogEntries(String repoName, Instant before) {
	try (Session session = sessionFactory.openSession()) {
		var tx = session.beginTransaction();
		try {
			int deleted = session.createMutationQuery(
					"DELETE FROM GitReflogEntity r WHERE r.repositoryName = :repo AND r.when < :before") //$NON-NLS-1$
					.setParameter("repo", repoName) //$NON-NLS-1$
					.setParameter("before", before) //$NON-NLS-1$
					.executeUpdate();
			tx.commit();
			return deleted;
		} catch (RuntimeException failure) {
			if (tx.isActive()) {
				try {
					tx.rollback();
				} catch (RuntimeException rollbackFailure) {
					// Keep the root cause; attach the rollback problem to it.
					failure.addSuppressed(rollbackFailure);
				}
			}
			throw failure;
		}
	}
}
/**
 * Find pack names of a repository for which no {@code 'pack'} file exists.
 * <p>
 * A pack name is reported when the repository has at least one
 * {@code GitPackEntity} row with that name but none whose
 * {@code packExtension} equals {@code 'pack'}. Such rows can be left over
 * after failed operations.
 *
 * @param repoName
 *            the repository name
 * @return distinct orphaned pack names
 */
public List<String> findOrphanedPacks(String repoName) {
	final String hql = "SELECT DISTINCT p.packName FROM GitPackEntity p " //$NON-NLS-1$
			+ "WHERE p.repositoryName = :repo " //$NON-NLS-1$
			+ "AND NOT EXISTS (" //$NON-NLS-1$
			+ "SELECT 1 FROM GitPackEntity p2 " //$NON-NLS-1$
			+ "WHERE p2.repositoryName = :repo " //$NON-NLS-1$
			+ "AND p2.packName = p.packName " //$NON-NLS-1$
			+ "AND p2.packExtension = 'pack')"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		var orphanQuery = session.createQuery(hql, String.class)
				.setParameter("repo", repoName); //$NON-NLS-1$
		return orphanQuery.getResultList();
	}
}
/**
 * Count the distinct pack names stored for a repository.
 *
 * @param repoName
 *            the repository name
 * @return the number of distinct pack names; {@code 0} if the query yields
 *         no value
 */
public long countPacks(String repoName) {
	final String hql = "SELECT COUNT(DISTINCT p.packName) FROM GitPackEntity p WHERE p.repositoryName = :repo"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		Long packs = session.createQuery(hql, Long.class)
				.setParameter("repo", repoName) //$NON-NLS-1$
				.uniqueResult();
		if (packs == null) {
			return 0L;
		}
		return packs.longValue();
	}
}
/**
 * Sum the {@code fileSize} of all pack rows stored for a repository.
 * <p>
 * The query coalesces an empty sum to {@code 0}; a {@code null} result is
 * additionally mapped to {@code 0}.
 *
 * @param repoName
 *            the repository name
 * @return the total pack data size in bytes
 */
public long getTotalPackSize(String repoName) {
	final String hql = "SELECT COALESCE(SUM(p.fileSize), 0) FROM GitPackEntity p WHERE p.repositoryName = :repo"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		Long totalBytes = session.createQuery(hql, Long.class)
				.setParameter("repo", repoName) //$NON-NLS-1$
				.uniqueResult();
		if (totalBytes == null) {
			return 0L;
		}
		return totalBytes.longValue();
	}
}
/** Blobs larger than this many bytes are skipped by the content search. */
private static final int MAX_BLOB_SIZE_FOR_SEARCH = 1024 * 1024; // 1 MB
/**
 * Search commits whose tree blobs contain the given content string.
 * <p>
 * All indexed commits of the repository are loaded from the database. For
 * each of them the commit tree is walked recursively and every blob is
 * decoded as UTF-8 and checked for {@code contentQuery}; the commit is
 * reported on the first matching blob.
 * <ul>
 * <li>Blobs larger than 1 MB are skipped.</li>
 * <li>Gitlink (submodule) entries are skipped: they name a commit of
 * another repository, not a blob that can be opened here.</li>
 * <li>Index rows whose object id cannot be resolved in {@code repo} are
 * skipped.</li>
 * <li>The verdict for each blob is remembered for the duration of the call,
 * so a blob shared by many commits is read and decoded only once. This
 * costs one map entry per distinct blob visited.</li>
 * </ul>
 *
 * @param repoName
 *            the repository name
 * @param contentQuery
 *            the string to search for inside blob content
 * @param repo
 *            the {@link Repository} object used to read objects
 * @return list of {@link GitCommitIndex} entries whose committed files
 *         contain the search string
 * @throws IOException
 *             if an error occurs reading objects from the repository
 */
public List<GitCommitIndex> searchBlobContent(String repoName,
		String contentQuery, Repository repo) throws IOException {
	List<GitCommitIndex> allCommits;
	try (Session session = sessionFactory.openSession()) {
		allCommits = session.createQuery(
				"FROM GitCommitIndex c WHERE c.repositoryName = :repo", //$NON-NLS-1$
				GitCommitIndex.class)
				.setParameter("repo", repoName) //$NON-NLS-1$
				.getResultList();
	}
	List<GitCommitIndex> matches = new ArrayList<>();
	// Blob id (hex) -> "contains the query". Most blobs are shared by many
	// commits, so caching avoids re-reading them for every commit.
	Map<String, Boolean> blobVerdicts = new HashMap<>();
	try (RevWalk rw = new RevWalk(repo);
			ObjectReader reader = repo.newObjectReader()) {
		for (GitCommitIndex idx : allCommits) {
			var commitId = repo.resolve(idx.getObjectId());
			if (commitId == null) {
				// Stale index row: the object is unknown to this repository.
				continue;
			}
			RevCommit commit = rw.parseCommit(commitId);
			try (TreeWalk tw = new TreeWalk(reader)) {
				tw.addTree(commit.getTree());
				tw.setRecursive(true);
				while (tw.next()) {
					if (org.eclipse.jgit.lib.FileMode.GITLINK
							.equals(tw.getRawMode(0))) {
						continue;
					}
					var blobId = tw.getObjectId(0);
					String blobKey = blobId.name();
					Boolean containsQuery = blobVerdicts.get(blobKey);
					if (containsQuery == null) {
						ObjectLoader loader = reader.open(blobId);
						if (loader.getSize() > MAX_BLOB_SIZE_FOR_SEARCH) {
							containsQuery = Boolean.FALSE;
						} else {
							String text = new String(loader.getBytes(),
									StandardCharsets.UTF_8);
							containsQuery = Boolean
									.valueOf(text.contains(contentQuery));
						}
						blobVerdicts.put(blobKey, containsQuery);
					}
					if (containsQuery.booleanValue()) {
						matches.add(idx);
						break;
					}
				}
			}
		}
	}
	return matches;
}
/**
 * Search Java blob indices by type name and return the first page of hits.
 * <p>
 * Convenience overload of {@link #searchByType(String, String, int, int)}
 * using offset {@code 0} and a limit of {@code 20}.
 *
 * @param repoName
 *            the repository name
 * @param query
 *            the type name query
 * @return at most 20 matching Java blob index entries
 */
public List<JavaBlobIndex> searchByType(String repoName, String query) {
	final int firstHit = 0;
	final int pageSize = 20;
	return searchByType(repoName, query, firstHit, pageSize);
}
/**
 * Search Java blob indices by type name with pagination and boosting.
 * <p>
 * The query is matched against {@code fullyQualifiedNames} (boost 2.0) and
 * {@code declaredTypes} (boost 1.5); hits are restricted to the given
 * repository.
 *
 * @param repoName
 *            the repository name
 * @param query
 *            the type name query
 * @param offset
 *            the index of the first hit to return
 * @param limit
 *            the maximum number of hits to return
 * @return matching Java blob index entries
 */
public List<JavaBlobIndex> searchByType(String repoName, String query,
		int offset, int limit) {
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(JavaBlobIndex.class)
				.where(f -> {
					var inRepository = f.match()
							.field("repositoryName") //$NON-NLS-1$
							.matching(repoName);
					var typeMatches = f.match()
							.field("fullyQualifiedNames") //$NON-NLS-1$
							.boost(2.0f)
							.field("declaredTypes") //$NON-NLS-1$
							.boost(1.5f)
							.matching(query);
					return f.bool().must(inRepository).must(typeMatches);
				})
				.fetchHits(offset, limit);
	}
}
/**
 * Search Java blob indices by symbol name (methods and fields) and return
 * the first page of hits.
 * <p>
 * Convenience overload of {@link #searchBySymbol(String, String, int, int)}
 * using offset {@code 0} and a limit of {@code 20}.
 *
 * @param repoName
 *            the repository name
 * @param query
 *            the symbol name query
 * @return at most 20 matching Java blob index entries
 */
public List<JavaBlobIndex> searchBySymbol(String repoName, String query) {
	final int firstHit = 0;
	final int pageSize = 20;
	return searchBySymbol(repoName, query, firstHit, pageSize);
}
/**
 * Search Java blob indices by symbol name with pagination and boosting.
 * <p>
 * The query is matched against {@code declaredMethods} (boost 1.5) and
 * {@code declaredFields} (no explicit boost); hits are restricted to the
 * given repository.
 *
 * @param repoName
 *            the repository name
 * @param query
 *            the symbol name query
 * @param offset
 *            the index of the first hit to return
 * @param limit
 *            the maximum number of hits to return
 * @return matching Java blob index entries
 */
public List<JavaBlobIndex> searchBySymbol(String repoName, String query,
		int offset, int limit) {
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(JavaBlobIndex.class)
				.where(f -> {
					var inRepository = f.match()
							.field("repositoryName") //$NON-NLS-1$
							.matching(repoName);
					var symbolMatches = f.match()
							.field("declaredMethods") //$NON-NLS-1$
							.boost(1.5f)
							.field("declaredFields") //$NON-NLS-1$
							.matching(query);
					return f.bool().must(inRepository)
							.must(symbolMatches);
				})
				.fetchHits(offset, limit);
	}
}
/**
 * Search Java blob indices by type hierarchy (extends/implements) and
 * return the first page of hits.
 * <p>
 * Convenience overload of
 * {@link #searchByHierarchy(String, String, int, int)} using offset
 * {@code 0} and a limit of {@code 20}.
 *
 * @param repoName
 *            the repository name
 * @param typeName
 *            the type name to find subtypes of
 * @return at most 20 matching Java blob index entries
 */
public List<JavaBlobIndex> searchByHierarchy(String repoName,
		String typeName) {
	final int firstHit = 0;
	final int pageSize = 20;
	return searchByHierarchy(repoName, typeName, firstHit, pageSize);
}
/**
 * Search Java blob indices by type hierarchy with pagination.
 * <p>
 * The type name is matched against the {@code extendsTypes} and
 * {@code implementsTypes} fields; hits are restricted to the given
 * repository.
 *
 * @param repoName
 *            the repository name
 * @param typeName
 *            the type name to find subtypes of
 * @param offset
 *            the index of the first hit to return
 * @param limit
 *            the maximum number of hits to return
 * @return matching Java blob index entries
 */
public List<JavaBlobIndex> searchByHierarchy(String repoName,
		String typeName, int offset, int limit) {
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(JavaBlobIndex.class)
				.where(f -> {
					var inRepository = f.match()
							.field("repositoryName") //$NON-NLS-1$
							.matching(repoName);
					var supertypeMatches = f.match()
							.field("extendsTypes") //$NON-NLS-1$
							.field("implementsTypes") //$NON-NLS-1$
							.matching(typeName);
					return f.bool().must(inRepository)
							.must(supertypeMatches);
				})
				.fetchHits(offset, limit);
	}
}
/**
 * Search Java blob indices by annotation name.
 * <p>
 * A hit must match both the {@code repositoryName} field and the
 * {@code annotations} field.
 *
 * @param repo
 *            the repository name
 * @param query
 *            the annotation search query
 * @param offset
 *            pagination offset
 * @param limit
 *            maximum results
 * @return matching blob index entities
 */
public List<JavaBlobIndex> searchByAnnotation(String repo,
		String query, int offset, int limit) {
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(JavaBlobIndex.class)
				.where(f -> {
					var inRepository = f.match()
							.field("repositoryName") //$NON-NLS-1$
							.matching(repo);
					var annotationMatches = f.match()
							.field("annotations") //$NON-NLS-1$
							.matching(query);
					return f.bool().must(inRepository)
							.must(annotationMatches);
				})
				.fetchHits(offset, limit);
	}
}
/**
 * Search Java blob indices by type, optionally restricted to one module.
 * <p>
 * The query is matched against {@code simpleClassName},
 * {@code declaredTypes} and {@code fullyQualifiedNames}. When
 * {@code module} is neither {@code null} nor empty, hits must additionally
 * match it in the {@code projectName} field.
 *
 * @param repo
 *            the repository name
 * @param query
 *            the type search query
 * @param module
 *            optional module/project name filter (may be null or empty)
 * @param offset
 *            pagination offset
 * @param limit
 *            maximum results
 * @return matching blob index entities
 */
public List<JavaBlobIndex> searchByTypeWithModule(String repo,
		String query, String module, int offset, int limit) {
	final boolean filterByModule = module != null && !module.isEmpty();
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(JavaBlobIndex.class)
				.where(f -> {
					var criteria = f.bool()
							.must(f.match()
									.field("repositoryName") //$NON-NLS-1$
									.matching(repo))
							.must(f.match()
									.fields("simpleClassName", //$NON-NLS-1$
											"declaredTypes", //$NON-NLS-1$
											"fullyQualifiedNames") //$NON-NLS-1$
									.matching(query));
					if (!filterByModule) {
						return criteria;
					}
					return criteria.must(f.match()
							.field("projectName") //$NON-NLS-1$
							.matching(module));
				})
				.fetchHits(offset, limit);
	}
}
/**
 * Run a full-text search over Java source snippets and return the first
 * page of hits.
 * <p>
 * Convenience overload of
 * {@link #searchSourceContent(String, String, int, int)} using offset
 * {@code 0} and a limit of {@code 20}.
 *
 * @param repoName
 *            the repository name
 * @param query
 *            the search query
 * @return at most 20 matching Java blob index entries
 */
public List<JavaBlobIndex> searchSourceContent(String repoName,
		String query) {
	final int firstHit = 0;
	final int pageSize = 20;
	return searchSourceContent(repoName, query, firstHit, pageSize);
}
/**
 * Run a paginated full-text search over Java source snippets.
 * <p>
 * A hit must match both the {@code repositoryName} field and the
 * {@code sourceSnippet} field.
 *
 * @param repoName
 *            the repository name
 * @param query
 *            the search query
 * @param offset
 *            the index of the first hit to return
 * @param limit
 *            the maximum number of hits to return
 * @return matching Java blob index entries
 */
public List<JavaBlobIndex> searchSourceContent(String repoName,
		String query, int offset, int limit) {
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(JavaBlobIndex.class)
				.where(f -> {
					var inRepository = f.match()
							.field("repositoryName") //$NON-NLS-1$
							.matching(repoName);
					var snippetMatches = f.match()
							.field("sourceSnippet") //$NON-NLS-1$
							.matching(query);
					return f.bool().must(inRepository)
							.must(snippetMatches);
				})
				.fetchHits(offset, limit);
	}
}
/**
 * Search Java blob indices by type documentation (Javadoc).
 * <p>
 * A hit must match both the {@code repositoryName} field and the
 * {@code typeDocumentation} field.
 *
 * @param repo
 *            the repository name
 * @param query
 *            the documentation search query
 * @param offset
 *            pagination offset
 * @param limit
 *            maximum results
 * @return matching blob index entities
 */
public List<JavaBlobIndex> searchByDocumentation(String repo,
		String query, int offset, int limit) {
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(JavaBlobIndex.class)
				.where(f -> {
					var inRepository = f.match()
							.field("repositoryName") //$NON-NLS-1$
							.matching(repo);
					var docMatches = f.match()
							.field("typeDocumentation") //$NON-NLS-1$
							.matching(query);
					return f.bool().must(inRepository).must(docMatches);
				})
				.fetchHits(offset, limit);
	}
}
/**
 * Search Java blob indices by the types they reference.
 * <p>
 * A hit must match both the {@code repositoryName} field and the
 * {@code referencedTypes} field.
 *
 * @param repo
 *            the repository name
 * @param query
 *            the referenced type search query
 * @param offset
 *            pagination offset
 * @param limit
 *            maximum results
 * @return matching blob index entities
 */
public List<JavaBlobIndex> searchByReferencedType(String repo,
		String query, int offset, int limit) {
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(JavaBlobIndex.class)
				.where(f -> {
					var inRepository = f.match()
							.field("repositoryName") //$NON-NLS-1$
							.matching(repo);
					var referenceMatches = f.match()
							.field("referencedTypes") //$NON-NLS-1$
							.matching(query);
					return f.bool().must(inRepository)
							.must(referenceMatches);
				})
				.fetchHits(offset, limit);
	}
}
/**
 * Search Java blob indices by the string literals they contain.
 * <p>
 * A hit must match both the {@code repositoryName} field and the
 * {@code stringLiterals} field.
 *
 * @param repo
 *            the repository name
 * @param query
 *            the string literal search query
 * @param offset
 *            pagination offset
 * @param limit
 *            maximum results
 * @return matching blob index entities
 */
public List<JavaBlobIndex> searchByStringLiteral(String repo,
		String query, int offset, int limit) {
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(JavaBlobIndex.class)
				.where(f -> {
					var inRepository = f.match()
							.field("repositoryName") //$NON-NLS-1$
							.matching(repo);
					var literalMatches = f.match()
							.field("stringLiterals") //$NON-NLS-1$
							.matching(query);
					return f.bool().must(inRepository)
							.must(literalMatches);
				})
				.fetchHits(offset, limit);
	}
}
/**
 * Run a full-text search over recorded file paths.
 * <p>
 * A hit must match both the {@code repositoryName} field and the
 * {@code filePath} field of {@link FilePathHistory}.
 *
 * @param repo
 *            the repository name
 * @param pathQuery
 *            the file path search query
 * @param offset
 *            pagination offset
 * @param limit
 *            maximum results
 * @return matching file path history entries
 */
public List<FilePathHistory> searchFilePath(String repo,
		String pathQuery, int offset, int limit) {
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(FilePathHistory.class)
				.where(f -> {
					var inRepository = f.match()
							.field("repositoryName") //$NON-NLS-1$
							.matching(repo);
					var pathMatches = f.match()
							.field("filePath") //$NON-NLS-1$
							.matching(pathQuery);
					return f.bool().must(inRepository).must(pathMatches);
				})
				.fetchHits(offset, limit);
	}
}
/**
 * Load the recorded history of one file, newest commit first.
 * <p>
 * Only rows whose {@code filePath} equals {@code exactPath} are returned;
 * they are ordered by descending {@code commitTime}.
 *
 * @param repo
 *            the repository name
 * @param exactPath
 *            the exact file path
 * @param offset
 *            pagination offset
 * @param limit
 *            maximum results
 * @return file path history entries ordered by commit time, newest first
 */
public List<FilePathHistory> getFileHistory(String repo,
		String exactPath, int offset, int limit) {
	final String hql = "FROM FilePathHistory f WHERE f.repositoryName = :repo " //$NON-NLS-1$
			+ "AND f.filePath = :path ORDER BY f.commitTime DESC"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		var historyQuery = session.createQuery(hql, FilePathHistory.class)
				.setParameter("repo", repo) //$NON-NLS-1$
				.setParameter("path", exactPath) //$NON-NLS-1$
				.setFirstResult(offset)
				.setMaxResults(limit);
		return historyQuery.getResultList();
	}
}
/**
 * Search fully qualified names, optionally restricted to one file type.
 * <p>
 * The query is matched against the {@code fullyQualifiedNames} field. When
 * {@code fileType} is neither {@code null} nor empty, hits must
 * additionally match it in the {@code fileType} field.
 *
 * @param repo
 *            the repository name
 * @param fqnQuery
 *            the FQN search query
 * @param fileType
 *            optional file type filter (may be null or empty)
 * @param offset
 *            pagination offset
 * @param limit
 *            maximum results
 * @return matching blob index entities
 */
public List<JavaBlobIndex> searchFqnAcrossTypes(String repo,
		String fqnQuery, String fileType, int offset, int limit) {
	final boolean filterByFileType = fileType != null
			&& !fileType.isEmpty();
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(JavaBlobIndex.class)
				.where(f -> {
					var criteria = f.bool()
							.must(f.match()
									.field("repositoryName") //$NON-NLS-1$
									.matching(repo))
							.must(f.match()
									.field("fullyQualifiedNames") //$NON-NLS-1$
									.matching(fqnQuery));
					if (!filterByFileType) {
						return criteria;
					}
					return criteria.must(f.match()
							.field("fileType") //$NON-NLS-1$
							.matching(fileType));
				})
				.fetchHits(offset, limit);
	}
}
// --- Semantic search methods ---
/**
 * Semantic search: find code by natural language description.
 * <p>
 * The query text is turned into a vector by the configured embedding
 * service and used in a k-nearest-neighbour predicate on the
 * {@code semanticEmbedding} field, filtered to the given repository. An
 * empty list is returned when no embedding service is set, when it reports
 * itself as unavailable, or when it yields no vector for the query. See
 * {@link #setEmbeddingService(EmbeddingService)}.
 * </p>
 *
 * @param repoName
 *            the repository name
 * @param queryText
 *            natural language query (e.g., "HTTP client with retry")
 * @param topK
 *            number of results
 * @return matching Java blob index entries as returned by the vector search
 */
public List<JavaBlobIndex> semanticSearch(String repoName,
		String queryText, int topK) {
	boolean embedderReady = embeddingService != null
			&& embeddingService.isAvailable();
	if (!embedderReady) {
		return List.of();
	}
	final float[] queryVector = embeddingService.embed(queryText);
	if (queryVector == null) {
		return List.of();
	}
	try (Session session = sessionFactory.openSession()) {
		SearchSession searchSession = Search.session(session);
		return searchSession.search(JavaBlobIndex.class)
				.where(f -> {
					var inRepository = f.match()
							.field("repositoryName") //$NON-NLS-1$
							.matching(repoName);
					return f.knn(topK)
							.field("semanticEmbedding") //$NON-NLS-1$
							.matching(queryVector)
							.filter(inRepository);
				})
				.fetchHits(topK);
	}
}
/**
 * Hybrid search: combines full-text and semantic search.
 * <p>
 * Both searches are run with the same query and {@code topK}. If the
 * semantic search yields nothing (for example because no embedding service
 * is available), the full-text hits are returned unchanged. Otherwise both
 * lists are merged by
 * {@link RankFusionUtil#reciprocalRankFusion reciprocal rank fusion}.
 * </p>
 *
 * @param repoName
 *            the repository name
 * @param queryText
 *            natural language or keyword query
 * @param topK
 *            number of results
 * @return the fused results, or the plain full-text results when there are
 *         no semantic hits
 */
public List<JavaBlobIndex> hybridSearch(String repoName,
		String queryText, int topK) {
	final int firstHit = 0;
	List<JavaBlobIndex> keywordHits = searchSourceContent(repoName,
			queryText, firstHit, topK);
	List<JavaBlobIndex> vectorHits = semanticSearch(repoName, queryText,
			topK);
	return vectorHits.isEmpty() ? keywordHits
			: RankFusionUtil.reciprocalRankFusion(vectorHits, keywordHits,
					topK);
}
/**
 * Find code that is semantically similar to a given blob.
 * <p>
 * The stored embedding of the source blob is loaded from the database and
 * used in a k-nearest-neighbour predicate on the {@code semanticEmbedding}
 * field, filtered to the given repository. One extra neighbour is requested
 * because the source blob is removed from the hits before the list is cut
 * to {@code topK}. An empty list is returned if the source blob is not
 * indexed with an embedding.
 * </p>
 *
 * @param repoName
 *            the repository name
 * @param blobObjectId
 *            the blob object ID of the source file
 * @param topK
 *            number of similar results to return
 * @return up to {@code topK} Java blob index entries other than the source
 *         blob, in the order returned by the vector search
 */
public List<JavaBlobIndex> findSimilarCode(String repoName,
		String blobObjectId, int topK) {
	final String sourceHql = "FROM JavaBlobIndex j WHERE j.repositoryName = :repo " //$NON-NLS-1$
			+ "AND j.blobObjectId = :blobOid " //$NON-NLS-1$
			+ "AND j.hasEmbedding = true"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		JavaBlobIndex source = session
				.createQuery(sourceHql, JavaBlobIndex.class)
				.setParameter("repo", repoName) //$NON-NLS-1$
				.setParameter("blobOid", blobObjectId) //$NON-NLS-1$
				.setMaxResults(1)
				.uniqueResult();
		boolean hasVector = source != null
				&& source.getSemanticEmbedding() != null;
		if (!hasVector) {
			return List.of();
		}
		final float[] sourceVector = source.getSemanticEmbedding();
		// Ask for one more neighbour: the source blob is dropped below.
		final int candidates = topK + 1;
		SearchSession searchSession = Search.session(session);
		List<JavaBlobIndex> neighbours = searchSession
				.search(JavaBlobIndex.class)
				.where(f -> f.knn(candidates)
						.field("semanticEmbedding") //$NON-NLS-1$
						.matching(sourceVector)
						.filter(f.match()
								.field("repositoryName") //$NON-NLS-1$
								.matching(repoName)))
				.fetchHits(candidates);
		return neighbours.stream()
				.filter(hit -> !blobObjectId.equals(hit.getBlobObjectId()))
				.limit(topK)
				.toList();
	}
}
/**
 * Value holder for the commit statistics of one author. All fields are
 * assigned once in the constructor.
 */
public static class AuthorStats {
	private final String authorName;
	private final String authorEmail;
	private final long commitCount;

	/**
	 * Create author statistics.
	 *
	 * @param authorName
	 *            the author name
	 * @param authorEmail
	 *            the author email
	 * @param commitCount
	 *            the number of commits attributed to this author
	 */
	public AuthorStats(String authorName, String authorEmail,
			long commitCount) {
		this.commitCount = commitCount;
		this.authorEmail = authorEmail;
		this.authorName = authorName;
	}

	/**
	 * Get the author name.
	 *
	 * @return the author name as passed to the constructor
	 */
	public String getAuthorName() {
		return this.authorName;
	}

	/**
	 * Get the author email.
	 *
	 * @return the author email as passed to the constructor
	 */
	public String getAuthorEmail() {
		return this.authorEmail;
	}

	/**
	 * Get the commit count.
	 *
	 * @return the number of commits
	 */
	public long getCommitCount() {
		return this.commitCount;
	}
}
// --- Feature 1: Migration Planning Queries ---
/**
 * Find all Java blob index entries whose import statements match a given
 * prefix. Useful for planning framework migrations (e.g. from
 * {@code javax.servlet} to {@code jakarta.servlet}).
 * <p>
 * An entry matches when its {@code importStatements} value starts with the
 * prefix or contains a line break immediately followed by the prefix. The
 * prefix is matched literally: the LIKE wildcards {@code %} and {@code _}
 * (the latter is legal in Java package names) are escaped before the
 * pattern is built. An empty list is returned when {@code repoName} or
 * {@code importPrefix} is {@code null} or empty.
 *
 * @param repoName
 *            the repository name
 * @param importPrefix
 *            the import prefix to search for (e.g. {@code "javax.servlet"})
 * @param offset
 *            pagination offset
 * @param limit
 *            maximum results
 * @return matching blob index entities
 */
public List<JavaBlobIndex> getMigrationImpact(String repoName,
		String importPrefix, int offset, int limit) {
	if (repoName == null || repoName.isEmpty() || importPrefix == null
			|| importPrefix.isEmpty()) {
		return List.of();
	}
	// Neutralise LIKE metacharacters; '!' is the escape character declared
	// in the query below, so it has to be doubled first.
	String literalPrefix = importPrefix
			.replace("!", "!!") //$NON-NLS-1$ //$NON-NLS-2$
			.replace("%", "!%") //$NON-NLS-1$ //$NON-NLS-2$
			.replace("_", "!_"); //$NON-NLS-1$ //$NON-NLS-2$
	try (Session session = sessionFactory.openSession()) {
		return session.createQuery(
				"FROM JavaBlobIndex j WHERE j.repositoryName = :repo " //$NON-NLS-1$
						+ "AND (j.importStatements LIKE :prefixFirst ESCAPE '!' " //$NON-NLS-1$
						+ "OR j.importStatements LIKE :prefixOther ESCAPE '!')", //$NON-NLS-1$
				JavaBlobIndex.class)
				.setParameter("repo", repoName) //$NON-NLS-1$
				.setParameter("prefixFirst", literalPrefix + "%") //$NON-NLS-1$ //$NON-NLS-2$
				.setParameter("prefixOther", "%\n" + literalPrefix + "%") //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
				.setFirstResult(offset)
				.setMaxResults(limit)
				.getResultList();
	}
}
/**
 * Returns aggregated migration impact statistics for a given import prefix.
 * <p>
 * The returned map contains:
 * <ul>
 * <li>{@code totalFiles} — total number of affected files (capped at
 * {@code Integer.MAX_VALUE})</li>
 * <li>{@code distinctPackages} — number of distinct package names</li>
 * <li>{@code distinctAuthors} — number of distinct commit authors</li>
 * <li>{@code earliestDate} — earliest commit date as ISO-8601 string, or
 * {@code null}</li>
 * <li>{@code latestDate} — latest commit date as ISO-8601 string, or
 * {@code null}</li>
 * </ul>
 * <p>
 * An entry matches when its {@code importStatements} value starts with the
 * prefix or contains a line break immediately followed by the prefix. The
 * prefix is matched literally: the LIKE wildcards {@code %} and {@code _}
 * are escaped. When {@code repoName} or {@code importPrefix} is
 * {@code null} or empty, or nothing matches, the map holds zero counts and
 * {@code null} dates.
 * <p>
 * Matching rows are read in pages of 1000 and the persistence context is
 * cleared after each page, so memory use does not grow with the number of
 * matching rows.
 *
 * @param repoName
 *            the repository name
 * @param importPrefix
 *            the import prefix to search for
 * @return summary statistics as a map
 */
public Map<String, Object> getMigrationImpactSummary(String repoName,
		String importPrefix) {
	Map<String, Object> summary = new LinkedHashMap<>();
	summary.put("totalFiles", Integer.valueOf(0)); //$NON-NLS-1$
	summary.put("distinctPackages", Integer.valueOf(0)); //$NON-NLS-1$
	summary.put("distinctAuthors", Integer.valueOf(0)); //$NON-NLS-1$
	summary.put("earliestDate", null); //$NON-NLS-1$
	summary.put("latestDate", null); //$NON-NLS-1$
	if (repoName == null || repoName.isEmpty() || importPrefix == null
			|| importPrefix.isEmpty()) {
		return summary;
	}
	// Neutralise LIKE metacharacters; '!' is the escape character declared
	// in the condition below, so it has to be doubled first.
	String literalPrefix = importPrefix
			.replace("!", "!!") //$NON-NLS-1$ //$NON-NLS-2$
			.replace("%", "!%") //$NON-NLS-1$ //$NON-NLS-2$
			.replace("_", "!_"); //$NON-NLS-1$ //$NON-NLS-2$
	final String prefixFirst = literalPrefix + "%"; //$NON-NLS-1$
	final String prefixOther = "%\n" + literalPrefix + "%"; //$NON-NLS-1$ //$NON-NLS-2$
	// Shared by the count query and the paging query so both always select
	// the same rows.
	final String matchCondition = "WHERE j.repositoryName = :repo " //$NON-NLS-1$
			+ "AND (j.importStatements LIKE :prefixFirst ESCAPE '!' " //$NON-NLS-1$
			+ "OR j.importStatements LIKE :prefixOther ESCAPE '!')"; //$NON-NLS-1$
	try (Session session = sessionFactory.openSession()) {
		Long total = session.createQuery(
				"SELECT COUNT(j) FROM JavaBlobIndex j " + matchCondition, //$NON-NLS-1$
				Long.class)
				.setParameter("repo", repoName) //$NON-NLS-1$
				.setParameter("prefixFirst", prefixFirst) //$NON-NLS-1$
				.setParameter("prefixOther", prefixOther) //$NON-NLS-1$
				.getSingleResult();
		if (total == null || total == 0L) {
			return summary;
		}
		int totalFiles = total > Integer.MAX_VALUE ? Integer.MAX_VALUE
				: total.intValue();
		summary.put("totalFiles", Integer.valueOf(totalFiles)); //$NON-NLS-1$
		Set<String> packages = new HashSet<>();
		Set<String> authors = new HashSet<>();
		Instant earliest = null;
		Instant latest = null;
		final int pageSize = 1000;
		int offset = 0;
		// NOTE(review): the paging query has no ORDER BY, so page boundaries
		// rely on the database returning rows in a stable order — confirm.
		while (true) {
			List<JavaBlobIndex> page = session.createQuery(
					"FROM JavaBlobIndex j " + matchCondition, //$NON-NLS-1$
					JavaBlobIndex.class)
					.setParameter("repo", repoName) //$NON-NLS-1$
					.setParameter("prefixFirst", prefixFirst) //$NON-NLS-1$
					.setParameter("prefixOther", prefixOther) //$NON-NLS-1$
					.setFirstResult(offset)
					.setMaxResults(pageSize)
					.getResultList();
			if (page.isEmpty()) {
				break;
			}
			for (JavaBlobIndex entry : page) {
				String pkg = entry.getPackageName();
				if (pkg != null) {
					packages.add(pkg);
				}
				String author = entry.getCommitAuthor();
				if (author != null) {
					authors.add(author);
				}
				Instant date = entry.getCommitDate();
				if (date != null) {
					if (earliest == null || date.isBefore(earliest)) {
						earliest = date;
					}
					if (latest == null || date.isAfter(latest)) {
						latest = date;
					}
				}
			}
			// Everything needed from this page has been copied out; drop
			// the loaded entities so the session does not retain every
			// matching row.
			session.clear();
			offset += pageSize;
			if (page.size() < pageSize) {
				break;
			}
		}
		summary.put("distinctPackages", //$NON-NLS-1$
				Integer.valueOf(packages.size()));
		summary.put("distinctAuthors", //$NON-NLS-1$
				Integer.valueOf(authors.size()));
		summary.put("earliestDate", //$NON-NLS-1$
				earliest != null ? earliest.toString() : null);
		summary.put("latestDate", //$NON-NLS-1$
				latest != null ? latest.toString() : null);
		return summary;
	}
}
/**
 * Lists the Java blob index entries of a repository whose recorded
 * annotations include {@code Deprecated}.
 * <p>
 * The {@code annotations} value is matched as a newline-separated list: the
 * entry qualifies when {@code Deprecated} is the whole value, the first
 * line, a middle line or the last line.
 *
 * @param repoName
 *            the repository name; {@code null} or empty yields an empty
 *            list
 * @param offset
 *            index of the first result to return
 * @param limit
 *            maximum number of results to return
 * @return the matching entries, in the order delivered by the database
 */
public List<JavaBlobIndex> getDeprecatedApiUsage(String repoName,
        int offset, int limit) {
    boolean repoGiven = repoName != null && !repoName.isEmpty();
    if (!repoGiven) {
        return List.of();
    }
    final String hql = "FROM JavaBlobIndex j WHERE j.repositoryName = :repoName " //$NON-NLS-1$
            + "AND (j.annotations = :exact " //$NON-NLS-1$
            + "OR j.annotations LIKE :first " //$NON-NLS-1$
            + "OR j.annotations LIKE :middle " //$NON-NLS-1$
            + "OR j.annotations LIKE :last)"; //$NON-NLS-1$
    try (Session session = sessionFactory.openSession()) {
        var query = session.createQuery(hql, JavaBlobIndex.class);
        query.setParameter("repoName", repoName); //$NON-NLS-1$
        // One pattern per possible position of the annotation in the list.
        query.setParameter("exact", "Deprecated"); //$NON-NLS-1$ //$NON-NLS-2$
        query.setParameter("first", "Deprecated\n%"); //$NON-NLS-1$ //$NON-NLS-2$
        query.setParameter("middle", "%\nDeprecated\n%"); //$NON-NLS-1$ //$NON-NLS-2$
        query.setParameter("last", "%\nDeprecated"); //$NON-NLS-1$ //$NON-NLS-2$
        query.setFirstResult(offset);
        query.setMaxResults(limit);
        return query.getResultList();
    }
}
/**
 * Counts the index entries of a repository that have at least one import
 * starting with the given prefix. Intended for dependency impact analysis.
 *
 * @param repoName
 *            the repository name
 * @param importPrefix
 *            the import prefix to look for (e.g. {@code "com.google.guava"})
 * @return the number of matching entries; {@code 0} when either argument is
 *         {@code null} or empty
 */
public long getImportFrequency(String repoName, String importPrefix) {
    if (repoName == null || repoName.isEmpty()) {
        return 0L;
    }
    if (importPrefix == null || importPrefix.isEmpty()) {
        return 0L;
    }
    // The prefix may open the column value or follow a newline inside it.
    String atStart = importPrefix + "%"; //$NON-NLS-1$
    String afterNewline = "%\n" + importPrefix + "%"; //$NON-NLS-1$ //$NON-NLS-2$
    try (Session session = sessionFactory.openSession()) {
        var query = session.createQuery(
                "SELECT COUNT(j) FROM JavaBlobIndex j " //$NON-NLS-1$
                        + "WHERE j.repositoryName = :repo " //$NON-NLS-1$
                        + "AND (j.importStatements LIKE :prefixFirst " //$NON-NLS-1$
                        + "OR j.importStatements LIKE :prefixOther)", //$NON-NLS-1$
                Long.class);
        query.setParameter("repo", repoName); //$NON-NLS-1$
        query.setParameter("prefixFirst", atStart); //$NON-NLS-1$
        query.setParameter("prefixOther", afterNewline); //$NON-NLS-1$
        Long matches = query.getSingleResult();
        if (matches == null) {
            return 0L;
        }
        return matches.longValue();
    }
}
/**
 * Returns the index entries that import from at least one of the supplied
 * prefixes.
 * <p>
 * Handy for scoping a migration that spans several frameworks (for example
 * everything using {@code javax.servlet} OR {@code javax.ws.rs}).
 * {@code null} and blank prefixes are dropped, and only the first
 * {@code MAX_IMPORT_PREFIXES} remaining prefixes are used so the generated
 * query stays bounded.
 *
 * @param repoName
 *            the repository name
 * @param importPrefixes
 *            the import prefixes to match
 * @param offset
 *            index of the first result to return
 * @param limit
 *            maximum number of results to return
 * @return the matching entries; empty when the repository name is missing
 *         or no usable prefix was supplied
 */
public List<JavaBlobIndex> searchByMultipleImports(String repoName,
        List<String> importPrefixes, int offset, int limit) {
    if (repoName == null || repoName.isEmpty()) {
        return List.of();
    }
    if (importPrefixes == null || importPrefixes.isEmpty()) {
        return List.of();
    }
    // Keep usable prefixes only, capped to avoid excessive query length.
    List<String> usable = importPrefixes.stream()
            .filter(candidate -> candidate != null && !candidate.isBlank())
            .limit(MAX_IMPORT_PREFIXES)
            .toList();
    if (usable.isEmpty()) {
        return List.of();
    }
    // Each prefix contributes two numbered LIKE parameters: "pf<i>" for a
    // match at the start of the column and "pm<i>" for a match after a
    // newline.
    List<String> clauses = new ArrayList<>(usable.size());
    for (int idx = 0; idx < usable.size(); idx++) {
        clauses.add("j.importStatements LIKE :pf" + idx //$NON-NLS-1$
                + " OR j.importStatements LIKE :pm" + idx); //$NON-NLS-1$
    }
    String hql = "FROM JavaBlobIndex j WHERE j.repositoryName = :repo AND (" //$NON-NLS-1$
            + String.join(" OR ", clauses) //$NON-NLS-1$
            + ")"; //$NON-NLS-1$
    try (Session session = sessionFactory.openSession()) {
        var query = session.createQuery(hql, JavaBlobIndex.class);
        query.setParameter("repo", repoName); //$NON-NLS-1$
        int idx = 0;
        for (String prefix : usable) {
            query.setParameter("pf" + idx, prefix + "%"); //$NON-NLS-1$ //$NON-NLS-2$
            query.setParameter("pm" + idx, "%\n" + prefix + "%"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
            idx++;
        }
        query.setFirstResult(offset);
        query.setMaxResults(limit);
        return query.getResultList();
    }
}
/**
 * Collects, for every old import prefix of a migration mapping, the files
 * that would have to be touched.
 * <p>
 * The argument maps old import prefixes to their replacements (e.g.
 * {@code {"javax.servlet" → "jakarta.servlet"}}). Only the keys are used
 * for the lookup; {@code null} or empty keys are skipped. The files for
 * each key are fetched through {@code getMigrationImpact} in pages of 1000
 * until a short page signals the end.
 *
 * @param repoName
 *            the repository name
 * @param oldToNewImportMap
 *            map of old import prefix → new import prefix
 * @return map of old prefix → affected files, in the iteration order of the
 *         input map's keys; empty when an argument is {@code null} or empty
 */
public Map<String, List<JavaBlobIndex>> getMigrationCandidates(
        String repoName,
        Map<String, String> oldToNewImportMap) {
    final int pageSize = 1000;
    Map<String, List<JavaBlobIndex>> candidates = new LinkedHashMap<>();
    boolean repoMissing = repoName == null || repoName.isEmpty();
    boolean mappingMissing = oldToNewImportMap == null
            || oldToNewImportMap.isEmpty();
    if (repoMissing || mappingMissing) {
        return candidates;
    }
    for (String oldPrefix : oldToNewImportMap.keySet()) {
        if (oldPrefix == null || oldPrefix.isEmpty()) {
            continue;
        }
        List<JavaBlobIndex> affected = new ArrayList<>();
        List<JavaBlobIndex> page;
        int offset = 0;
        // A page shorter than pageSize (including an empty one) is the
        // last page.
        do {
            page = getMigrationImpact(repoName, oldPrefix, offset,
                    pageSize);
            affected.addAll(page);
            offset += pageSize;
        } while (page.size() >= pageSize);
        candidates.put(oldPrefix, affected);
    }
    return candidates;
}
// --- Feature 2: API Evolution Tracker ---
/**
 * Diffs the {@link JavaBlobIndex} entries recorded for two commits.
 * <p>
 * Entries are paired by file path. The resulting {@link ApiDiffResult}
 * holds:
 * <ul>
 * <li>entries whose path exists only in commit B (added)</li>
 * <li>entries whose path exists only in commit A (removed)</li>
 * <li>paths present in both commits whose declared methods, method
 * signatures or visibility differ (changed)</li>
 * </ul>
 *
 * @param repoName
 *            the repository name
 * @param commitOidA
 *            the first (older) commit object ID
 * @param commitOidB
 *            the second (newer) commit object ID
 * @return the diff result; all three lists are empty when any argument is
 *         {@code null} or empty
 */
public ApiDiffResult getApiDiff(String repoName, String commitOidA,
        String commitOidB) {
    boolean repoMissing = repoName == null || repoName.isEmpty();
    boolean oldMissing = commitOidA == null || commitOidA.isEmpty();
    boolean newMissing = commitOidB == null || commitOidB.isEmpty();
    if (repoMissing || oldMissing || newMissing) {
        return new ApiDiffResult(List.of(), List.of(), List.of());
    }
    Map<String, JavaBlobIndex> older = indexByFilePath(
            getEntriesForCommit(repoName, commitOidA));
    Map<String, JavaBlobIndex> newer = indexByFilePath(
            getEntriesForCommit(repoName, commitOidB));
    List<JavaBlobIndex> added = new ArrayList<>();
    List<JavaBlobIndex> removed = new ArrayList<>();
    List<ApiChangeEntry> changed = new ArrayList<>();
    // Paths known only to the newer commit.
    newer.forEach((path, entry) -> {
        if (!older.containsKey(path)) {
            added.add(entry);
        }
    });
    // Paths of the older commit: either gone, or compared pairwise.
    for (Map.Entry<String, JavaBlobIndex> slot : older.entrySet()) {
        String path = slot.getKey();
        JavaBlobIndex before = slot.getValue();
        if (!newer.containsKey(path)) {
            removed.add(before);
            continue;
        }
        JavaBlobIndex after = newer.get(path);
        String description = detectApiChanges(before, after);
        if (description != null) {
            changed.add(new ApiChangeEntry(before, after, description));
        }
    }
    return new ApiDiffResult(added, removed, changed);
}
/**
 * Returns the entries carrying the {@code Deprecated} annotation, oldest
 * commit first, so the introduction of deprecations can be followed over
 * time.
 * <p>
 * The {@code annotations} value is matched as a newline-separated list
 * (whole value, first line, middle line or last line).
 *
 * @param repoName
 *            the repository name; {@code null} or empty yields an empty
 *            list
 * @param offset
 *            index of the first result to return
 * @param limit
 *            maximum number of results to return
 * @return deprecated entries ordered by commit date ascending
 */
public List<JavaBlobIndex> getDeprecationTimeline(String repoName,
        int offset, int limit) {
    boolean repoGiven = repoName != null && !repoName.isEmpty();
    if (!repoGiven) {
        return List.of();
    }
    final String hql = "FROM JavaBlobIndex j WHERE j.repositoryName = :repo " //$NON-NLS-1$
            + "AND (j.annotations = :exact " //$NON-NLS-1$
            + "OR j.annotations LIKE :first " //$NON-NLS-1$
            + "OR j.annotations LIKE :middle " //$NON-NLS-1$
            + "OR j.annotations LIKE :last) " //$NON-NLS-1$
            + "ORDER BY j.commitDate ASC"; //$NON-NLS-1$
    try (Session session = sessionFactory.openSession()) {
        var query = session.createQuery(hql, JavaBlobIndex.class);
        query.setParameter("repo", repoName); //$NON-NLS-1$
        query.setParameter("exact", "Deprecated"); //$NON-NLS-1$ //$NON-NLS-2$
        query.setParameter("first", "Deprecated\n%"); //$NON-NLS-1$ //$NON-NLS-2$
        query.setParameter("middle", "%\nDeprecated\n%"); //$NON-NLS-1$ //$NON-NLS-2$
        query.setParameter("last", "%\nDeprecated"); //$NON-NLS-1$ //$NON-NLS-2$
        query.setFirstResult(offset);
        query.setMaxResults(limit);
        return query.getResultList();
    }
}
/**
 * Variant of {@link #getApiDiff(String, String, String)} restricted to
 * public API, for spotting breaking changes to publicly visible types.
 * <p>
 * Added and removed entries are kept when their visibility contains
 * {@code public}; a changed entry is kept when either its before or its
 * after state does.
 *
 * @param repoName
 *            the repository name
 * @param commitOidA
 *            the first (older) commit object ID
 * @param commitOidB
 *            the second (newer) commit object ID
 * @return the diff result for public API entries only
 */
public ApiDiffResult getPublicApiChanges(String repoName,
        String commitOidA, String commitOidB) {
    ApiDiffResult unfiltered = getApiDiff(repoName, commitOidA,
            commitOidB);

    // A null list in the unfiltered result is treated as empty.
    List<JavaBlobIndex> addedPublic = List.of();
    var allAdded = unfiltered.getAddedFiles();
    if (allAdded != null) {
        addedPublic = allAdded.stream()
                .filter(candidate -> isPublic(candidate))
                .toList();
    }

    List<JavaBlobIndex> removedPublic = List.of();
    var allRemoved = unfiltered.getRemovedFiles();
    if (allRemoved != null) {
        removedPublic = allRemoved.stream()
                .filter(candidate -> isPublic(candidate))
                .toList();
    }

    List<ApiChangeEntry> changedPublic = List.of();
    var allChanged = unfiltered.getChangedFiles();
    if (allChanged != null) {
        changedPublic = allChanged.stream()
                .filter(change -> isPublic(change.getBefore())
                        || isPublic(change.getAfter()))
                .toList();
    }

    return new ApiDiffResult(addedPublic, removedPublic, changedPublic);
}
/**
 * Returns the index entries recorded for one type across commits, oldest
 * first, so changes to its declared methods and method signatures can be
 * followed over time.
 * <p>
 * The {@code fullyQualifiedNames} value is matched as a newline-separated
 * list: the type name may be the whole value, the first line, the last line
 * or a middle line.
 *
 * @param repoName
 *            the repository name
 * @param fullyQualifiedTypeName
 *            the fully qualified type name to track
 * @param offset
 *            index of the first result to return
 * @param limit
 *            maximum number of results to return
 * @return entries for the type ordered by commit date ascending; empty when
 *         an argument is {@code null} or empty
 */
public List<JavaBlobIndex> getMethodEvolution(String repoName,
        String fullyQualifiedTypeName, int offset, int limit) {
    if (repoName == null || repoName.isEmpty()) {
        return List.of();
    }
    if (fullyQualifiedTypeName == null
            || fullyQualifiedTypeName.isEmpty()) {
        return List.of();
    }
    String asFirstLine = fullyQualifiedTypeName + "\n%"; //$NON-NLS-1$
    String asLastLine = "%\n" + fullyQualifiedTypeName; //$NON-NLS-1$
    String asMiddleLine = "%\n" + fullyQualifiedTypeName + "\n%"; //$NON-NLS-1$ //$NON-NLS-2$
    final String hql = "FROM JavaBlobIndex j WHERE j.repositoryName = :repo " //$NON-NLS-1$
            + "AND (j.fullyQualifiedNames = :fqnExact " //$NON-NLS-1$
            + "OR j.fullyQualifiedNames LIKE :fqnPrefix " //$NON-NLS-1$
            + "OR j.fullyQualifiedNames LIKE :fqnSuffix " //$NON-NLS-1$
            + "OR j.fullyQualifiedNames LIKE :fqnMiddle) " //$NON-NLS-1$
            + "ORDER BY j.commitDate ASC"; //$NON-NLS-1$
    try (Session session = sessionFactory.openSession()) {
        var query = session.createQuery(hql, JavaBlobIndex.class);
        query.setParameter("repo", repoName); //$NON-NLS-1$
        query.setParameter("fqnExact", fullyQualifiedTypeName); //$NON-NLS-1$
        query.setParameter("fqnPrefix", asFirstLine); //$NON-NLS-1$
        query.setParameter("fqnSuffix", asLastLine); //$NON-NLS-1$
        query.setParameter("fqnMiddle", asMiddleLine); //$NON-NLS-1$
        query.setFirstResult(offset);
        query.setMaxResults(limit);
        return query.getResultList();
    }
}
// --- Feature 3: Developer Analytics ---
/**
 * Aggregates the index entries of a repository by commit author and type
 * kind, giving the number of entries per combination.
 * <p>
 * Every row of the result is an {@code Object[]} laid out as
 * {@code [commitAuthor, typeKind, count]}.
 *
 * @param repoName
 *            the repository name; {@code null} or empty yields an empty
 *            list
 * @return one row per author and type kind, sorted by author, then type
 *         kind
 */
public List<Object[]> getAuthorTypeStatistics(String repoName) {
    boolean repoGiven = repoName != null && !repoName.isEmpty();
    if (!repoGiven) {
        return List.of();
    }
    final String hql = "SELECT j.commitAuthor, j.typeKind, COUNT(j) " //$NON-NLS-1$
            + "FROM JavaBlobIndex j WHERE j.repositoryName = :repo " //$NON-NLS-1$
            + "GROUP BY j.commitAuthor, j.typeKind " //$NON-NLS-1$
            + "ORDER BY j.commitAuthor, j.typeKind"; //$NON-NLS-1$
    try (Session session = sessionFactory.openSession()) {
        var query = session.createQuery(hql, Object[].class);
        query.setParameter("repo", repoName); //$NON-NLS-1$
        return query.getResultList();
    }
}
/**
 * Computes the average line count per commit date, optionally restricted
 * to packages starting with a given prefix. Shows how code size develops
 * over time.
 * <p>
 * Every row of the result is an {@code Object[]} laid out as
 * {@code [commitDate, avgLineCount]}.
 *
 * @param repoName
 *            the repository name; {@code null} or empty yields an empty
 *            list
 * @param packagePrefix
 *            package name prefix filter (e.g. {@code "com.example"});
 *            {@code null} or empty disables the filter
 * @return average line count per commit date ordered by date ascending
 */
public List<Object[]> getCodeComplexityTrend(String repoName,
        String packagePrefix) {
    if (repoName == null || repoName.isEmpty()) {
        return List.of();
    }
    boolean filterByPackage = packagePrefix != null
            && !packagePrefix.isEmpty();
    // Both variants share everything except the optional package clause.
    StringBuilder hql = new StringBuilder();
    hql.append("SELECT j.commitDate, AVG(j.lineCount) "); //$NON-NLS-1$
    hql.append("FROM JavaBlobIndex j WHERE j.repositoryName = :repo "); //$NON-NLS-1$
    if (filterByPackage) {
        hql.append("AND j.packageName LIKE :pkg "); //$NON-NLS-1$
    }
    hql.append("GROUP BY j.commitDate ORDER BY j.commitDate ASC"); //$NON-NLS-1$
    try (Session session = sessionFactory.openSession()) {
        var query = session.createQuery(hql.toString(), Object[].class);
        query.setParameter("repo", repoName); //$NON-NLS-1$
        if (filterByPackage) {
            query.setParameter("pkg", packagePrefix + "%"); //$NON-NLS-1$ //$NON-NLS-2$
        }
        return query.getResultList();
    }
}
/**
 * Lists the {@link JavaBlobIndex} entries whose line count is at or above
 * a threshold, largest first. Helps to spot oversized classes that may be
 * worth refactoring.
 *
 * @param repoName
 *            the repository name; {@code null} or empty yields an empty
 *            list
 * @param lineCountThreshold
 *            minimum line count (inclusive)
 * @param offset
 *            index of the first result to return
 * @param limit
 *            maximum number of results to return
 * @return entries with at least {@code lineCountThreshold} lines, ordered
 *         by line count descending
 */
public List<JavaBlobIndex> getMonsterClasses(String repoName,
        int lineCountThreshold, int offset, int limit) {
    boolean repoGiven = repoName != null && !repoName.isEmpty();
    if (!repoGiven) {
        return List.of();
    }
    final String hql = "FROM JavaBlobIndex j WHERE j.repositoryName = :repo " //$NON-NLS-1$
            + "AND j.lineCount >= :threshold " //$NON-NLS-1$
            + "ORDER BY j.lineCount DESC"; //$NON-NLS-1$
    try (Session session = sessionFactory.openSession()) {
        var query = session.createQuery(hql, JavaBlobIndex.class);
        query.setParameter("repo", repoName); //$NON-NLS-1$
        query.setParameter("threshold", lineCountThreshold); //$NON-NLS-1$
        query.setFirstResult(offset);
        query.setMaxResults(limit);
        return query.getResultList();
    }
}
/**
 * Find types whose fully qualified names never appear in any import
 * statement recorded for the same repository. These are potential dead
 * code candidates.
 * <p>
 * Wildcard imports (e.g. {@code pkg.*}) are treated as covering all types
 * directly in that package, so types referenced only via wildcard imports
 * are not considered dead.
 * <p>
 * <b>Note:</b> The cross-reference filtering is performed in-memory after
 * loading all import statements and all entries with non-null FQNs.
 * Pagination is applied to the filtered result set. For very large
 * repositories, callers should use reasonable {@code limit} values.
 *
 * @param repoName
 *            the repository name; {@code null} or empty yields an empty
 *            list
 * @param offset
 *            pagination offset into the filtered result; negative values
 *            are treated as {@code 0}, values past the end yield an empty
 *            list
 * @param limit
 *            maximum number of results to return; negative values are
 *            treated as {@code 0}, and large values such as
 *            {@code Integer.MAX_VALUE} are safe
 * @return entries that are never imported by files in the repository
 */
public List<JavaBlobIndex> getDeadCodeCandidates(String repoName,
        int offset, int limit) {
    if (repoName == null || repoName.isEmpty()) {
        return List.of();
    }
    try (Session session = sessionFactory.openSession()) {
        // Collect all import statements in the repo.
        List<String> allImports = session.createQuery(
                "SELECT j.importStatements FROM JavaBlobIndex j " //$NON-NLS-1$
                        + "WHERE j.repositoryName = :repo " //$NON-NLS-1$
                        + "AND j.importStatements IS NOT NULL", //$NON-NLS-1$
                String.class)
                .setParameter("repo", repoName) //$NON-NLS-1$
                .getResultList();
        // Build a set of individual imported names for precise matching,
        // and track wildcard-import package prefixes (pkg.*).
        Set<String> importedNames = new HashSet<>();
        Set<String> wildcardPackages = new HashSet<>();
        for (String imports : allImports) {
            if (imports == null) {
                continue;
            }
            for (String token : FQN_SPLIT_PATTERN.split(imports)) {
                String t = token.trim();
                if (t.isBlank()) {
                    continue;
                }
                if (t.endsWith(".*")) { //$NON-NLS-1$
                    String pkg = t.substring(0, t.length() - 2);
                    if (!pkg.isEmpty()) {
                        wildcardPackages.add(pkg);
                    }
                } else {
                    importedNames.add(t);
                }
            }
        }
        // Collect all types
        List<JavaBlobIndex> all = session.createQuery(
                "FROM JavaBlobIndex j WHERE j.repositoryName = :repo " //$NON-NLS-1$
                        + "AND j.fullyQualifiedNames IS NOT NULL", //$NON-NLS-1$
                JavaBlobIndex.class)
                .setParameter("repo", repoName) //$NON-NLS-1$
                .getResultList();
        List<JavaBlobIndex> deadCandidates = new ArrayList<>();
        for (JavaBlobIndex entry : all) {
            String fqns = entry.getFullyQualifiedNames();
            if (fqns == null || fqns.isBlank()) {
                continue;
            }
            // An entry is alive as soon as one of its FQNs is imported
            // directly or covered by a wildcard import of its package.
            boolean referenced = false;
            for (String fqn : FQN_SPLIT_PATTERN.split(fqns)) {
                if (fqn.isBlank()) {
                    continue;
                }
                String f = fqn.trim();
                if (importedNames.contains(f)) {
                    referenced = true;
                    break;
                }
                int lastDot = f.lastIndexOf('.');
                if (lastDot > 0 && wildcardPackages
                        .contains(f.substring(0, lastDot))) {
                    referenced = true;
                    break;
                }
            }
            if (!referenced) {
                deadCandidates.add(entry);
            }
        }
        // Clamp the window before slicing. Computing "from + limit"
        // directly overflows int for large limits (for example
        // Integer.MAX_VALUE combined with a positive offset), and
        // negative arguments would make subList throw.
        int size = deadCandidates.size();
        int from = Math.min(Math.max(offset, 0), size);
        int count = Math.min(Math.max(limit, 0), size - from);
        return new ArrayList<>(deadCandidates.subList(from, from + count));
    }
}
/**
 * Reports, per package, how many index entries reference a JUnit
 * {@code Test} annotation next to the total number of entries. The ratio
 * can serve as a rough proxy for test coverage.
 * <p>
 * Every row of the result is an {@code Object[]} laid out as
 * {@code [packageName, testTypeCount, totalTypeCount]}. Packages without
 * any test entry are reported with a test count of {@code 0}.
 *
 * @param repoName
 *            the repository name; {@code null} or empty yields an empty
 *            list
 * @return per-package test type count and total type count
 */
public List<Object[]> getTestCoverageProxy(String repoName) {
    boolean repoGiven = repoName != null && !repoName.isEmpty();
    if (!repoGiven) {
        return List.of();
    }
    // Both queries share the projection and the repository filter.
    final String selectForRepo = "SELECT j.packageName, COUNT(j) FROM JavaBlobIndex j " //$NON-NLS-1$
            + "WHERE j.repositoryName = :repo "; //$NON-NLS-1$
    final String groupByPackage = "GROUP BY j.packageName"; //$NON-NLS-1$
    try (Session session = sessionFactory.openSession()) {
        // Total entries per package.
        var totalsQuery = session.createQuery(
                selectForRepo + groupByPackage, Object[].class);
        totalsQuery.setParameter("repo", repoName); //$NON-NLS-1$
        List<Object[]> totals = totalsQuery.getResultList();

        // Entries per package whose annotations mention a JUnit 4 or
        // JUnit 5 @Test.
        // NOTE(review): these patterns expect fully qualified annotation
        // names, while the Deprecated lookups in this class match the
        // simple name - confirm how the indexer stores annotations.
        var testsQuery = session.createQuery(
                selectForRepo
                        + "AND (j.annotations LIKE :junit4Test OR j.annotations LIKE :junit5Test) " //$NON-NLS-1$
                        + groupByPackage,
                Object[].class);
        testsQuery.setParameter("repo", repoName); //$NON-NLS-1$
        testsQuery.setParameter("junit4Test", "%org.junit.Test%"); //$NON-NLS-1$ //$NON-NLS-2$
        testsQuery.setParameter("junit5Test", //$NON-NLS-1$
                "%org.junit.jupiter.api.Test%"); //$NON-NLS-1$
        List<Object[]> testCounts = testsQuery.getResultList();

        Map<String, Long> testsByPackage = new HashMap<>();
        for (Object[] testRow : testCounts) {
            String packageName = (String) testRow[0];
            Long count = (Long) testRow[1];
            testsByPackage.put(packageName, count);
        }

        // Join the two result sets on the package name.
        List<Object[]> rows = new ArrayList<>();
        for (Object[] totalRow : totals) {
            String packageName = (String) totalRow[0];
            Long totalCount = (Long) totalRow[1];
            Long testCount = testsByPackage.getOrDefault(packageName, 0L);
            rows.add(new Object[] { packageName, testCount, totalCount });
        }
        return rows;
    }
}
/**
 * Splits a multi-valued text column on runs of commas and/or whitespace.
 * Used by getDeadCodeCandidates to tokenize both the import statements and
 * the fully qualified names of an entry.
 */
private static final java.util.regex.Pattern FQN_SPLIT_PATTERN = java.util.regex.Pattern
.compile("[,\\s]+"); //$NON-NLS-1$
/** Maximum number of import prefixes accepted by searchByMultipleImports. */
private static final int MAX_IMPORT_PREFIXES = 50;
// --- Private helpers ---
/**
 * Loads every index entry recorded for one commit of a repository.
 *
 * @param repoName
 *            the repository name
 * @param commitOid
 *            the commit object ID
 * @return the entries of that commit, possibly empty
 */
private List<JavaBlobIndex> getEntriesForCommit(String repoName,
        String commitOid) {
    final String hql = "FROM JavaBlobIndex j WHERE j.repositoryName = :repo " //$NON-NLS-1$
            + "AND j.commitObjectId = :commit"; //$NON-NLS-1$
    try (Session session = sessionFactory.openSession()) {
        var query = session.createQuery(hql, JavaBlobIndex.class);
        query.setParameter("repo", repoName); //$NON-NLS-1$
        query.setParameter("commit", commitOid); //$NON-NLS-1$
        return query.getResultList();
    }
}
/**
 * Builds a lookup from file path to entry, keeping the order in which the
 * paths are first encountered. Entries without a file path are skipped;
 * when a path occurs more than once the later entry replaces the earlier
 * one.
 *
 * @param entries
 *            the entries to index
 * @return an insertion-ordered map of file path to entry
 */
private static Map<String, JavaBlobIndex> indexByFilePath(
        List<JavaBlobIndex> entries) {
    Map<String, JavaBlobIndex> byPath = new LinkedHashMap<>();
    for (JavaBlobIndex candidate : entries) {
        String path = candidate.getFilePath();
        if (path == null) {
            continue;
        }
        byPath.put(path, candidate);
    }
    return byPath;
}
/**
 * Describes which API-relevant attributes differ between two versions of
 * an entry. Declared methods, method signatures and visibility are
 * compared, in that order, and the labels of the differing ones are joined
 * with {@code ", "}.
 *
 * @param before
 *            the older entry
 * @param after
 *            the newer entry
 * @return the comma-separated change description, or {@code null} when all
 *         three attributes are equal
 */
private static String detectApiChanges(JavaBlobIndex before,
        JavaBlobIndex after) {
    boolean methodsDiffer = !Objects.equals(before.getDeclaredMethods(),
            after.getDeclaredMethods());
    boolean signaturesDiffer = !Objects.equals(
            before.getMethodSignatures(), after.getMethodSignatures());
    boolean visibilityDiffers = !Objects.equals(before.getVisibility(),
            after.getVisibility());
    if (!methodsDiffer && !signaturesDiffer && !visibilityDiffers) {
        return null;
    }
    StringBuilder description = new StringBuilder();
    if (methodsDiffer) {
        description.append("methods changed"); //$NON-NLS-1$
    }
    if (signaturesDiffer) {
        if (description.length() > 0) {
            description.append(", "); //$NON-NLS-1$
        }
        description.append("signatures changed"); //$NON-NLS-1$
    }
    if (visibilityDiffers) {
        if (description.length() > 0) {
            description.append(", "); //$NON-NLS-1$
        }
        description.append("visibility changed"); //$NON-NLS-1$
    }
    return description.toString();
}
/**
 * Tells whether an entry counts as public API, i.e. whether its visibility
 * text contains {@code public}.
 *
 * @param entry
 *            the entry to inspect, may be {@code null}
 * @return {@code true} if the entry has a visibility containing
 *         {@code public}; {@code false} for a {@code null} entry or a
 *         {@code null} visibility
 */
private static boolean isPublic(JavaBlobIndex entry) {
    if (entry == null) {
        return false;
    }
    String visibility = entry.getVisibility();
    if (visibility == null) {
        return false;
    }
    return visibility.contains("public"); //$NON-NLS-1$
}
}