// CommitIndexService.java
package com.taxonomy.architecture.service;
import com.taxonomy.dsl.parser.DslTokenizer;
import com.taxonomy.dsl.storage.DslCommit;
import com.taxonomy.dsl.storage.DslGitRepository;
import com.taxonomy.dsl.storage.DslGitRepositoryFactory;
import com.taxonomy.dto.ElementHistoryAggregation;
import com.taxonomy.architecture.model.ArchitectureCommitIndex;
import com.taxonomy.architecture.repository.ArchitectureCommitIndexRepository;
import com.taxonomy.workspace.service.WorkspaceContext;
import jakarta.persistence.EntityManager;
import jakarta.persistence.PersistenceContext;
import org.hibernate.search.mapper.orm.Search;
import org.hibernate.search.mapper.orm.session.SearchSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import java.io.IOException;
import java.time.Instant;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
/**
* Populates and queries the {@link ArchitectureCommitIndex} from JGit history.
*
* <p>Each commit on a DSL branch is parsed and tokenized to enable
* history search over architecture evolution.
*
* <p>Search methods use <b>Hibernate Search</b> (Lucene backend) with custom
* analyzers ({@code "dsl"}, {@code "csv-keyword"}) for full-text queries,
* replacing the previous JPQL {@code LIKE}/{@code LOWER()} approach.
*/
@Service
public class CommitIndexService {
private static final Logger log = LoggerFactory.getLogger(CommitIndexService.class);
private final DslGitRepositoryFactory repositoryFactory;
private final ArchitectureCommitIndexRepository indexRepository;
private final DslTokenizer tokenizer = new DslTokenizer();
@PersistenceContext
private EntityManager entityManager;
public CommitIndexService(DslGitRepositoryFactory repositoryFactory,
ArchitectureCommitIndexRepository indexRepository) {
this.repositoryFactory = repositoryFactory;
this.indexRepository = indexRepository;
}
/**
* Resolve the Git repository for the given workspace context.
*
* @param ctx the workspace context (use {@link WorkspaceContext#SHARED}
* for the system repository)
* @return the resolved DslGitRepository
*/
private DslGitRepository resolveRepository(WorkspaceContext ctx) {
return repositoryFactory.resolveRepository(ctx);
}
/**
* Index all unindexed commits on the given branch.
*
* <p>Uses the system repository (SHARED context). Use
* {@link #indexBranch(String, WorkspaceContext)} for workspace-aware resolution.
*
* @param branch the Git branch to index
* @return number of newly indexed commits
*/
@Transactional
public int indexBranch(String branch) {
return indexBranch(branch, WorkspaceContext.SHARED);
}
/**
* Index all unindexed commits on the given branch.
*
* <p><b>Known limitation:</b> {@link ArchitectureCommitIndex} is keyed only by
* {@code commit_id} and filtered by {@code branch}. In factory mode with
* per-workspace repositories, different workspaces may share branch names
* (e.g. {@code draft}), causing potential cross-workspace leakage in search
* results. A future enhancement should add a repository/workspace
* discriminator column to {@code ArchitectureCommitIndex} and include it in
* uniqueness constraints and search filters.
*
* @param branch the Git branch to index
* @param ctx the workspace context for repository resolution
* @return number of newly indexed commits
*/
@Transactional
public int indexBranch(String branch, WorkspaceContext ctx) {
try {
DslGitRepository repo = resolveRepository(ctx);
List<DslCommit> commits = repo.getDslHistory(branch);
int indexed = 0;
for (DslCommit commit : commits) {
if (indexRepository.existsByCommitId(commit.commitId())) {
continue; // already indexed
}
String dslText = repo.getDslAtCommit(commit.commitId());
if (dslText == null) {
continue;
}
ArchitectureCommitIndex entry = new ArchitectureCommitIndex();
entry.setCommitId(commit.commitId());
entry.setAuthor(commit.author());
entry.setCommitTimestamp(commit.timestamp());
entry.setMessage(commit.message());
entry.setBranch(branch);
entry.setChangedFiles("architecture.taxdsl");
// Tokenize DSL content
String tokenized = tokenizer.tokenize(dslText);
entry.setTokenizedChangeText(tokenized);
// Extract affected IDs
Set<String> elementIds = tokenizer.extractElementIds(dslText);
entry.setAffectedElementIds(String.join(",", elementIds));
Set<String> relationKeys = tokenizer.extractRelationKeys(dslText);
entry.setAffectedRelationIds(String.join(";", relationKeys));
indexRepository.save(entry);
indexed++;
}
if (indexed > 0) {
log.info("Indexed {} new commit(s) on branch '{}'", indexed, branch);
}
return indexed;
} catch (IOException e) {
log.error("Failed to index branch '{}'", branch, e);
return 0;
}
}
/**
* Full-text search across commit history using Hibernate Search.
*
* <p>Searches tokenized DSL text (with boost 1.0), commit messages (boost 0.5),
* and affected element/relation IDs (boost 3.0) for relevance-ranked results.
*
* @param query the search query
* @return matching commits ranked by relevance
*/
@Transactional(readOnly = true)
public List<ArchitectureCommitIndex> search(String query) {
return search(query, 50);
}
/**
* Full-text search across commit history using Hibernate Search.
*
* @param query the search query
* @param maxResults maximum number of results to return
* @return matching commits ranked by relevance
*/
@Transactional(readOnly = true)
public List<ArchitectureCommitIndex> search(String query, int maxResults) {
if (query == null || query.isBlank()) {
return Collections.emptyList();
}
try {
SearchSession session = Search.session(entityManager);
String lower = query.toLowerCase(Locale.ROOT);
return session.search(ArchitectureCommitIndex.class)
.where(f -> f.bool()
.should(f.match().field("tokenizedChangeText").matching(lower).boost(1.0f))
.should(f.match().field("message").matching(query).boost(0.5f))
.should(f.match().field("affectedElementIds").matching(lower).boost(3.0f))
.should(f.match().field("affectedRelationIds").matching(lower).boost(2.0f)))
.sort(f -> f.score())
.fetchHits(maxResults);
} catch (Exception e) {
log.error("Hibernate Search commit search failed for '{}': {}", query, e.getMessage());
return Collections.emptyList();
}
}
/**
* Find commits that affected a specific element using Hibernate Search.
*
* @param elementId the element ID (e.g., "CP-1023")
* @return matching commits sorted by relevance
*/
@Transactional(readOnly = true)
public List<ArchitectureCommitIndex> findByElement(String elementId) {
if (elementId == null || elementId.isBlank()) {
return Collections.emptyList();
}
try {
SearchSession session = Search.session(entityManager);
return session.search(ArchitectureCommitIndex.class)
.where(f -> f.bool()
.should(f.match().field("affectedElementIds")
.matching(elementId.toLowerCase(Locale.ROOT)).boost(3.0f))
.should(f.match().field("tokenizedChangeText")
.matching(elementId.toLowerCase(Locale.ROOT)).boost(1.0f)))
.sort(f -> f.score())
.fetchHits(50);
} catch (Exception e) {
log.error("Hibernate Search element search failed for '{}': {}", elementId, e.getMessage());
return Collections.emptyList();
}
}
/**
* Find commits that affected a specific relation using Hibernate Search.
*
* @param relationKey the relation key (e.g., "CP-1023 REALIZES CR-1047")
* @return matching commits sorted by relevance
*/
@Transactional(readOnly = true)
public List<ArchitectureCommitIndex> findByRelation(String relationKey) {
if (relationKey == null || relationKey.isBlank()) {
return Collections.emptyList();
}
try {
SearchSession session = Search.session(entityManager);
return session.search(ArchitectureCommitIndex.class)
.where(f -> f.bool()
.should(f.match().field("affectedRelationIds")
.matching(relationKey.toLowerCase(Locale.ROOT)).boost(3.0f))
.should(f.match().field("tokenizedChangeText")
.matching(relationKey.toLowerCase(Locale.ROOT)).boost(1.0f)))
.sort(f -> f.score())
.fetchHits(50);
} catch (Exception e) {
log.error("Hibernate Search relation search failed for '{}': {}", relationKey, e.getMessage());
return Collections.emptyList();
}
}
/**
* Build an aggregated history view for a specific element.
*
* <p>Computes firstSeen, lastSeen, occurrence count, volatility,
* and recent commit messages — useful for understanding how
* an element has evolved over time.
*
* @param elementId the element ID (e.g., "CP-1023")
* @return the aggregation, or {@code null} if no history found
*/
@Transactional(readOnly = true)
public ElementHistoryAggregation aggregateElementHistory(String elementId) {
List<ArchitectureCommitIndex> commits = findByElement(elementId);
if (commits.isEmpty()) {
return null;
}
long totalCommits = indexRepository.count();
Instant firstSeen = commits.stream()
.map(ArchitectureCommitIndex::getCommitTimestamp)
.min(Comparator.naturalOrder())
.orElse(null);
Instant lastSeen = commits.stream()
.map(ArchitectureCommitIndex::getCommitTimestamp)
.max(Comparator.naturalOrder())
.orElse(null);
List<String> recentMessages = commits.stream()
.sorted(Comparator.comparing(ArchitectureCommitIndex::getCommitTimestamp).reversed())
.limit(5)
.map(ArchitectureCommitIndex::getMessage)
.toList();
double volatility = ElementHistoryAggregation.computeVolatility(commits.size(), (int) totalCommits);
return new ElementHistoryAggregation(
elementId, firstSeen, lastSeen,
commits.size(), volatility, recentMessages);
}
}