// CommitIndexService.java

package com.taxonomy.architecture.service;

import com.taxonomy.dsl.parser.DslTokenizer;
import com.taxonomy.dsl.storage.DslCommit;
import com.taxonomy.dsl.storage.DslGitRepository;
import com.taxonomy.dsl.storage.DslGitRepositoryFactory;
import com.taxonomy.dto.ElementHistoryAggregation;
import com.taxonomy.architecture.model.ArchitectureCommitIndex;
import com.taxonomy.architecture.repository.ArchitectureCommitIndexRepository;
import com.taxonomy.workspace.service.WorkspaceContext;
import jakarta.persistence.EntityManager;
import jakarta.persistence.PersistenceContext;
import org.hibernate.search.mapper.orm.Search;
import org.hibernate.search.mapper.orm.session.SearchSession;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;

import java.io.IOException;
import java.time.Instant;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Set;

/**
 * Populates and queries the {@link ArchitectureCommitIndex} from JGit history.
 *
 * <p>Each commit on a DSL branch is parsed and tokenized to enable
 * history search over architecture evolution.
 *
 * <p>Search methods use <b>Hibernate Search</b> (Lucene backend) with custom
 * analyzers ({@code "dsl"}, {@code "csv-keyword"}) for full-text queries,
 * replacing the previous JPQL {@code LIKE}/{@code LOWER()} approach.
 */
@Service
public class CommitIndexService {

    private static final Logger log = LoggerFactory.getLogger(CommitIndexService.class);

    private final DslGitRepositoryFactory repositoryFactory;
    private final ArchitectureCommitIndexRepository indexRepository;
    private final DslTokenizer tokenizer = new DslTokenizer();

    @PersistenceContext
    private EntityManager entityManager;

    public CommitIndexService(DslGitRepositoryFactory repositoryFactory,
                              ArchitectureCommitIndexRepository indexRepository) {
        this.repositoryFactory = repositoryFactory;
        this.indexRepository = indexRepository;
    }

    /**
     * Resolve the Git repository for the given workspace context.
     *
     * @param ctx the workspace context (use {@link WorkspaceContext#SHARED}
     *            for the system repository)
     * @return the resolved DslGitRepository
     */
    private DslGitRepository resolveRepository(WorkspaceContext ctx) {
        return repositoryFactory.resolveRepository(ctx);
    }

    /**
     * Index all unindexed commits on the given branch.
     *
     * <p>Uses the system repository (SHARED context). Use
     * {@link #indexBranch(String, WorkspaceContext)} for workspace-aware resolution.
     *
     * @param branch the Git branch to index
     * @return number of newly indexed commits
     */
    @Transactional
    public int indexBranch(String branch) {
        return indexBranch(branch, WorkspaceContext.SHARED);
    }

    /**
     * Index all unindexed commits on the given branch.
     *
     * <p><b>Known limitation:</b> {@link ArchitectureCommitIndex} is keyed only by
     * {@code commit_id} and filtered by {@code branch}. In factory mode with
     * per-workspace repositories, different workspaces may share branch names
     * (e.g. {@code draft}), causing potential cross-workspace leakage in search
     * results. A future enhancement should add a repository/workspace
     * discriminator column to {@code ArchitectureCommitIndex} and include it in
     * uniqueness constraints and search filters.
     *
     * @param branch the Git branch to index
     * @param ctx    the workspace context for repository resolution
     * @return number of newly indexed commits
     */
    @Transactional
    public int indexBranch(String branch, WorkspaceContext ctx) {
        try {
            DslGitRepository repo = resolveRepository(ctx);
            List<DslCommit> commits = repo.getDslHistory(branch);
            int indexed = 0;

            for (DslCommit commit : commits) {
                if (indexRepository.existsByCommitId(commit.commitId())) {
                    continue; // already indexed
                }

                String dslText = repo.getDslAtCommit(commit.commitId());
                if (dslText == null) {
                    continue;
                }

                ArchitectureCommitIndex entry = new ArchitectureCommitIndex();
                entry.setCommitId(commit.commitId());
                entry.setAuthor(commit.author());
                entry.setCommitTimestamp(commit.timestamp());
                entry.setMessage(commit.message());
                entry.setBranch(branch);
                entry.setChangedFiles("architecture.taxdsl");

                // Tokenize DSL content
                String tokenized = tokenizer.tokenize(dslText);
                entry.setTokenizedChangeText(tokenized);

                // Extract affected IDs
                Set<String> elementIds = tokenizer.extractElementIds(dslText);
                entry.setAffectedElementIds(String.join(",", elementIds));

                Set<String> relationKeys = tokenizer.extractRelationKeys(dslText);
                entry.setAffectedRelationIds(String.join(";", relationKeys));

                indexRepository.save(entry);
                indexed++;
            }

            if (indexed > 0) {
                log.info("Indexed {} new commit(s) on branch '{}'", indexed, branch);
            }
            return indexed;
        } catch (IOException e) {
            log.error("Failed to index branch '{}'", branch, e);
            return 0;
        }
    }

    /**
     * Full-text search across commit history using Hibernate Search.
     *
     * <p>Searches tokenized DSL text (with boost 1.0), commit messages (boost 0.5),
     * and affected element/relation IDs (boost 3.0) for relevance-ranked results.
     *
     * @param query the search query
     * @return matching commits ranked by relevance
     */
    @Transactional(readOnly = true)
    public List<ArchitectureCommitIndex> search(String query) {
        return search(query, 50);
    }

    /**
     * Full-text search across commit history using Hibernate Search.
     *
     * @param query      the search query
     * @param maxResults maximum number of results to return
     * @return matching commits ranked by relevance
     */
    @Transactional(readOnly = true)
    public List<ArchitectureCommitIndex> search(String query, int maxResults) {
        if (query == null || query.isBlank()) {
            return Collections.emptyList();
        }
        try {
            SearchSession session = Search.session(entityManager);
            String lower = query.toLowerCase(Locale.ROOT);

            return session.search(ArchitectureCommitIndex.class)
                    .where(f -> f.bool()
                        .should(f.match().field("tokenizedChangeText").matching(lower).boost(1.0f))
                        .should(f.match().field("message").matching(query).boost(0.5f))
                        .should(f.match().field("affectedElementIds").matching(lower).boost(3.0f))
                        .should(f.match().field("affectedRelationIds").matching(lower).boost(2.0f)))
                    .sort(f -> f.score())
                    .fetchHits(maxResults);
        } catch (Exception e) {
            log.error("Hibernate Search commit search failed for '{}': {}", query, e.getMessage());
            return Collections.emptyList();
        }
    }

    /**
     * Find commits that affected a specific element using Hibernate Search.
     *
     * @param elementId the element ID (e.g., "CP-1023")
     * @return matching commits sorted by relevance
     */
    @Transactional(readOnly = true)
    public List<ArchitectureCommitIndex> findByElement(String elementId) {
        if (elementId == null || elementId.isBlank()) {
            return Collections.emptyList();
        }
        try {
            SearchSession session = Search.session(entityManager);
            return session.search(ArchitectureCommitIndex.class)
                    .where(f -> f.bool()
                        .should(f.match().field("affectedElementIds")
                                    .matching(elementId.toLowerCase(Locale.ROOT)).boost(3.0f))
                        .should(f.match().field("tokenizedChangeText")
                                    .matching(elementId.toLowerCase(Locale.ROOT)).boost(1.0f)))
                    .sort(f -> f.score())
                    .fetchHits(50);
        } catch (Exception e) {
            log.error("Hibernate Search element search failed for '{}': {}", elementId, e.getMessage());
            return Collections.emptyList();
        }
    }

    /**
     * Find commits that affected a specific relation using Hibernate Search.
     *
     * @param relationKey the relation key (e.g., "CP-1023 REALIZES CR-1047")
     * @return matching commits sorted by relevance
     */
    @Transactional(readOnly = true)
    public List<ArchitectureCommitIndex> findByRelation(String relationKey) {
        if (relationKey == null || relationKey.isBlank()) {
            return Collections.emptyList();
        }
        try {
            SearchSession session = Search.session(entityManager);
            return session.search(ArchitectureCommitIndex.class)
                    .where(f -> f.bool()
                        .should(f.match().field("affectedRelationIds")
                                    .matching(relationKey.toLowerCase(Locale.ROOT)).boost(3.0f))
                        .should(f.match().field("tokenizedChangeText")
                                    .matching(relationKey.toLowerCase(Locale.ROOT)).boost(1.0f)))
                    .sort(f -> f.score())
                    .fetchHits(50);
        } catch (Exception e) {
            log.error("Hibernate Search relation search failed for '{}': {}", relationKey, e.getMessage());
            return Collections.emptyList();
        }
    }

    /**
     * Build an aggregated history view for a specific element.
     *
     * <p>Computes firstSeen, lastSeen, occurrence count, volatility,
     * and recent commit messages — useful for understanding how
     * an element has evolved over time.
     *
     * @param elementId the element ID (e.g., "CP-1023")
     * @return the aggregation, or {@code null} if no history found
     */
    @Transactional(readOnly = true)
    public ElementHistoryAggregation aggregateElementHistory(String elementId) {
        List<ArchitectureCommitIndex> commits = findByElement(elementId);
        if (commits.isEmpty()) {
            return null;
        }

        long totalCommits = indexRepository.count();

        Instant firstSeen = commits.stream()
                .map(ArchitectureCommitIndex::getCommitTimestamp)
                .min(Comparator.naturalOrder())
                .orElse(null);
        Instant lastSeen = commits.stream()
                .map(ArchitectureCommitIndex::getCommitTimestamp)
                .max(Comparator.naturalOrder())
                .orElse(null);

        List<String> recentMessages = commits.stream()
                .sorted(Comparator.comparing(ArchitectureCommitIndex::getCommitTimestamp).reversed())
                .limit(5)
                .map(ArchitectureCommitIndex::getMessage)
                .toList();

        double volatility = ElementHistoryAggregation.computeVolatility(commits.size(), (int) totalCommits);

        return new ElementHistoryAggregation(
                elementId, firstSeen, lastSeen,
                commits.size(), volatility, recentMessages);
    }
}