JavaBlobExtractor.java

/*******************************************************************************
 * Copyright (c) 2026 Carsten Hammer.
 *
 * This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License 2.0
 * which accompanies this distribution, and is available at
 * https://www.eclipse.org/legal/epl-2.0/
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 * Contributors:
 *     Carsten Hammer
 *******************************************************************************/
package org.eclipse.jgit.storage.hibernate.service;

import java.util.HashMap;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.eclipse.jdt.core.dom.AST;
import org.eclipse.jdt.core.dom.ASTParser;
import org.eclipse.jdt.core.dom.CompilationUnit;
import org.eclipse.jdt.core.dom.ImportDeclaration;
import org.eclipse.jgit.storage.hibernate.entity.JavaBlobIndex;
import org.eclipse.jgit.storage.hibernate.search.JavaStructureVisitor;

/**
 * Extracts structural metadata from Java source code using JDT's
 * {@link ASTParser} without bindings.
 * <p>
 * The extractor parses a Java source file into an AST and visits the tree to
 * collect the package name, declared types, methods, fields, supertypes,
 * interfaces, and import statements. No classpath or binding resolution is
 * required.
 * </p>
 * <p>
 * Extraction is best effort: the identifiers and the source snippet are
 * always stored, path- and text-derived values are stored for every
 * {@code .java} path, and a failure while parsing or visiting the AST is
 * logged and leaves the remaining AST-derived fields unset.
 * </p>
 */
public class JavaBlobExtractor {

	private static final Logger LOG = Logger
			.getLogger(JavaBlobExtractor.class.getName());

	/** Maximum number of UTF-16 code units kept in the source snippet. */
	private static final int MAX_SNIPPET_LENGTH = 65535;

	/** File name suffix that marks a path as Java source. */
	private static final String JAVA_SUFFIX = ".java"; //$NON-NLS-1$

	/**
	 * Extract structural metadata from a Java source file.
	 * <p>
	 * Paths that are {@code null} or do not end in {@code .java} are not
	 * parsed; the returned entity then only carries the identifiers, the file
	 * path and the (possibly truncated) source snippet. A {@code null} source
	 * is not parsed either.
	 * </p>
	 *
	 * @param source
	 *            the Java source code, may be {@code null}
	 * @param filePath
	 *            the file path within the repository, may be {@code null}
	 * @param repoName
	 *            the repository name
	 * @param blobOid
	 *            the blob object SHA-1
	 * @param commitOid
	 *            the commit object SHA-1
	 * @return a populated {@link JavaBlobIndex} entity, never {@code null}
	 */
	public JavaBlobIndex extract(String source, String filePath,
			String repoName, String blobOid, String commitOid) {
		JavaBlobIndex idx = new JavaBlobIndex();
		idx.setRepositoryName(repoName);
		idx.setBlobObjectId(blobOid);
		idx.setCommitObjectId(commitOid);
		idx.setFilePath(filePath);
		idx.setSourceSnippet(truncate(source, MAX_SNIPPET_LENGTH));

		if (filePath == null || !filePath.endsWith(JAVA_SUFFIX)) {
			return idx;
		}

		// These values need no AST, so store them before parsing; they then
		// survive a parse or visitor failure as part of the partial result.
		idx.setSimpleClassName(extractSimpleClassName(filePath));
		idx.setProjectName(extractProjectName(filePath));
		idx.setLineCount(countLines(source));

		if (source == null) {
			// Nothing to parse; avoid a NullPointerException in the parser
			// setup being reported as a parse failure.
			return idx;
		}

		try {
			// NOTE(review): no compiler options are set on the parser; confirm
			// that its default source level accepts the newest language
			// constructs (records, sealed types, ...).
			@SuppressWarnings("deprecation")
			ASTParser parser = ASTParser.newParser(AST.JLS_Latest);
			parser.setResolveBindings(false);
			parser.setSource(source.toCharArray());
			parser.setKind(ASTParser.K_COMPILATION_UNIT);

			CompilationUnit cu = (CompilationUnit) parser.createAST(null);

			// Extract package
			if (cu.getPackage() != null) {
				idx.setPackageName(cu.getPackage().getName()
						.getFullyQualifiedName());
			}

			// Build import map for FQN resolution of simple names
			Map<String, String> importMap = buildImportMap(cu);
			idx.setImportStatements(serializeImports(cu));

			// Walk AST for types, methods, fields, extends, implements
			JavaStructureVisitor visitor = new JavaStructureVisitor(importMap,
					idx.getPackageName());
			cu.accept(visitor);

			idx.setDeclaredTypes(visitor.getTypes());
			idx.setFullyQualifiedNames(visitor.getFQNs());
			idx.setDeclaredMethods(visitor.getMethods());
			idx.setDeclaredFields(visitor.getFields());
			idx.setExtendsTypes(visitor.getSuperTypes());
			idx.setImplementsTypes(visitor.getInterfaces());
			idx.setAnnotations(visitor.getAnnotations());
			idx.setTypeKind(visitor.getTypeKind());
			idx.setVisibility(visitor.getVisibility());
			idx.setTypeDocumentation(visitor.getTypeDocumentation());
			idx.setMethodSignatures(visitor.getMethodSignatures());
			idx.setReferencedTypes(visitor.getReferencedTypes());
			idx.setStringLiterals(visitor.getStringLiterals());
			idx.setHasMainMethod(visitor.hasMainMethod());
		} catch (Exception e) {
			// Graceful degradation: keep whatever was collected so far. The
			// throwable is passed to the logger so the stack trace is not
			// lost when the exception carries no message.
			LOG.log(Level.WARNING,
					"Failed to parse Java source: " + filePath + ": " //$NON-NLS-1$ //$NON-NLS-2$
							+ e.getMessage()
							+ " - returning partial results", //$NON-NLS-1$
					e);
		}

		return idx;
	}

	/**
	 * Build a lookup from simple name to fully qualified name for every
	 * single (non on-demand) import of the compilation unit.
	 *
	 * @param cu
	 *            the parsed compilation unit
	 * @return a mutable map keyed by the last segment of each imported name;
	 *         on-demand ({@code .*}) imports contribute no entry
	 */
	private static Map<String, String> buildImportMap(CompilationUnit cu) {
		Map<String, String> importMap = new HashMap<>();
		for (Object imp : cu.imports()) {
			if (imp instanceof ImportDeclaration importDecl
					&& !importDecl.isOnDemand()) {
				String fqn = importDecl.getName().getFullyQualifiedName();
				// lastIndexOf returns -1 for an unqualified name, so the
				// whole name is used as the key in that case
				String simpleName = fqn.substring(fqn.lastIndexOf('.') + 1);
				importMap.put(simpleName, fqn);
			}
		}
		return importMap;
	}

	/**
	 * Render the imports of the compilation unit as text, one imported name
	 * per line in declaration order.
	 *
	 * @param cu
	 *            the parsed compilation unit
	 * @return the imported names separated by {@code '\n'}, with {@code .*}
	 *         appended to on-demand imports; the empty string when there are
	 *         no imports
	 */
	private static String serializeImports(CompilationUnit cu) {
		StringBuilder sb = new StringBuilder();
		for (Object imp : cu.imports()) {
			if (imp instanceof ImportDeclaration importDecl) {
				if (sb.length() > 0) {
					sb.append('\n');
				}
				sb.append(importDecl.getName().getFullyQualifiedName());
				if (importDecl.isOnDemand()) {
					sb.append(".*"); //$NON-NLS-1$
				}
			}
		}
		return sb.toString();
	}

	/**
	 * Limit a text to at most {@code maxLength} UTF-16 code units.
	 *
	 * @param text
	 *            the text to shorten, may be {@code null}
	 * @param maxLength
	 *            the maximum length of the result
	 * @return {@code text} itself when it is {@code null} or short enough,
	 *         otherwise a prefix of it; the prefix is one unit shorter than
	 *         {@code maxLength} when the cut would otherwise separate a
	 *         surrogate pair
	 */
	private static String truncate(String text, int maxLength) {
		if (text == null || text.length() <= maxLength) {
			return text;
		}
		int end = maxLength;
		// Never end on a high surrogate whose low surrogate would be cut
		// off: an unpaired surrogate is not valid text.
		if (end > 0 && Character.isHighSurrogate(text.charAt(end - 1))
				&& Character.isLowSurrogate(text.charAt(end))) {
			end--;
		}
		return text.substring(0, end);
	}

	/**
	 * Derive a project name from the path segment that directly precedes the
	 * first {@code /src/} segment, or the first {@code /tst/} segment when
	 * the path contains no {@code /src/}.
	 *
	 * @param filePath
	 *            the file path within the repository
	 * @return the segment before the marker, or {@code null} when no marker
	 *         is found or the marker is at the very start of the path
	 */
	private static String extractProjectName(String filePath) {
		int srcIdx = filePath.indexOf("/src/"); //$NON-NLS-1$
		if (srcIdx < 0) {
			srcIdx = filePath.indexOf("/tst/"); //$NON-NLS-1$
		}
		if (srcIdx > 0) {
			String beforeSrc = filePath.substring(0, srcIdx);
			int lastSlash = beforeSrc.lastIndexOf('/');
			return lastSlash >= 0 ? beforeSrc.substring(lastSlash + 1)
					: beforeSrc;
		}
		return null;
	}

	/**
	 * Derive a simple class name from the last path segment by removing a
	 * trailing {@code .java}.
	 *
	 * @param filePath
	 *            the file path within the repository
	 * @return the file name without the {@code .java} suffix, or the
	 *         unchanged file name when it has no such suffix
	 */
	private static String extractSimpleClassName(String filePath) {
		int lastSlash = filePath.lastIndexOf('/');
		String filename = lastSlash >= 0 ? filePath.substring(lastSlash + 1)
				: filePath;
		if (filename.endsWith(JAVA_SUFFIX)) {
			return filename.substring(0,
					filename.length() - JAVA_SUFFIX.length());
		}
		return filename;
	}

	/**
	 * Count the lines of a text as the number of {@code '\n'} characters plus
	 * one, so a trailing newline counts as one more (empty) line.
	 *
	 * @param source
	 *            the text to inspect, may be {@code null}
	 * @return the line count, or {@code 0} for a {@code null} or empty text
	 */
	private static int countLines(String source) {
		if (source == null || source.isEmpty()) {
			return 0;
		}
		int count = 1;
		for (int i = 0; i < source.length(); i++) {
			if (source.charAt(i) == '\n') {
				count++;
			}
		}
		return count;
	}
}