// GenericTextFileStrategy.java
/*******************************************************************************
* Copyright (c) 2026 Carsten Hammer.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Carsten Hammer
*******************************************************************************/
package org.eclipse.jgit.storage.hibernate.search.strategies;
import java.util.Collections;
import java.util.Locale;
import java.util.Set;
import java.util.StringJoiner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.eclipse.jgit.storage.hibernate.search.BlobIndexData;
import org.eclipse.jgit.storage.hibernate.search.FileTypeStrategy;
/**
 * Fallback strategy for extracting searchable metadata from any text file.
 * <p>
 * Uses regex scanning to find strings matching Java FQN patterns. This
 * strategy is used for all file types that don't have a dedicated strategy.
 * </p>
 * <p>
 * The class holds no instance state; all helpers are static.
 * </p>
 */
public class GenericTextFileStrategy implements FileTypeStrategy {

	/**
	 * Upper bound, in {@code char} units, applied to both the stored source
	 * snippet and the stored list of fully qualified names.
	 */
	private static final int MAX_SNIPPET_LENGTH = 65535;

	/**
	 * File type reported by {@link #fileType()} and used as the fallback
	 * when a path carries no usable extension.
	 */
	private static final String TEXT_FILE_TYPE = "text"; //$NON-NLS-1$

	/**
	 * Matches one or more lower-case, dot-separated segments followed by a
	 * segment that starts with an upper-case letter, e.g.
	 * {@code java.util.List}.
	 */
	private static final Pattern FQN_PATTERN = Pattern.compile(
			"[a-z][a-z0-9]*(\\.[a-z][a-z0-9]*)*\\.[A-Z]\\w*"); //$NON-NLS-1$

	/**
	 * @return always an empty set; this strategy does not claim any
	 *         extension of its own
	 */
	@Override
	public Set<String> supportedExtensions() {
		return Collections.emptySet();
	}

	/**
	 * @return always an empty set; this strategy does not claim any
	 *         file name of its own
	 */
	@Override
	public Set<String> supportedFilenames() {
		return Collections.emptySet();
	}

	/**
	 * Builds the index data for one blob.
	 * <p>
	 * The result carries the file type derived from {@code filePath}, the
	 * source text cut to {@link #MAX_SNIPPET_LENGTH} characters, and, when at
	 * least one match exists, the newline-separated matches of
	 * {@link #FQN_PATTERN} cut to the same limit.
	 * </p>
	 *
	 * @param source
	 *            the text content to scan; may be {@code null}, in which
	 *            case the snippet is {@code null} and no names are set
	 * @param filePath
	 *            the path used to derive the file type; may be {@code null}
	 * @return the populated index data, never {@code null}
	 */
	@Override
	public BlobIndexData extract(String source, String filePath) {
		BlobIndexData data = new BlobIndexData();
		data.setFileType(detectFileType(filePath));
		data.setSourceSnippet(truncate(source, MAX_SNIPPET_LENGTH));
		if (source == null) {
			// Nothing to scan; Pattern.matcher(null) would throw.
			return data;
		}
		String fqns = collectFqns(source, MAX_SNIPPET_LENGTH);
		if (!fqns.isEmpty()) {
			data.setFullyQualifiedNames(fqns);
		}
		return data;
	}

	/**
	 * @return the constant type name {@code "text"}
	 */
	@Override
	public String fileType() {
		return TEXT_FILE_TYPE;
	}

	/**
	 * Joins the matches of {@link #FQN_PATTERN} with newlines, in order of
	 * occurrence, and cuts the result to {@code maxLength} characters.
	 * <p>
	 * Scanning stops as soon as the joined text reaches the limit, so a huge
	 * input does not build a huge intermediate string. The returned value is
	 * the same as joining every match and truncating afterwards.
	 * </p>
	 *
	 * @param source
	 *            the text to scan, not {@code null}
	 * @param maxLength
	 *            maximum length of the returned string
	 * @return the joined matches, or an empty string when nothing matched
	 */
	private static String collectFqns(String source, int maxLength) {
		StringJoiner joined = new StringJoiner("\n"); //$NON-NLS-1$
		Matcher m = FQN_PATTERN.matcher(source);
		while (joined.length() < maxLength && m.find()) {
			joined.add(m.group());
		}
		return truncate(joined.toString(), maxLength);
	}

	/**
	 * Derives a file type from the extension of the last path segment.
	 * <p>
	 * Only the text after the last {@code '/'} is inspected, so a dot in a
	 * directory name is never mistaken for an extension. A leading dot
	 * counts as an extension separator ({@code ".gitignore"} gives
	 * {@code "gitignore"}). Lower-casing uses {@link Locale#ROOT} so the
	 * result does not depend on the JVM's default locale.
	 * </p>
	 *
	 * @param filePath
	 *            the path to inspect; may be {@code null}
	 * @return the lower-cased extension, or {@code "text"} when the path is
	 *         {@code null}, has no dot in its last segment, or ends with a
	 *         dot
	 */
	private static String detectFileType(String filePath) {
		if (filePath == null) {
			return TEXT_FILE_TYPE;
		}
		int nameStart = filePath.lastIndexOf('/') + 1;
		int dot = filePath.lastIndexOf('.');
		boolean dotInFileName = dot >= nameStart;
		boolean hasExtensionText = dot < filePath.length() - 1;
		if (!dotInFileName || !hasExtensionText) {
			return TEXT_FILE_TYPE;
		}
		return filePath.substring(dot + 1).toLowerCase(Locale.ROOT);
	}

	/**
	 * Cuts {@code text} to at most {@code maxLength} characters.
	 * <p>
	 * When the cut would fall between the two halves of a surrogate pair,
	 * one character fewer is kept so the result never ends in an unpaired
	 * high surrogate.
	 * </p>
	 *
	 * @param text
	 *            the text to shorten; may be {@code null}
	 * @param maxLength
	 *            maximum number of {@code char} units to keep
	 * @return {@code text} itself when it is {@code null} or already short
	 *         enough, otherwise its leading part
	 */
	private static String truncate(String text, int maxLength) {
		if (text == null || text.length() <= maxLength) {
			return text;
		}
		int end = maxLength;
		// text.length() > maxLength here, so charAt(end) is in range.
		if (end > 0 && Character.isHighSurrogate(text.charAt(end - 1))
				&& Character.isLowSurrogate(text.charAt(end))) {
			end--;
		}
		return text.substring(0, end);
	}
}