// NodeEmbeddingBinder.java

package com.taxonomy.search;

import com.taxonomy.catalog.model.TaxonomyNode;
import com.taxonomy.catalog.model.TaxonomyRelation;
import java.util.Locale;
import org.hibernate.search.engine.backend.document.DocumentElement;
import org.hibernate.search.engine.backend.document.IndexFieldReference;
import org.hibernate.search.mapper.pojo.bridge.TypeBridge;
import org.hibernate.search.mapper.pojo.bridge.binding.TypeBindingContext;
import org.hibernate.search.mapper.pojo.bridge.mapping.programmatic.TypeBinder;
import org.hibernate.search.mapper.pojo.bridge.runtime.TypeBridgeWriteContext;

/**
 * Hibernate Search {@link TypeBinder} that computes a DJL/ONNX embedding vector for
 * a {@link TaxonomyNode} and writes it to an {@code "embedding"} {@code @VectorField}.
 *
 * <p>The enriched text used for embedding includes:
 * <ul>
 *   <li>The node's English name and description.</li>
 *   <li>Outgoing and incoming relation summaries
 *       (e.g. "Outgoing: supports X, supports Y.").</li>
 * </ul>
 *
 * <p>Graceful degradation is the responsibility of
 * {@code EmbeddingBridgeSupport.writeEmbedding}, to which all writing is delegated: when
 * the DJL model is unavailable the bridge is expected to write nothing, leaving the
 * document without an embedding vector. KNN queries will not match it, which mirrors
 * the existing behaviour where the vector index is absent.
 */
public class NodeEmbeddingBinder implements TypeBinder {

    /**
     * Declares the entity properties the bridge reads, creates the embedding index
     * field and registers the {@link Bridge}.
     *
     * @param context binding context supplied by Hibernate Search
     */
    @Override
    public void bind(TypeBindingContext context) {
        // Every property path read by Bridge#buildEnrichedText must be declared here;
        // per the Hibernate Search bridge contract these declarations drive reindexing
        // when one of the listed properties changes.
        context.dependencies()
                .use("nameEn")
                .use("descriptionEn")
                .use("outgoingRelations.relationType")
                .use("outgoingRelations.targetNode.nameEn")
                .use("incomingRelations.relationType")
                .use("incomingRelations.sourceNode.nameEn");

        IndexFieldReference<float[]> embeddingField =
                EmbeddingBridgeSupport.createEmbeddingField(context);

        context.bridge(TaxonomyNode.class, new Bridge(embeddingField));
    }

    /**
     * Bridge that turns a {@link TaxonomyNode} into enriched text and hands it to
     * {@code EmbeddingBridgeSupport} for embedding and indexing.
     */
    public static final class Bridge implements TypeBridge<TaxonomyNode> {

        private static final String OUTGOING_LABEL = "Outgoing: ";
        private static final String INCOMING_LABEL = "Incoming: ";

        private final IndexFieldReference<float[]> embeddingField;

        Bridge(IndexFieldReference<float[]> embeddingField) {
            this.embeddingField = embeddingField;
        }

        /**
         * Delegates to {@code EmbeddingBridgeSupport.writeEmbedding}, supplying
         * {@link #buildEnrichedText(TaxonomyNode)} as the text extractor.
         *
         * @param target  document element receiving the embedding field
         * @param node    entity being indexed
         * @param context write context (unused by this bridge)
         */
        @Override
        public void write(DocumentElement target, TaxonomyNode node,
                TypeBridgeWriteContext context) {
            EmbeddingBridgeSupport.writeEmbedding(target, embeddingField, node,
                    Bridge::buildEnrichedText);
        }

        /**
         * Builds the text that is fed to the embedding model for a node.
         *
         * <p>Layout (each part is omitted when it has no content):
         * <pre>
         * &lt;nameEn&gt;.
         * &lt;descriptionEn&gt;
         * Outgoing: &lt;relation type&gt; &lt;target name&gt;, ... .
         * Incoming: &lt;relation type&gt; &lt;source name&gt;, ... .
         * </pre>
         * Relation types are rendered as the enum constant name, lower-cased with
         * underscores replaced by spaces. Relations without a type are skipped, and a
         * section is emitted only when at least one relation remains.
         *
         * @param node the node to describe; must not be {@code null}
         * @return the enriched text with leading/trailing whitespace trimmed; empty
         *         when the node carries no usable content
         */
        public static String buildEnrichedText(TaxonomyNode node) {
            StringBuilder sb = new StringBuilder();
            if (node.getNameEn() != null) {
                sb.append(node.getNameEn()).append(".\n");
            }
            String description = node.getDescriptionEn();
            if (description != null && !description.isBlank()) {
                sb.append(description).append("\n");
            }
            appendRelationSummary(sb, OUTGOING_LABEL, node.getOutgoingRelations(), true);
            appendRelationSummary(sb, INCOMING_LABEL, node.getIncomingRelations(), false);
            return sb.toString().trim();
        }

        /**
         * Appends one "label: type name, type name." line for the given relations, or
         * nothing at all when no relation has a type.
         */
        private static void appendRelationSummary(StringBuilder sb, String label,
                Iterable<? extends TaxonomyRelation> relations, boolean outgoing) {
            if (relations == null) {
                return;
            }
            boolean first = true;
            for (TaxonomyRelation relation : relations) {
                if (relation == null || relation.getRelationType() == null) {
                    continue;
                }
                // The label is written lazily so that a collection containing only
                // untyped relations does not leave a dangling "Outgoing: ." line.
                sb.append(first ? label : ", ");
                first = false;
                // Locale.ROOT keeps the text stable regardless of the JVM default
                // locale (e.g. Turkish dotless-i lower-casing).
                sb.append(relation.getRelationType().name()
                        .toLowerCase(Locale.ROOT).replace('_', ' '));
                String relatedName = relatedNodeName(relation, outgoing);
                if (relatedName != null && !relatedName.isBlank()) {
                    sb.append(' ').append(relatedName);
                }
            }
            if (!first) {
                sb.append(".\n");
            }
        }

        /**
         * Returns the English name of the node at the far end of the relation (target
         * for outgoing, source for incoming), or {@code null} when it is missing.
         */
        private static String relatedNodeName(TaxonomyRelation relation, boolean outgoing) {
            if (outgoing) {
                return relation.getTargetNode() != null
                        ? relation.getTargetNode().getNameEn() : null;
            }
            return relation.getSourceNode() != null
                    ? relation.getSourceNode().getNameEn() : null;
        }
    }
}