// ApqcExcelParser.java

package com.taxonomy.catalog.service.importer;

import com.taxonomy.dsl.mapping.ExternalElement;
import com.taxonomy.dsl.mapping.ExternalRelation;
import org.apache.poi.ss.usermodel.*;

import java.io.InputStream;
import java.util.*;

/**
 * Parses APQC Process Classification Framework (PCF) Excel (.xlsx) files into
 * {@link ExternalElement}s and {@link ExternalRelation}s.
 *
 * <p>Many APQC licensees deliver the PCF as {@code .xlsx}. This parser uses
 * Apache POI (already a project dependency) to read the workbook.
 *
 * <p>Expected column structure (flexible header matching):
 * <pre>
 * PCF ID | Name | Level | Description
 * 1.0    | Develop Vision and Strategy | 1 |
 * </pre>
 */
public class ApqcExcelParser implements ExternalParser {

    /** Element type per hierarchy level; index 0 corresponds to level 1. */
    private static final String[] LEVEL_TYPES = {
        "Category", "ProcessGroup", "Process", "Activity", "Task"
    };

    /** Prefix of every element id generated by this parser. */
    private static final String ID_PREFIX = "apqc-";

    /**
     * Returns the file format identifier handled by this parser.
     *
     * @return the constant {@code "xlsx"}
     */
    @Override
    public String fileFormat() {
        return "xlsx";
    }

    /**
     * Reads the first sheet of the workbook and converts every data row that has a
     * non-blank PCF ID into an {@link ExternalElement}. A {@code "ParentChild"}
     * {@link ExternalRelation} is added whenever the parent of a row has already
     * been read, so parents are expected to appear before their children.
     *
     * <p>The first physical row is treated as the header row. A top-level id may be
     * written either as {@code "1"} or as {@code "1.0"}; both forms are treated as
     * a level-1 element without a parent, and children such as {@code "1.1"} are
     * linked to whichever form is present.
     *
     * @param input stream containing the workbook
     * @return the parsed elements and relations; both lists are empty when the
     *         workbook has no sheets or the first sheet has no rows
     * @throws Exception if Apache POI cannot open or read the workbook
     */
    @Override
    public ParsedExternalModel parse(InputStream input) throws Exception {
        List<ExternalElement> elements = new ArrayList<>();
        List<ExternalRelation> relations = new ArrayList<>();
        Map<String, String> idByPcfId = new LinkedHashMap<>();

        try (Workbook workbook = WorkbookFactory.create(input)) {
            // getSheetAt(0) throws on a workbook without sheets instead of returning
            // null, so the sheet count has to be checked up front.
            if (workbook.getNumberOfSheets() == 0) {
                return new ParsedExternalModel(elements, relations);
            }
            Sheet sheet = workbook.getSheetAt(0);
            if (sheet == null) {
                return new ParsedExternalModel(elements, relations);
            }

            Iterator<Row> rowIterator = sheet.iterator();
            if (!rowIterator.hasNext()) {
                return new ParsedExternalModel(elements, relations);
            }

            // Parse header row
            Row headerRow = rowIterator.next();
            int pcfIdCol = findColumn(headerRow, "PCF ID", "Id", "ID", "pcf_id");
            int nameCol = findColumn(headerRow, "Name", "name", "Process Name", "Title");
            int levelCol = findColumn(headerRow, "Level", "level", "Hierarchy Level");
            int descCol = findColumn(headerRow, "Description", "description", "Desc");

            while (rowIterator.hasNext()) {
                Row row = rowIterator.next();
                String pcfId = getCellString(row, pcfIdCol);
                if (pcfId == null || pcfId.isBlank()) {
                    continue; // rows without an id cannot be addressed, skip them
                }

                String name = getCellString(row, nameCol);
                String levelStr = getCellString(row, levelCol);
                String description = getCellString(row, descCol, false);

                int level = parseLevel(levelStr, pcfId);
                String type = level >= 1 && level <= LEVEL_TYPES.length
                        ? LEVEL_TYPES[level - 1] : "Category";

                String elementId = toElementId(pcfId);
                Map<String, String> props = new LinkedHashMap<>();
                props.put("pcfId", pcfId);

                String parentPcfId = deriveParentPcfId(pcfId);
                String parentElementId = parentPcfId == null
                        ? null : findParentElementId(idByPcfId, parentPcfId);
                if (parentPcfId != null) {
                    // Point at the element that was actually read when there is one;
                    // otherwise fall back to the id derived from the parent PCF ID.
                    props.put("parentId", parentElementId != null
                            ? parentElementId : toElementId(parentPcfId));
                }

                elements.add(new ExternalElement(elementId, type, name, description, props));
                idByPcfId.put(pcfId, elementId);

                // Create parent-child relation
                if (parentElementId != null) {
                    relations.add(new ExternalRelation(
                            parentElementId, elementId, "ParentChild", Map.of()));
                }
            }
        }

        return new ParsedExternalModel(elements, relations);
    }

    /** Builds the element id for a PCF ID, e.g. {@code "1.2.3"} becomes {@code "apqc-1-2-3"}. */
    private String toElementId(String pcfId) {
        return ID_PREFIX + pcfId.replace(".", "-");
    }

    /**
     * Looks up the element id of an already parsed parent.
     *
     * <p>The exact parent PCF ID is tried first. When the parent id has no dot
     * (e.g. {@code "1"}), the {@code "1.0"} spelling of the same top-level element
     * is tried as well.
     *
     * @return the parent's element id, or {@code null} if no such row was read yet
     */
    private String findParentElementId(Map<String, String> idByPcfId, String parentPcfId) {
        String parentElementId = idByPcfId.get(parentPcfId);
        if (parentElementId == null && parentPcfId.indexOf('.') < 0) {
            parentElementId = idByPcfId.get(parentPcfId + ".0");
        }
        return parentElementId;
    }

    /**
     * Finds the column whose header text equals one of the candidates, ignoring case.
     * Header cells are scanned in row order and the first matching cell wins.
     *
     * @return the zero-based column index, or {@code -1} if no header matches
     */
    private int findColumn(Row headerRow, String... candidates) {
        for (Cell cell : headerRow) {
            String value = getCellValueAsString(cell);
            if (value == null) continue;
            for (String candidate : candidates) {
                if (value.trim().equalsIgnoreCase(candidate)) {
                    return cell.getColumnIndex();
                }
            }
        }
        return -1;
    }

    /** Reads a cell as text with {@code trim == true}; see the three-argument overload. */
    private String getCellString(Row row, int colIndex) {
        return getCellString(row, colIndex, true);
    }

    /**
     * Reads the cell at {@code colIndex} of {@code row} as text.
     *
     * @return the cell text, or {@code null} when the column index is negative
     *         (column not present), the cell is missing, or its type is unsupported
     */
    private String getCellString(Row row, int colIndex, boolean trim) {
        if (colIndex < 0) return null;
        Cell cell = row.getCell(colIndex);
        return getCellValueAsString(cell, trim);
    }

    /** Converts a cell to text with {@code trim == true}; see the two-argument overload. */
    private String getCellValueAsString(Cell cell) {
        return getCellValueAsString(cell, true);
    }

    /**
     * Converts a cell to its textual form.
     *
     * <p>String cells are returned with surrounding whitespace removed: via
     * {@link String#trim()} when {@code trim} is true, via {@link String#strip()}
     * otherwise. Whole numbers are rendered without a fractional part
     * ({@code 3.0} becomes {@code "3"}). Formula cells are converted using their
     * cached result. Blank, error and other cell types yield {@code null}.
     *
     * <p>NOTE(review): both branches of the {@code trim} flag remove surrounding
     * whitespace; confirm whether {@code trim == false} was meant to keep it.
     */
    private String getCellValueAsString(Cell cell, boolean trim) {
        if (cell == null) return null;
        CellType cellType = cell.getCellType();
        if (cellType == CellType.FORMULA) {
            // Use the value Excel cached for the formula instead of dropping the cell.
            cellType = cell.getCachedFormulaResultType();
        }
        return switch (cellType) {
            case STRING -> {
                RichTextString rts = cell.getRichStringCellValue();
                String s = rts != null ? rts.getString() : cell.getStringCellValue();
                yield trim ? s.trim() : s.strip();
            }
            case NUMERIC -> {
                double num = cell.getNumericCellValue();
                if (num == Math.floor(num) && !Double.isInfinite(num)) {
                    yield String.valueOf((long) num);
                }
                yield String.valueOf(num);
            }
            case BOOLEAN -> String.valueOf(cell.getBooleanCellValue());
            default -> null;
        };
    }

    /**
     * Determines the hierarchy level of a row.
     *
     * <p>An integer in the level column takes precedence. Otherwise the level is
     * derived from the PCF ID: {@code "N.0"} is level 1, any other id is the number
     * of dots plus one.
     */
    private int parseLevel(String levelStr, String pcfId) {
        if (levelStr != null && !levelStr.isBlank()) {
            try {
                return Integer.parseInt(levelStr.trim());
            } catch (NumberFormatException ignored) {
                // Not an integer: fall back to deriving the level from the id.
            }
        }
        if (isZeroSuffixedTopLevel(pcfId)) {
            return 1;
        }
        long dots = pcfId.chars().filter(c -> c == '.').count();
        return (int) dots + 1;
    }

    /**
     * Derives the parent PCF ID by cutting off the last dot-separated segment.
     *
     * @return the parent id, or {@code null} for ids without a parent: ids with no
     *         dot, ids starting with their only dot, and top-level ids of the form
     *         {@code "N.0"}
     */
    private String deriveParentPcfId(String pcfId) {
        int lastDot = pcfId.lastIndexOf('.');
        if (lastDot <= 0 || isZeroSuffixedTopLevel(pcfId)) return null;
        return pcfId.substring(0, lastDot);
    }

    /**
     * Tells whether the id has the form {@code "N.0"}: exactly one dot, not at the
     * start, followed by a single {@code 0}.
     */
    private boolean isZeroSuffixedTopLevel(String pcfId) {
        int firstDot = pcfId.indexOf('.');
        return firstDot > 0
                && firstDot == pcfId.lastIndexOf('.')
                && "0".equals(pcfId.substring(firstDot + 1));
    }
}