// DocumentImportController.java
package com.taxonomy.provenance.controller;
import com.taxonomy.dto.AiExtractedCandidate;
import com.taxonomy.dto.DocumentParseResult;
import com.taxonomy.dto.RegulationArchitectureMatch;
import com.taxonomy.dto.RequirementSourceLinkDto;
import com.taxonomy.dto.SourceArtifactDto;
import com.taxonomy.model.LinkType;
import com.taxonomy.model.SourceType;
import com.taxonomy.provenance.model.SourceArtifact;
import com.taxonomy.provenance.model.SourceFragment;
import com.taxonomy.provenance.model.SourceVersion;
import com.taxonomy.provenance.service.DocumentAnalysisService;
import com.taxonomy.provenance.service.DocumentParserService;
import com.taxonomy.provenance.service.SourceProvenanceService;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.tags.Tag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
/**
 * REST API for document import and source provenance management.
 *
 * <p>Endpoints:
 * <ul>
 *   <li>{@code POST /api/documents/upload} — Upload and parse a PDF/DOCX document</li>
 *   <li>{@code POST /api/documents/extract-ai} — AI-assisted requirement extraction</li>
 *   <li>{@code POST /api/documents/map-regulation} — Direct regulation-to-architecture mapping</li>
 *   <li>{@code POST /api/documents/confirm-candidates} — Link reviewed candidates to their source</li>
 *   <li>{@code GET /api/provenance/sources} — List all source artifacts</li>
 *   <li>{@code GET /api/provenance/links/{requirementId}} — Get provenance links for a requirement</li>
 * </ul>
 *
 * <p>Failures handled by the {@code POST} endpoints are returned as HTTP 400 with a JSON
 * body of the form {@code {"error": "..."}}. Exception details are written to the log
 * only and are never echoed back to the client.
 */
@RestController
@RequestMapping("/api")
@Tag(name = "Document Import & Provenance")
public class DocumentImportController {

    private static final Logger log = LoggerFactory.getLogger(DocumentImportController.class);

    /** Maximum upload size: 50 MB. */
    private static final long MAX_UPLOAD_SIZE = 50L * 1024 * 1024;

    private final DocumentParserService parserService;
    private final SourceProvenanceService provenanceService;
    private final DocumentAnalysisService analysisService;

    /**
     * Creates the controller with its collaborating services.
     *
     * @param parserService     parses uploaded files and computes content hashes
     * @param provenanceService creates and looks up artifacts, versions, fragments and links
     * @param analysisService   performs the AI-based extraction and regulation mapping
     */
    public DocumentImportController(DocumentParserService parserService,
                                    SourceProvenanceService provenanceService,
                                    DocumentAnalysisService analysisService) {
        this.parserService = parserService;
        this.provenanceService = provenanceService;
        this.analysisService = analysisService;
    }

    /**
     * Uploads and parses a PDF or DOCX document, extracting requirement
     * candidates. The document is registered as a source artifact (plus one
     * source version) for provenance tracking, and the ids of both records
     * are set on the returned parse result.
     *
     * @param file       the uploaded document; must be non-empty and at most 50 MB
     * @param title      optional artifact title; the original file name is used when blank
     * @param sourceType name of a {@link SourceType} constant (case-insensitive); unknown
     *                   names fall back to {@link SourceType#UPLOADED_DOCUMENT}
     * @return 200 with the parse result, or 400 with an {@code error} message
     */
    @Operation(summary = "Upload and parse document",
            description = "Uploads a PDF or DOCX document, extracts requirement candidates, " +
                    "and registers the document as a source artifact for provenance tracking.")
    @PostMapping(value = "/documents/upload", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
    public ResponseEntity<?> uploadDocument(
            @RequestParam("file") MultipartFile file,
            @RequestParam(value = "title", required = false) String title,
            @RequestParam(value = "sourceType", required = false, defaultValue = "REGULATION") String sourceType) {
        Optional<String> uploadError = validateUpload(file);
        if (uploadError.isPresent()) {
            return badRequest(uploadError.get());
        }
        try {
            DocumentParseResult result = parserService.parse(file);

            // Read and hash the bytes BEFORE creating any provenance record, so that an
            // I/O or hashing failure cannot leave behind an artifact without a version.
            String contentHash = parserService.computeContentHash(file.getBytes());

            SourceType type = resolveSourceType(sourceType);
            String artifactTitle = (title != null && !title.isBlank())
                    ? title : file.getOriginalFilename();

            SourceArtifact artifact = provenanceService.createArtifact(type, artifactTitle);
            SourceVersion version = provenanceService.createVersion(
                    artifact, result.getMimeType(), contentHash);

            result.setSourceArtifactId(artifact.getId());
            result.setSourceVersionId(version.getId());
            return ResponseEntity.ok(result);
        } catch (Exception e) {
            log.error("Document upload failed for file '{}'", file.getOriginalFilename(), e);
            return badRequest("Failed to parse document. Please check file format and try again.");
        }
    }

    /**
     * Uploads and parses a PDF or DOCX document, then uses the LLM to extract
     * requirement candidates with AI assistance. Returns both the rule-based
     * parse result and AI-extracted candidates.
     *
     * @param file       the uploaded document; must be non-empty and at most 50 MB
     * @param sourceType source type hint passed unchanged to the analysis service
     * @return 200 with {@code fileName}, {@code totalPages}, {@code ruleBased} and
     *         {@code aiCandidates}, or 400 with an {@code error} message
     */
    @Operation(summary = "AI-assisted extraction",
            description = "Uploads a document and uses AI to extract requirement candidates " +
                    "with confidence scores and requirement type classification.")
    @PostMapping(value = "/documents/extract-ai", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
    public ResponseEntity<?> extractWithAi(
            @RequestParam("file") MultipartFile file,
            @RequestParam(value = "sourceType", defaultValue = "REGULATION") String sourceType) {
        Optional<String> uploadError = validateUpload(file);
        if (uploadError.isPresent()) {
            return badRequest(uploadError.get());
        }
        try {
            DocumentParseResult parseResult = parserService.parse(file);
            // Build full text from all candidates for AI analysis
            String fullText = buildFullTextFromCandidates(parseResult);
            List<AiExtractedCandidate> aiCandidates =
                    analysisService.extractWithAi(fullText, sourceType);
            // Map.of rejects null values, so every nullable entry is given a default.
            return ResponseEntity.ok(Map.of(
                    "fileName", parseResult.getFileName() != null ? parseResult.getFileName() : "",
                    "totalPages", parseResult.getTotalPages(),
                    "ruleBased", parseResult.getCandidates() != null ? parseResult.getCandidates() : List.of(),
                    "aiCandidates", aiCandidates != null ? aiCandidates : List.of()));
        } catch (Exception e) {
            log.error("AI extraction failed for file '{}'", file.getOriginalFilename(), e);
            return badRequest("AI extraction failed. Please check file format and try again.");
        }
    }

    /**
     * Uploads a regulation document and maps it directly to architecture taxonomy
     * nodes using the LLM. Returns matches with confidence scores, link types,
     * and paragraph references.
     *
     * @param file the uploaded regulation document; must be non-empty and at most 50 MB
     * @return 200 with {@code fileName}, {@code totalPages} and {@code matches},
     *         or 400 with an {@code error} message
     */
    @Operation(summary = "Direct regulation-to-architecture mapping",
            description = "Uploads a regulation document and maps it directly to architecture " +
                    "taxonomy nodes with confidence scores and link types.")
    @PostMapping(value = "/documents/map-regulation", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
    public ResponseEntity<?> mapRegulation(@RequestParam("file") MultipartFile file) {
        Optional<String> uploadError = validateUpload(file);
        if (uploadError.isPresent()) {
            return badRequest(uploadError.get());
        }
        try {
            DocumentParseResult parseResult = parserService.parse(file);
            // Build full text from all candidates for regulation mapping
            String fullText = buildFullTextFromCandidates(parseResult);
            List<RegulationArchitectureMatch> matches =
                    analysisService.mapRegulationToArchitecture(fullText);
            // Map.of rejects null values, so every nullable entry is given a default.
            return ResponseEntity.ok(Map.of(
                    "fileName", parseResult.getFileName() != null ? parseResult.getFileName() : "",
                    "totalPages", parseResult.getTotalPages(),
                    "matches", matches != null ? matches : List.of()));
        } catch (Exception e) {
            log.error("Regulation mapping failed for file '{}'", file.getOriginalFilename(), e);
            return badRequest("Regulation mapping failed. Please check file format and try again.");
        }
    }

    /**
     * Lists all registered source artifacts.
     *
     * @return 200 with the artifacts returned by the provenance service
     */
    @Operation(summary = "List source artifacts",
            description = "Returns all registered source artifacts (documents, requests, etc.)")
    @GetMapping("/provenance/sources")
    public ResponseEntity<List<SourceArtifactDto>> listSources() {
        return ResponseEntity.ok(provenanceService.listAllArtifacts());
    }

    /**
     * Returns provenance links for a specific requirement.
     *
     * @param requirementId the requirement id taken from the request path
     * @return 200 with the links returned by the provenance service
     */
    @Operation(summary = "Get requirement provenance",
            description = "Returns all source links for a given requirement ID")
    @GetMapping("/provenance/links/{requirementId}")
    public ResponseEntity<List<RequirementSourceLinkDto>> getLinks(
            @PathVariable String requirementId) {
        return ResponseEntity.ok(provenanceService.getLinksForRequirement(requirementId));
    }

    /**
     * Links selected requirement candidates from a parsed document to the
     * provenance model. Called after the user reviews and selects candidates
     * in the GUI.
     *
     * <p>Expected request body keys: {@code sourceArtifactId} and
     * {@code sourceVersionId} (numbers) and {@code candidates}, a list of objects
     * with a {@code text} string and an optional {@code sectionHeading} string.
     * Entries that are not objects, or whose {@code text} is missing, blank or not
     * a string, are skipped. A source version that cannot be found is tolerated:
     * fragments and links are then created without a version.
     *
     * @param body the parsed JSON request body
     * @return 200 with the number of {@code linked} candidates and a {@code message},
     *         or 400 with an {@code error} message
     */
    @Operation(summary = "Confirm selected candidates",
            description = "Links selected requirement candidates to the provenance model")
    @PostMapping("/documents/confirm-candidates")
    public ResponseEntity<?> confirmCandidates(@RequestBody Map<String, Object> body) {
        // Validate the payload explicitly instead of relying on a caught
        // NullPointerException / ClassCastException to signal bad input.
        Object rawArtifactId = body.get("sourceArtifactId");
        Object rawVersionId = body.get("sourceVersionId");
        if (!(rawArtifactId instanceof Number) || !(rawVersionId instanceof Number)) {
            return badRequest("sourceArtifactId and sourceVersionId are required and must be numeric");
        }
        Object rawCandidates = body.get("candidates");
        if (rawCandidates == null) {
            return badRequest("No candidates provided");
        }
        if (!(rawCandidates instanceof List)) {
            return badRequest("candidates must be a list");
        }
        List<?> candidates = (List<?>) rawCandidates;
        if (candidates.isEmpty()) {
            return badRequest("No candidates provided");
        }

        Long artifactId = ((Number) rawArtifactId).longValue();
        Long versionId = ((Number) rawVersionId).longValue();
        try {
            var artifactOpt = provenanceService.findArtifactById(artifactId);
            if (artifactOpt.isEmpty()) {
                return badRequest("Source artifact not found");
            }
            var artifact = artifactOpt.get();
            // Best effort: an unknown version id yields null rather than an error.
            var version = provenanceService.findVersionById(versionId).orElse(null);

            int linked = 0;
            for (Object element : candidates) {
                if (!(element instanceof Map)) {
                    continue;
                }
                Map<?, ?> candidate = (Map<?, ?>) element;
                Object rawText = candidate.get("text");
                if (!(rawText instanceof String) || ((String) rawText).isBlank()) {
                    continue;
                }
                String text = (String) rawText;
                Object rawSection = candidate.get("sectionHeading");
                String section = rawSection instanceof String ? (String) rawSection : null;

                SourceFragment fragment = provenanceService.createFragment(
                        version, text, section, null);
                // The suffix is the running count of linked candidates, not the position
                // in the submitted list: skipped entries do not consume a number.
                // NOTE(review): a second confirmation for the same artifact restarts at 0
                // and would produce the same ids again — confirm this is acceptable.
                String reqId = "DOC-" + artifactId + "-" + linked;
                provenanceService.linkRequirement(reqId, artifact,
                        version, fragment, LinkType.EXTRACTED_FROM);
                linked++;
            }
            return ResponseEntity.ok(Map.of(
                    "linked", linked,
                    "message", linked + " requirement candidate(s) linked to source"));
        } catch (Exception e) {
            log.error("Failed to confirm candidates", e);
            return badRequest("Failed to confirm candidates. Please check the request and try again.");
        }
    }

    // ── Private helpers ───────────────────────────────────────────────────────

    /**
     * Checks the constraints shared by all upload endpoints.
     *
     * @return the client-facing error message, or empty when the file is acceptable
     */
    private static Optional<String> validateUpload(MultipartFile file) {
        if (file.isEmpty()) {
            return Optional.of("File is empty");
        }
        if (file.getSize() > MAX_UPLOAD_SIZE) {
            return Optional.of(
                    "File exceeds maximum size of " + (MAX_UPLOAD_SIZE / (1024 * 1024)) + " MB");
        }
        return Optional.empty();
    }

    /** Builds the HTTP 400 response with the standard {@code {"error": message}} body. */
    private static ResponseEntity<Map<String, String>> badRequest(String message) {
        return ResponseEntity.badRequest().body(Map.of("error", message));
    }

    /**
     * Resolves a case-insensitive {@link SourceType} name, falling back to
     * {@link SourceType#UPLOADED_DOCUMENT} for names that match no constant.
     */
    private static SourceType resolveSourceType(String sourceType) {
        try {
            // Locale.ROOT keeps the upper-casing independent of the server's default
            // locale (under a Turkish locale "i" would otherwise not become "I").
            return SourceType.valueOf(sourceType.toUpperCase(Locale.ROOT));
        } catch (IllegalArgumentException ignored) {
            // Deliberate leniency: an unknown type is treated as a generic upload.
            return SourceType.UPLOADED_DOCUMENT;
        }
    }

    /**
     * Builds a full document text from all parsed candidates, preserving section headings.
     * This provides richer context to the LLM than the truncated rawTextPreview.
     * When there are no candidates, the raw text preview (or an empty string) is returned.
     * Candidates without text are skipped so that no literal "null" ends up in the text.
     */
    private String buildFullTextFromCandidates(DocumentParseResult result) {
        var candidates = result.getCandidates();
        if (candidates == null || candidates.isEmpty()) {
            String preview = result.getRawTextPreview();
            return preview != null ? preview : "";
        }
        StringBuilder sb = new StringBuilder();
        for (var candidate : candidates) {
            var text = candidate.getText();
            if (text == null) {
                continue;
            }
            String heading = candidate.getSectionHeading();
            if (heading != null && !heading.isBlank()) {
                sb.append(heading).append(":\n");
            }
            sb.append(text).append("\n\n");
        }
        return sb.toString();
    }
}