StreamPipelineBuilder.java
/*******************************************************************************
* Copyright (c) 2025 Carsten Hammer and others.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* Carsten Hammer
*******************************************************************************/
package org.sandbox.jdt.internal.corext.fix.helper;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.eclipse.jdt.core.dom.ASTNode;
import org.eclipse.jdt.core.dom.CompilationUnit;
import org.eclipse.jdt.core.dom.EnhancedForStatement;
import org.eclipse.jdt.core.dom.MethodInvocation;
import org.eclipse.jdt.core.dom.Statement;
import org.eclipse.jdt.internal.corext.dom.ScopeAnalyzer;
/**
* Builder class for constructing stream pipelines from enhanced for-loops.
*
* <p>
* This class analyzes the body of an enhanced for-loop and determines if it can
* be converted into a stream pipeline. It handles various patterns including:
* <ul>
* <li>Simple forEach operations</li>
* <li>MAP operations (variable declarations with initializers)</li>
* <li>FILTER operations (IF statements)</li>
* <li>REDUCE operations (accumulator patterns including SUM, PRODUCT,
* INCREMENT, MAX, MIN)</li>
* <li>ANYMATCH/NONEMATCH/ALLMATCH operations (early returns)</li>
* </ul>
*
* <p><b>Architecture Overview:</b></p>
* <p>The conversion process involves three phases:</p>
*
* <ol>
* <li><b>Analysis Phase</b> ({@link #analyze()}):
* <ul>
* <li>Validates preconditions via {@link PreconditionsChecker}</li>
* <li>Parses loop body into {@link ProspectiveOperation}s</li>
* <li>Validates variable scoping</li>
* <li>Returns true if conversion is possible</li>
* </ul>
* </li>
* <li><b>Construction Phase</b> ({@link #buildPipeline()}):
* <ul>
* <li>Determines if .stream() prefix is needed</li>
* <li>Chains operations into MethodInvocation</li>
* <li>Generates lambda parameters and arguments</li>
* <li>Returns the complete pipeline expression</li>
* </ul>
* </li>
* <li><b>Wrapping Phase</b> ({@link #wrapPipeline(MethodInvocation)}):
* <ul>
* <li>Wraps in appropriate Statement type</li>
* <li>Handles reducers (assignment to accumulator)</li>
* <li>Handles anyMatch/noneMatch/allMatch (IF with early return)</li>
* <li>Returns the final Statement to replace the for-loop</li>
* </ul>
* </li>
* </ol>
*
* <p><b>Supported Patterns:</b></p>
* <ul>
* <li><b>FOREACH:</b> {@code for (x : xs) { action(x); }} → {@code xs.forEach(x -> action(x))}</li>
* <li><b>MAP:</b> {@code for (x : xs) { T y = f(x); ... }} → {@code xs.stream().map(x -> f(x))...}</li>
* <li><b>FILTER:</b> {@code for (x : xs) { if (p(x)) { ... } }} → {@code xs.stream().filter(x -> p(x))...}</li>
* <li><b>REDUCE:</b> {@code for (x : xs) { sum += x; }} → {@code sum = xs.stream().reduce(sum, Integer::sum)}</li>
* <li><b>ANYMATCH:</b> {@code for (x : xs) { if (p(x)) return true; } return false;} → {@code if (xs.stream().anyMatch(x -> p(x))) return true;}</li>
* </ul>
*
* <p>
* <b>Supported Reduction Patterns:</b>
* <ul>
* <li>INCREMENT: {@code i++}, {@code ++i}</li>
* <li>DECREMENT: {@code i--}, {@code --i}, {@code i -= 1}</li>
* <li>SUM: {@code sum += value}</li>
* <li>PRODUCT: {@code product *= value}</li>
* <li>STRING_CONCAT: {@code str += substring}</li>
* <li>MAX: {@code max = Math.max(max, value)}</li>
* <li>MIN: {@code min = Math.min(min, value)}</li>
* <li>CUSTOM_AGGREGATE: Custom aggregation patterns</li>
* </ul>
*
* <p><b>Thread Safety:</b> This class is not thread-safe. Create a new instance
* for each loop to be analyzed.</p>
*
* <p><b>Usage Example:</b></p>
* <pre>{@code
* PreconditionsChecker preconditions = new PreconditionsChecker(forLoop, ...);
* StreamPipelineBuilder builder = new StreamPipelineBuilder(forLoop, preconditions);
*
* if (builder.analyze()) {
* MethodInvocation pipeline = builder.buildPipeline();
* Statement replacement = builder.wrapPipeline(pipeline);
* // Replace forLoop with replacement
* }
* }</pre>
*
* <p>
* Based on the NetBeans mapreduce hints implementation:
* https://github.com/apache/netbeans/tree/master/java/java.hints/src/org/netbeans/modules/java/hints/jdk/mapreduce
*
* @see ProspectiveOperation
* @see PreconditionsChecker
* @see org.sandbox.jdt.internal.corext.util.VariableResolver
* @see org.sandbox.jdt.internal.corext.util.ExpressionHelper
* @see Refactorer
*/
public class StreamPipelineBuilder {
	/** The enhanced for-loop being analyzed and converted. */
	private final EnhancedForStatement forLoop;
	/** Precondition results for {@link #forLoop}; consulted at the start of {@link #analyze()}. */
	private final PreconditionsChecker preconditions;
	/** Detector handed to the loop body parser and, after analysis, to the pipeline assembler. */
	private final ReducePatternDetector reduceDetector;
	/** Detector handed to the loop body parser and the assembler; also supplies the collect target. */
	private final CollectPatternDetector collectDetector;
	/** Analyzer handed to the loop body parser. */
	private final IfStatementAnalyzer ifAnalyzer;
	/** Parser that turns the loop body into {@link ProspectiveOperation}s. */
	private final LoopBodyParser loopBodyParser;
	/** Identifier of the loop parameter, e.g. {@code x} in {@code for (T x : xs)}. */
	private final String loopVariableName;
	/** Match-pattern flags, copied once from {@link #preconditions} at construction time. */
	private final boolean isAnyMatchPattern;
	private final boolean isNoneMatchPattern;
	private final boolean isAllMatchPattern;
	/** Operations extracted from the loop body; empty until {@link #analyze()} has parsed the body. */
	private List<ProspectiveOperation> operations;
	/** Set as soon as {@link #analyze()} starts, so the analysis runs at most once. */
	private boolean analyzed = false;
	/** Cached result of {@link #analyze()}. */
	private boolean convertible = false;
	/** Assembler for building the final pipeline (initialized after analysis). */
	private PipelineAssembler pipelineAssembler;

	/**
	 * Creates a new StreamPipelineBuilder for the given for-loop.
	 *
	 * <p>
	 * The constructor creates the pattern detectors and the loop body parser and
	 * reads the match-pattern flags from {@code preconditions}; it does not parse
	 * the loop body. Call {@link #analyze()} for that.
	 *
	 * @param forLoop       the enhanced for-loop to analyze
	 * @param preconditions the preconditions checker for the loop
	 * @throws IllegalArgumentException if forLoop or preconditions is null
	 */
	public StreamPipelineBuilder(EnhancedForStatement forLoop, PreconditionsChecker preconditions) {
		if (forLoop == null) {
			throw new IllegalArgumentException("forLoop cannot be null");
		}
		if (preconditions == null) {
			throw new IllegalArgumentException("preconditions cannot be null");
		}
		this.forLoop = forLoop;
		this.preconditions = preconditions;
		this.reduceDetector = new ReducePatternDetector(forLoop);
		this.collectDetector = new CollectPatternDetector(forLoop);
		this.ifAnalyzer = new IfStatementAnalyzer(forLoop);
		// Internal invariant: EnhancedForStatement must have a parameter with a name.
		// This is only verified when assertions are enabled (-ea).
		assert forLoop.getParameter() != null && forLoop.getParameter().getName() != null
				: "forLoop must have a valid parameter with a name";
		this.loopVariableName = forLoop.getParameter().getName().getIdentifier();
		this.operations = new ArrayList<>();
		this.isAnyMatchPattern = preconditions.isAnyMatchPattern();
		this.isNoneMatchPattern = preconditions.isNoneMatchPattern();
		this.isAllMatchPattern = preconditions.isAllMatchPattern();
		// Initialize LoopBodyParser with all required dependencies
		this.loopBodyParser = new LoopBodyParser(forLoop, reduceDetector, collectDetector, ifAnalyzer,
				isAnyMatchPattern, isNoneMatchPattern, isAllMatchPattern);
	}

	/**
	 * Analyzes the loop body to determine if it can be converted to a stream
	 * pipeline.
	 *
	 * <p>
	 * This method should be called before attempting to build the pipeline. It
	 * inspects the loop body and extracts a sequence of
	 * {@link ProspectiveOperation}s that represent the transformation.
	 *
	 * <p>
	 * The analysis runs at most once per instance; later calls return the cached
	 * result.
	 *
	 * @return true if the loop can be converted to a stream pipeline, false
	 *         otherwise
	 */
	public boolean analyze() {
		if (!analyzed) {
			// Flag is set before the work starts: if the analysis throws, the cached
			// result stays false and the analysis is not attempted again.
			analyzed = true;
			convertible = runAnalysis();
		}
		return convertible;
	}

	/**
	 * Performs the actual analysis steps behind {@link #analyze()}: precondition
	 * check, loop body parsing, unsafe-collect check, variable scope validation and
	 * finally creation of the {@link PipelineAssembler}.
	 *
	 * @return true if every step succeeded and the assembler has been initialized
	 */
	private boolean runAnalysis() {
		// Check basic preconditions.
		// NOTE(review): a preconditions.iteratesOverIterable() check was disabled
		// here; confirm whether it should be restored.
		if (!preconditions.isSafeToRefactor()) {
			return false;
		}
		// Parse the loop body into operations. A null result is treated like
		// "nothing to convert" so the operations field is never null.
		Statement loopBody = forLoop.getBody();
		List<ProspectiveOperation> parsed = parseLoopBody(loopBody, loopVariableName);
		operations = parsed != null ? parsed : new ArrayList<>();
		if (operations.isEmpty()) {
			return false;
		}
		// Unsafe collect pattern: collecting into a variable that is also read
		// during iteration cannot be converted.
		if (hasUnsafeCollectPattern(loopBody)) {
			return false;
		}
		// Validate variable scoping
		if (!validateVariableScope(operations, loopVariableName)) {
			return false;
		}
		// Initialize the pipeline assembler for building the final pipeline
		pipelineAssembler = new PipelineAssembler(forLoop, operations, loopVariableName);
		pipelineAssembler.setUsedVariableNames(getUsedVariableNames(forLoop));
		pipelineAssembler.setReduceDetector(reduceDetector);
		pipelineAssembler.setCollectDetector(collectDetector);
		return true;
	}

	/**
	 * Builds the stream pipeline from the analyzed operations.
	 *
	 * <p>This method delegates to {@link PipelineAssembler#buildPipeline()}, which
	 * constructs the chain of method invocations representing the complete
	 * stream pipeline.</p>
	 *
	 * <p><b>Examples:</b></p>
	 * <pre>{@code
	 * // Simple forEach (no .stream() needed)
	 * list.forEach(item -> System.out.println(item))
	 *
	 * // Complex pipeline (needs .stream())
	 * list.stream()
	 *     .filter(item -> item != null)
	 *     .map(item -> item.toString())
	 *     .reduce("", String::concat)
	 * }</pre>
	 *
	 * <p><b>Prerequisites:</b></p>
	 * <ul>
	 * <li>{@link #analyze()} must have been called and returned {@code true}</li>
	 * </ul>
	 *
	 * @return a MethodInvocation representing the stream pipeline, or null if
	 *         the loop has not been analyzed or cannot be converted
	 * @see #analyze()
	 * @see #wrapPipeline(MethodInvocation)
	 * @see PipelineAssembler
	 */
	public MethodInvocation buildPipeline() {
		if (!analyzed || !convertible || pipelineAssembler == null) {
			return null;
		}
		return pipelineAssembler.buildPipeline();
	}

	/**
	 * Returns whether the pipeline needs the java.util.Arrays import.
	 * The decision is delegated to the {@link PipelineAssembler}.
	 *
	 * <p>This method should be called after {@link #buildPipeline()} to determine
	 * if an import needs to be added.</p>
	 *
	 * @return true if Arrays import is needed; false if no assembler has been
	 *         initialized yet
	 */
	public boolean needsArraysImport() {
		return pipelineAssembler != null && pipelineAssembler.needsArraysImport();
	}

	/**
	 * Returns whether the pipeline needs the java.util.stream.Collectors import.
	 *
	 * <p>When an assembler exists the decision is delegated to it. Otherwise the
	 * method falls back to reporting whether any parsed operation is a COLLECT
	 * operation.</p>
	 *
	 * <p>This method should be called after {@link #buildPipeline()} to determine
	 * if an import needs to be added.</p>
	 *
	 * @return true if Collectors import is needed
	 */
	public boolean needsCollectorsImport() {
		if (pipelineAssembler != null) {
			return pipelineAssembler.needsCollectorsImport();
		}
		// Fallback: check if any operation is a COLLECT operation
		return containsOperation(OperationType.COLLECT);
	}

	/**
	 * Returns whether the parsed operations contain a COLLECT operation.
	 *
	 * <p>Used to determine if declaration merging should be applied.</p>
	 *
	 * @return true if this is a collect operation
	 */
	public boolean isCollectOperation() {
		return containsOperation(OperationType.COLLECT);
	}

	/**
	 * Tells whether at least one parsed operation has the given type.
	 *
	 * @param type the operation type to look for
	 * @return true if an operation of that type is present; false if there are no
	 *         operations
	 */
	private boolean containsOperation(OperationType type) {
		if (operations == null) {
			return false;
		}
		for (ProspectiveOperation op : operations) {
			if (op.getOperationType() == type) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Checks if the loop has an unsafe COLLECT pattern.
	 *
	 * <p>A COLLECT pattern is unsafe if the target collection variable is
	 * read (not just written to) during the iteration. For example:</p>
	 * <pre>{@code
	 * for (Integer item : items) {
	 *     result.add(item);
	 *     System.out.println("Size: " + result.size()); // Unsafe - reading result
	 * }
	 * }</pre>
	 *
	 * <p>The read check itself is performed by
	 * {@code CollectPatternDetector.isTargetReadDuringIteration}.</p>
	 *
	 * @param loopBody the loop body to check
	 * @return true if an unsafe collect pattern is detected; false if there is no
	 *         COLLECT operation or the detector reports no target variable
	 */
	private boolean hasUnsafeCollectPattern(Statement loopBody) {
		if (!containsOperation(OperationType.COLLECT)) {
			return false;
		}
		// Find the target variable from the COLLECT operation
		String collectTarget = collectDetector.getTargetVariable();
		if (collectTarget == null) {
			return false;
		}
		// Check if the target variable is read during iteration
		return collectDetector.isTargetReadDuringIteration(loopBody, collectTarget);
	}

	/**
	 * Wraps the stream pipeline in an appropriate statement type based on the terminal operation.
	 *
	 * <p>
	 * The wrapping itself is delegated to {@link PipelineAssembler}. According to
	 * the documented strategy it depends on the type of terminal operation:
	 * <ul>
	 * <li><b>ANYMATCH</b>: Wraps in {@code if (stream.anyMatch(...)) { return true; }}</li>
	 * <li><b>NONEMATCH</b>: Wraps in {@code if (!stream.noneMatch(...)) { return false; }}</li>
	 * <li><b>ALLMATCH</b>: Wraps in {@code if (!stream.allMatch(...)) { return false; }}</li>
	 * <li><b>REDUCE</b>: Wraps in assignment {@code accumulatorVariable = stream.reduce(...)}</li>
	 * <li><b>FOREACH</b> (and others): Wraps in {@link org.eclipse.jdt.core.dom.ExpressionStatement}</li>
	 * </ul>
	 *
	 * @param pipeline the stream pipeline to wrap
	 * @return a Statement wrapping the pipeline, or null if pipeline is null or
	 *         no pipeline assembler has been created by {@link #analyze()}
	 * @see OperationType
	 * @see PipelineAssembler
	 */
	public Statement wrapPipeline(MethodInvocation pipeline) {
		// Honor the documented contract: a null pipeline yields null instead of
		// being forwarded to the assembler.
		if (pipeline == null || pipelineAssembler == null) {
			return null;
		}
		return pipelineAssembler.wrapPipeline(pipeline);
	}

	/**
	 * Analyzes the body of an enhanced for-loop and extracts a list of
	 * {@link ProspectiveOperation} objects representing the operations that can be
	 * mapped to stream operations. The work is delegated to {@link LoopBodyParser}.
	 *
	 * <p><b>Example Patterns:</b></p>
	 * <pre>{@code
	 * // MAP: Variable declaration with initializer
	 * for (Integer num : numbers) {
	 *     int squared = num * num;  // → .map(num -> num * num)
	 *     System.out.println(squared);
	 * }
	 *
	 * // FILTER: IF statement
	 * for (String item : items) {
	 *     if (item != null) {  // → .filter(item -> item != null)
	 *         System.out.println(item);
	 *     }
	 * }
	 *
	 * // REDUCE: Accumulator pattern
	 * int sum = 0;
	 * for (Integer num : numbers) {
	 *     sum += num;  // → .reduce(sum, Integer::sum)
	 * }
	 * }</pre>
	 *
	 * @param body        the {@link Statement} representing the loop body
	 * @param loopVarName the name of the loop variable
	 * @return a list of {@link ProspectiveOperation} objects, as returned by the parser
	 */
	private List<ProspectiveOperation> parseLoopBody(Statement body, String loopVarName) {
		return loopBodyParser.parse(body, loopVarName);
	}

	/**
	 * Validates that variables used in operations are properly scoped.
	 *
	 * <p>
	 * <b>Algorithm:</b>
	 * </p>
	 * <p>
	 * The set of available variables starts with the loop variable plus every name
	 * reported by {@link #getUsedVariableNames(ASTNode)} for the for-loop. The
	 * operations are then processed in sequence. For each consumed variable of an
	 * operation:
	 * <ol>
	 * <li>Accumulator variables of any REDUCE operation and names reported for the
	 * loop's scope are accepted without further checks.</li>
	 * <li>The loop variable is rejected when an earlier operation both consumed it
	 * and produced a new variable, and the current operation produces a variable
	 * as well.</li>
	 * <li>Any other variable must already be in the available set, i.e. it must
	 * have been produced by an earlier operation.</li>
	 * </ol>
	 * Afterwards a non-empty produced variable name is added to the available set.
	 *
	 * <p>
	 * <b>Relationship with {@code isSafeSideEffect}:</b> the original documentation
	 * describes this method as the comprehensive counterpart of an
	 * {@code isSafeSideEffect} check that detects unsafe assignments during pipeline
	 * construction. That method is not defined in this class; presumably it lives
	 * in a collaborator (TODO confirm).
	 * </p>
	 *
	 * @param operations  the list of operations to validate (must not be null)
	 * @param loopVarName the loop variable name (must not be null)
	 * @return true if all variables are properly scoped, false otherwise
	 * @throws IllegalArgumentException if operations or loopVarName is null
	 * @throws IllegalStateException    if the operations list contains a null element
	 */
	private boolean validateVariableScope(List<ProspectiveOperation> operations, String loopVarName) {
		if (operations == null) {
			throw new IllegalArgumentException("operations cannot be null");
		}
		if (loopVarName == null) {
			throw new IllegalArgumentException("loopVarName cannot be null");
		}
		// Names reported for the loop's scope, snapshotted into a hash set so the
		// membership tests inside the nested loop below are constant time.
		Set<String> outerScopeVars = new HashSet<>(getUsedVariableNames(forLoop));
		Set<String> availableVars = new HashSet<>(outerScopeVars);
		availableVars.add(loopVarName);
		// Becomes true once an operation consumed the loop variable and produced a
		// new variable in its place.
		boolean loopVarConsumed = false;
		for (ProspectiveOperation op : operations) {
			if (op == null) {
				throw new IllegalStateException("Encountered null ProspectiveOperation in operations list");
			}
			Set<String> consumed = op.getConsumedVariables();
			for (String name : consumed) {
				// Accumulators and names reported for the loop's scope are always accepted.
				if (isAccumulatorVariable(name, operations) || outerScopeVars.contains(name)) {
					continue;
				}
				boolean scopeViolation;
				if (name.equals(loopVarName)) {
					// Loop variable used by a producing operation after it has been
					// replaced by an earlier producing operation.
					scopeViolation = loopVarConsumed && op.getProducedVariableName() != null;
				} else {
					// Any other variable must have been produced earlier in the pipeline.
					scopeViolation = !availableVars.contains(name);
				}
				if (scopeViolation) {
					return false;
				}
			}
			// Publish the produced variable for subsequent operations.
			String produced = op.getProducedVariableName();
			if (produced != null && !produced.isEmpty()) {
				availableVars.add(produced);
				if (consumed.contains(loopVarName)) {
					// The loop variable has now been replaced by the produced one.
					loopVarConsumed = true;
					availableVars.remove(loopVarName);
				}
			}
		}
		return true;
	}

	/**
	 * Checks if a variable is an accumulator variable in any REDUCE operation.
	 *
	 * @param varName    the variable name to check
	 * @param operations the list of operations
	 * @return true if the variable is an accumulator, false otherwise
	 */
	private boolean isAccumulatorVariable(String varName, List<ProspectiveOperation> operations) {
		for (ProspectiveOperation op : operations) {
			if (op.getOperationType() == OperationType.REDUCE
					&& varName.equals(op.getAccumulatorVariableName())) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Gets the variable names that {@link ScopeAnalyzer} reports for the source
	 * range of the given AST node.
	 *
	 * <p>The result is used both to generate lambda parameter names that don't
	 * clash with existing variables and as the set of always-available names in
	 * {@link #validateVariableScope(List, String)}.</p>
	 *
	 * @param node the AST node to analyze; its root is cast to a
	 *             {@link CompilationUnit}, so the node is expected to belong to one
	 * @return collection of variable names used in the node's scope
	 */
	private static Collection<String> getUsedVariableNames(ASTNode node) {
		CompilationUnit root = (CompilationUnit) node.getRoot();
		return new ScopeAnalyzer(root).getUsedVariableNames(node.getStartPosition(), node.getLength());
	}

	/**
	 * Checks if the analyzed operations include a REDUCE operation.
	 *
	 * @return true if there is a REDUCE operation, false otherwise (including when
	 *         the loop has not been analyzed or is not convertible)
	 */
	public boolean hasReduceOperation() {
		if (!analyzed || !convertible) {
			return false;
		}
		return containsOperation(OperationType.REDUCE);
	}

	/**
	 * Gets the accumulator variable name of the first REDUCE operation.
	 *
	 * @return the accumulator variable name, or null if no REDUCE operation exists
	 *         or the loop has not been analyzed successfully
	 */
	public String getAccumulatorVariableName() {
		if (!analyzed || !convertible) {
			return null;
		}
		for (ProspectiveOperation op : operations) {
			if (op.getOperationType() == OperationType.REDUCE) {
				return op.getAccumulatorVariableName();
			}
		}
		return null;
	}

	/**
	 * Gets the target collection variable name for COLLECT operations, as reported
	 * by the collect pattern detector.
	 *
	 * @return the target variable name, or null if the loop has not been analyzed
	 *         successfully or the detector reports no target
	 */
	public String getCollectTargetVariable() {
		if (!analyzed || !convertible) {
			return null;
		}
		return collectDetector.getTargetVariable();
	}
}