// MiningConfig.java

/*******************************************************************************
 * Copyright (c) 2025 Carsten Hammer.
 *
 * This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License 2.0
 * which accompanies this distribution, and is available at
 * https://www.eclipse.org/legal/epl-2.0/
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 * Contributors:
 *     Carsten Hammer
 *******************************************************************************/
package org.sandbox.mining.core.config;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import org.yaml.snakeyaml.Yaml;

/**
 * Parses and holds the mining configuration from a repos.yml file.
 *
 * <p>Expected YAML structure:</p>
 * <pre>
 * mining:
 *   start-date: "2024-01-01"
 *   end-date: "2024-12-31"
 *   batch-size: 50
 *   max-diff-lines-per-commit: 500
 *   min-diff-lines-per-commit: 10
 *   max-files-per-commit: 20
 *   timeout-per-repo-minutes: 10
 *   epochs:
 *     - start: "2024-01-01"
 *       end: "2024-06-30"
 *   repositories:
 *     - url: https://github.com/example/repo.git
 *       branch: main
 *       paths:
 *         - src/main/java
 * </pre>
 *
 * <p>The scalar settings and {@code epochs} may alternatively be nested in a
 * {@code mining.settings} mapping. When that mapping is present it is used
 * <em>instead of</em> {@code mining} for all of them (there is no per-key
 * fallback). {@code repositories} is always read from {@code mining}
 * directly.</p>
 *
 * <p>Parsing is lenient: missing or wrongly typed nodes are ignored and the
 * corresponding defaults are kept.</p>
 */
public class MiningConfig {

	private List<RepoEntry> repositories = Collections.emptyList();
	private String startDate;
	private String endDate;
	private int batchSize = 50;
	private int maxDiffLinesPerCommit = 500;
	private int minDiffLinesPerCommit = 10;
	private int maxFilesPerCommit = 20;
	private int timeoutPerRepoMinutes = 10;
	private List<EpochEntry> epochs = Collections.emptyList();

	/**
	 * Represents a single epoch (time range) for mining.
	 *
	 * <p>Both bounds are kept as the plain strings found in the configuration;
	 * this class does not validate or parse them.</p>
	 */
	public static class EpochEntry {
		private String start;
		private String end;

		/** Creates an epoch with both bounds unset ({@code null}). */
		public EpochEntry() {
		}

		/**
		 * Creates an epoch with the given bounds.
		 *
		 * @param start start of the epoch, may be {@code null}
		 * @param end end of the epoch, may be {@code null}
		 */
		public EpochEntry(String start, String end) {
			this.start = start;
			this.end = end;
		}

		/** @return the start of the epoch, or {@code null} if unset */
		public String getStart() {
			return start;
		}

		/** @param start the new start of the epoch */
		public void setStart(String start) {
			this.start = start;
		}

		/** @return the end of the epoch, or {@code null} if unset */
		public String getEnd() {
			return end;
		}

		/** @param end the new end of the epoch */
		public void setEnd(String end) {
			this.end = end;
		}

		@Override
		public String toString() {
			return start + " to " + end; //$NON-NLS-1$
		}
	}

	/** Creates a configuration populated with the default values. */
	public MiningConfig() {
	}

	/**
	 * Parses a repos.yml configuration file.
	 *
	 * @param configPath path to the YAML configuration file
	 * @return parsed MiningConfig
	 * @throws IOException if the file cannot be read
	 */
	public static MiningConfig parse(Path configPath) throws IOException {
		try (InputStream in = Files.newInputStream(configPath)) {
			return parse(in);
		}
	}

	/**
	 * Parses a repos.yml configuration from an input stream.
	 *
	 * <p>The stream is not closed by this method. A document whose root (or
	 * whose {@code mining} node) is not a mapping yields a configuration with
	 * default values.</p>
	 *
	 * @param in input stream containing YAML content
	 * @return parsed MiningConfig
	 */
	public static MiningConfig parse(InputStream in) {
		// NOTE(review): with SnakeYAML releases before 2.0 the default Yaml()
		// constructor may instantiate arbitrary types from tagged documents.
		// If configuration files can come from untrusted sources, consider a
		// SafeConstructor - confirm against the SnakeYAML version in use.
		Yaml yaml = new Yaml();
		// Load as Object: assigning directly to a Map would throw a
		// ClassCastException for documents whose root is a scalar or a list.
		Object root = yaml.load(in);
		return fromRoot(root);
	}

	/**
	 * Builds a configuration from the loaded YAML document root.
	 *
	 * @param root object returned by the YAML loader, may be {@code null}
	 * @return configuration with every recognised value applied
	 */
	private static MiningConfig fromRoot(Object root) {
		MiningConfig config = new MiningConfig();

		Map<?, ?> rootMap = asMap(root);
		if (rootMap == null) {
			return config;
		}
		Map<?, ?> mining = asMap(rootMap.get("mining"));
		if (mining == null) {
			return config;
		}

		// A mining.settings mapping, when present, replaces mining as the
		// source of all scalar settings and epochs (no per-key fallback).
		Map<?, ?> settings = asMap(mining.get("settings"));
		Map<?, ?> source = settings != null ? settings : mining;

		config.startDate = asString(source.get("start-date"));
		config.endDate = asString(source.get("end-date"));
		config.batchSize = intOrDefault(source, "batch-size", config.batchSize);
		config.maxDiffLinesPerCommit = intOrDefault(source, "max-diff-lines-per-commit",
				config.maxDiffLinesPerCommit);
		config.minDiffLinesPerCommit = intOrDefault(source, "min-diff-lines-per-commit",
				config.minDiffLinesPerCommit);
		config.maxFilesPerCommit = intOrDefault(source, "max-files-per-commit", config.maxFilesPerCommit);
		config.timeoutPerRepoMinutes = intOrDefault(source, "timeout-per-repo-minutes",
				config.timeoutPerRepoMinutes);

		// Repositories are always read from mining directly.
		config.repositories = parseRepositories(mining.get("repositories"));
		config.epochs = parseEpochs(source.get("epochs"));

		return config;
	}

	/** Returns {@code value} as a map, or {@code null} if it is not a mapping. */
	private static Map<?, ?> asMap(Object value) {
		return value instanceof Map<?, ?> map ? map : null;
	}

	/** Returns the string form of {@code value}, or {@code null} if it is {@code null}. */
	private static String asString(Object value) {
		return value != null ? value.toString() : null;
	}

	/** Returns the numeric value stored under {@code key} as an int, or {@code fallback} if absent or not a number. */
	private static int intOrDefault(Map<?, ?> source, String key, int fallback) {
		Object value = source.get(key);
		if (value instanceof Number number) {
			return number.intValue();
		}
		return fallback;
	}

	/** Converts the {@code repositories} node into entries, skipping items that are not mappings. */
	private static List<RepoEntry> parseRepositories(Object node) {
		if (!(node instanceof List<?> items)) {
			return Collections.emptyList();
		}
		return items.stream()
				.map(MiningConfig::asMap)
				.filter(map -> map != null)
				.map(MiningConfig::toRepoEntry)
				.toList();
	}

	/** Builds a single repository entry from its YAML mapping. */
	private static RepoEntry toRepoEntry(Map<?, ?> map) {
		RepoEntry entry = new RepoEntry();
		// toString() instead of a String cast so non-string scalars do not fail.
		entry.setUrl(asString(map.get("url")));
		Object branch = map.get("branch");
		if (branch != null) {
			entry.setBranch(branch.toString());
		}
		if (map.get("paths") instanceof List<?> paths) {
			// Empty YAML list items load as null; drop them rather than fail.
			entry.setPaths(paths.stream().filter(path -> path != null).map(Object::toString).toList());
		}
		return entry;
	}

	/** Converts the {@code epochs} node into entries, skipping items that are not mappings. */
	private static List<EpochEntry> parseEpochs(Object node) {
		if (!(node instanceof List<?> items)) {
			return Collections.emptyList();
		}
		return items.stream()
				.map(MiningConfig::asMap)
				.filter(map -> map != null)
				.map(map -> new EpochEntry(asString(map.get("start")), asString(map.get("end"))))
				.toList();
	}

	/** @return the configured repositories, never {@code null} */
	public List<RepoEntry> getRepositories() {
		return repositories;
	}

	/** @param repositories the repositories to mine; {@code null} is treated as an empty list */
	public void setRepositories(List<RepoEntry> repositories) {
		this.repositories = repositories != null ? repositories : Collections.emptyList();
	}

	/** @return the {@code start-date} value as text, or {@code null} if not configured */
	public String getStartDate() {
		return startDate;
	}

	/** @param startDate the start date as text */
	public void setStartDate(String startDate) {
		this.startDate = startDate;
	}

	/** @return the {@code end-date} value as text, or {@code null} if not configured */
	public String getEndDate() {
		return endDate;
	}

	/** @param endDate the end date as text */
	public void setEndDate(String endDate) {
		this.endDate = endDate;
	}

	/** @return the configured epochs, never {@code null} */
	public List<EpochEntry> getEpochs() {
		return epochs;
	}

	/** @param epochs the epochs to mine; {@code null} is treated as an empty list */
	public void setEpochs(List<EpochEntry> epochs) {
		this.epochs = epochs != null ? epochs : Collections.emptyList();
	}

	/** @return the {@code batch-size} setting (default 50) */
	public int getBatchSize() {
		return batchSize;
	}

	/** @param batchSize the new batch size */
	public void setBatchSize(int batchSize) {
		this.batchSize = batchSize;
	}

	/** @return the {@code max-diff-lines-per-commit} setting (default 500) */
	public int getMaxDiffLinesPerCommit() {
		return maxDiffLinesPerCommit;
	}

	/** @param maxDiffLinesPerCommit the new maximum number of diff lines per commit */
	public void setMaxDiffLinesPerCommit(int maxDiffLinesPerCommit) {
		this.maxDiffLinesPerCommit = maxDiffLinesPerCommit;
	}

	/** @return the {@code min-diff-lines-per-commit} setting (default 10) */
	public int getMinDiffLinesPerCommit() {
		return minDiffLinesPerCommit;
	}

	/** @param minDiffLinesPerCommit the new minimum number of diff lines per commit */
	public void setMinDiffLinesPerCommit(int minDiffLinesPerCommit) {
		this.minDiffLinesPerCommit = minDiffLinesPerCommit;
	}

	/** @return the {@code max-files-per-commit} setting (default 20) */
	public int getMaxFilesPerCommit() {
		return maxFilesPerCommit;
	}

	/** @param maxFilesPerCommit the new maximum number of files per commit */
	public void setMaxFilesPerCommit(int maxFilesPerCommit) {
		this.maxFilesPerCommit = maxFilesPerCommit;
	}

	/** @return the {@code timeout-per-repo-minutes} setting (default 10) */
	public int getTimeoutPerRepoMinutes() {
		return timeoutPerRepoMinutes;
	}

	/** @param timeoutPerRepoMinutes the new per-repository timeout in minutes */
	public void setTimeoutPerRepoMinutes(int timeoutPerRepoMinutes) {
		this.timeoutPerRepoMinutes = timeoutPerRepoMinutes;
	}
}