diff --git a/Makefile b/Makefile
index 830a2246..859cd0db 100644
--- a/Makefile
+++ b/Makefile
@@ -231,8 +231,8 @@ else
 		echo -n "Enter choice (default: 2): "; \
 		read DELETE_VOLUMES_CHOICE; \
 		export DELETE_VOLUMES_CHOICE; \
-	fi
-	@$(MAKE) label-studio-$(INSTALLER)-uninstall DELETE_VOLUMES_CHOICE=$$DELETE_VOLUMES_CHOICE; \
+	fi; \
+	$(MAKE) label-studio-$(INSTALLER)-uninstall DELETE_VOLUMES_CHOICE=$$DELETE_VOLUMES_CHOICE; \
 	$(MAKE) milvus-$(INSTALLER)-uninstall DELETE_VOLUMES_CHOICE=$$DELETE_VOLUMES_CHOICE; \
 	$(MAKE) deer-flow-$(INSTALLER)-uninstall DELETE_VOLUMES_CHOICE=$$DELETE_VOLUMES_CHOICE; \
 	$(MAKE) datamate-$(INSTALLER)-uninstall DELETE_VOLUMES_CHOICE=$$DELETE_VOLUMES_CHOICE
diff --git a/backend/api-gateway/src/main/java/com/datamate/gateway/ApiGatewayApplication.java b/backend/api-gateway/src/main/java/com/datamate/gateway/ApiGatewayApplication.java
index ee504973..687645bc 100644
--- a/backend/api-gateway/src/main/java/com/datamate/gateway/ApiGatewayApplication.java
+++ b/backend/api-gateway/src/main/java/com/datamate/gateway/ApiGatewayApplication.java
@@ -45,6 +45,16 @@ public RouteLocator customRouteLocator(RouteLocatorBuilder builder) {
                 .route("python-service", r -> r.path("/api/rag/**", "api/models/**")
                         .uri("http://datamate-backend-python:18000"))
 
+                // Data evaluation service routes
+                .route("data-operator", r -> r.path("/api/operators/**")
+                        .uri("http://datamate-backend-python:18000"))
+
+                .route("data-categories", r -> r.path("/api/categories/**")
+                        .uri("http://datamate-backend-python:18000"))
+
+                .route("data-cleaning", r -> r.path("/api/cleaning/**")
+                        .uri("http://datamate-backend-python:18000"))
+
                 .route("deer-flow-frontend", r -> r.path("/chat/**")
                         .uri("http://deer-flow-frontend:3000"))
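Reviewer note on the gateway hunk above: the new predicates only match absolute request paths. A minimal, standalone sketch, using Spring's `AntPathMatcher` as a stand-in for the gateway's `PathPattern` matching (the class name here is illustrative), shows the behaviour and why the pre-existing `api/models/**` pattern in the context line, which lacks a leading slash, can never match:

```java
import org.springframework.util.AntPathMatcher;

// Standalone sketch of the path matching behind the new routes. Spring Cloud
// Gateway actually parses PathPatterns, but AntPathMatcher has the same
// "/**" semantics and is simpler to run in isolation.
public class RoutePatternCheck {
    public static void main(String[] args) {
        AntPathMatcher matcher = new AntPathMatcher();

        System.out.println(matcher.match("/api/operators/**", "/api/operators/42"));    // true
        System.out.println(matcher.match("/api/cleaning/**", "/api/cleaning/tasks/7")); // true

        // A pattern without a leading slash never matches an absolute path:
        System.out.println(matcher.match("api/models/**", "/api/models/1"));            // false
    }
}
```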
diff --git a/backend/openapi/README.md b/backend/openapi/README.md
index 18fbe63d..d03f1cc4 100644
--- a/backend/openapi/README.md
+++ b/backend/openapi/README.md
@@ -126,8 +126,6 @@ OPENAPI_DIR="openapi/specs"
 SERVICES=(
   "data-annotation-service"
   "data-management-service"
-  "operator-market-service"
-  "data-cleaning-service"
   "data-synthesis-service"
   "data-evaluation-service"
   "pipeline-orchestration-service"
diff --git a/backend/services/data-cleaning-service/pom.xml b/backend/services/data-cleaning-service/pom.xml
deleted file mode 100644
index c1fa2c9b..00000000
--- a/backend/services/data-cleaning-service/pom.xml
+++ /dev/null
@@ -1,89 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-
-    <parent>
-        <groupId>com.datamate</groupId>
-        <artifactId>services</artifactId>
-        <version>1.0.0-SNAPSHOT</version>
-        <relativePath>../pom.xml</relativePath>
-    </parent>
-
-    <artifactId>data-cleaning-service</artifactId>
-    <name>Data Cleaning Service</name>
-    <description>Data cleaning service</description>
-
-    <dependencies>
-        <dependency>
-            <groupId>com.datamate</groupId>
-            <artifactId>domain-common</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>com.datamate</groupId>
-            <artifactId>data-management-service</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>com.datamate</groupId>
-            <artifactId>operator-market-service</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-starter-test</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-starter-web</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.springdoc</groupId>
-            <artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.projectlombok</groupId>
-            <artifactId>lombok</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.openapitools</groupId>
-            <artifactId>jackson-databind-nullable</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.commons</groupId>
-            <artifactId>commons-compress</artifactId>
-            <version>1.26.1</version>
-        </dependency>
-
-        <dependency>
-            <groupId>org.mapstruct</groupId>
-            <artifactId>mapstruct</artifactId>
-        </dependency>
-
-        <dependency>
-            <groupId>org.mapstruct</groupId>
-            <artifactId>mapstruct-processor</artifactId>
-            <version>${mapstruct.version}</version>
-            <scope>provided</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.springframework.data</groupId>
-            <artifactId>spring-data-commons</artifactId>
-        </dependency>
-    </dependencies>
-
-    <build>
-        <plugins>
-            <plugin>
-                <groupId>org.springframework.boot</groupId>
-                <artifactId>spring-boot-maven-plugin</artifactId>
-            </plugin>
-        </plugins>
-    </build>
-</project>
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/DataCleaningServiceConfiguration.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/DataCleaningServiceConfiguration.java
deleted file mode 100644
index 1ea5ad09..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/DataCleaningServiceConfiguration.java
+++ /dev/null
@@ -1,18 +0,0 @@
-package com.datamate.cleaning;
-
-import org.springframework.context.annotation.ComponentScan;
-import org.springframework.scheduling.annotation.EnableAsync;
-import org.springframework.scheduling.annotation.EnableScheduling;
-
-/**
- * Data collection service configuration class
- * DataX-based data collection and synchronization service, supporting ingestion and aggregation from multiple data sources
- */
-@EnableAsync
-@EnableScheduling
-@ComponentScan(basePackages = {
-    "com.datamate.cleaning"
-})
-public class DataCleaningServiceConfiguration {
-    // Configuration class for JAR packaging - no main method needed
-}
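The deleted module shipped as a plain JAR: the configuration class above has no main method. A hypothetical host application (class name and wiring assumed for illustration, not taken from this repository) would have activated its beans explicitly, for example via `@Import`:

```java
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.context.annotation.Import;

import com.datamate.cleaning.DataCleaningServiceConfiguration;

// Hypothetical host service: the removed module had no entry point of its
// own, so a deployable service pulled in its component scan, async, and
// scheduling setup by importing the configuration class.
@SpringBootApplication
@Import(DataCleaningServiceConfiguration.class)
public class CleaningHostApplication {
    public static void main(String[] args) {
        SpringApplication.run(CleaningHostApplication.class, args);
    }
}
```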
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/CleaningTaskService.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/CleaningTaskService.java
deleted file mode 100644
index 461e8809..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/CleaningTaskService.java
+++ /dev/null
@@ -1,417 +0,0 @@
-package com.datamate.cleaning.application;
-
-
-import com.datamate.cleaning.application.scheduler.CleaningTaskScheduler;
-import com.datamate.cleaning.common.enums.CleaningTaskStatusEnum;
-import com.datamate.cleaning.common.enums.ExecutorType;
-import com.datamate.cleaning.domain.model.TaskProcess;
-import com.datamate.cleaning.domain.repository.CleaningResultRepository;
-import com.datamate.cleaning.domain.repository.CleaningTaskRepository;
-import com.datamate.cleaning.domain.repository.OperatorInstanceRepository;
-import com.datamate.cleaning.infrastructure.validator.CleanTaskValidator;
-import com.datamate.cleaning.interfaces.dto.*;
-import com.datamate.common.domain.enums.EdgeType;
-import com.datamate.common.domain.enums.NodeType;
-import com.datamate.common.domain.model.LineageEdge;
-import com.datamate.common.domain.model.LineageNode;
-import com.datamate.common.domain.service.LineageService;
-import com.datamate.common.infrastructure.exception.BusinessException;
-import com.datamate.common.infrastructure.exception.SystemErrorCode;
-import com.datamate.common.interfaces.PagedResponse;
-import com.datamate.common.interfaces.PagingQuery;
-import com.datamate.datamanagement.application.DatasetApplicationService;
-import com.datamate.datamanagement.application.DatasetFileApplicationService;
-import com.datamate.datamanagement.common.enums.DatasetType;
-import com.datamate.datamanagement.domain.model.dataset.Dataset;
-import com.datamate.datamanagement.domain.model.dataset.DatasetFile;
-import com.datamate.datamanagement.interfaces.dto.CreateDatasetRequest;
-import com.datamate.operator.domain.repository.OperatorRepository;
-import com.datamate.operator.infrastructure.exception.OperatorErrorCode;
-import com.datamate.operator.interfaces.dto.OperatorDto;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.PropertyNamingStrategies;
-import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.collections4.CollectionUtils;
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.springframework.stereotype.Service;
-import org.springframework.transaction.annotation.Transactional;
-import org.yaml.snakeyaml.DumperOptions;
-import org.yaml.snakeyaml.Yaml;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.util.*;
-import java.util.concurrent.atomic.AtomicReference;
-import java.util.function.Function;
-import java.util.function.Predicate;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-@Slf4j
-@Service
-@RequiredArgsConstructor
-public class CleaningTaskService {
-    private final CleaningTaskRepository cleaningTaskRepo;
-
-    private final OperatorInstanceRepository operatorInstanceRepo;
-
-    private final OperatorRepository operatorRepo;
-
-    private final CleaningResultRepository cleaningResultRepo;
-
-    private final CleaningTaskScheduler taskScheduler;
-
-    private final DatasetApplicationService datasetService;
-
-    private final DatasetFileApplicationService datasetFileService;
-
-    private final CleanTaskValidator cleanTaskValidator;
-
-    private final LineageService lineageService;
-
-    private final String DATASET_PATH = "/dataset";
-
-    private final String FLOW_PATH = "/flow";
-
-    private static final Pattern STANDARD_LEVEL_PATTERN = Pattern.compile(
-            "\\b(DEBUG|Debug|INFO|Info|WARN|Warn|WARNING|Warning|ERROR|Error|FATAL|Fatal)\\b"
-    );
-
-    private static final Pattern EXCEPTION_SUFFIX_PATTERN = Pattern.compile(
-            "\\b\\w+(Warning|Error|Exception)\\b"
-    );
-
-    private final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
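The two `Pattern` fields above drive the `getLogLevel()` method further down in this file. A standalone sketch of that classification, simplified from the deleted method but using the same regexes, shows how continuation lines (stack traces, wrapped messages) inherit the level of the previous line:

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Standalone sketch of the level sniffing the removed getTaskLog() relied on:
// look for a standard level token first, then for *Warning/*Error/*Exception
// suffixes, otherwise keep the level of the previous line.
public class LogLevelSketch {
    private static final Pattern STANDARD_LEVEL_PATTERN = Pattern.compile(
            "\\b(DEBUG|Debug|INFO|Info|WARN|Warn|WARNING|Warning|ERROR|Error|FATAL|Fatal)\\b");
    private static final Pattern EXCEPTION_SUFFIX_PATTERN = Pattern.compile(
            "\\b\\w+(Warning|Error|Exception)\\b");

    static String level(String line, String previous) {
        Matcher std = STANDARD_LEVEL_PATTERN.matcher(line);
        if (std.find()) {
            return std.group(1).toUpperCase();
        }
        Matcher ex = EXCEPTION_SUFFIX_PATTERN.matcher(line);
        if (ex.find()) {
            return "Warning".equalsIgnoreCase(ex.group(1)) ? "WARN" : "ERROR";
        }
        return previous; // continuation lines inherit the last seen level
    }

    public static void main(String[] args) {
        System.out.println(level("2024-01-01 INFO starting", "INFO"));              // INFO
        System.out.println(level("java.lang.NullPointerException: x", "INFO"));     // ERROR
        System.out.println(level("  at com.example.Foo.bar(Foo.java:1)", "ERROR")); // ERROR
    }
}
```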
-
-    public List<CleaningTaskDto> getTasks(String status, String keywords, Integer page, Integer size) {
-        List<CleaningTaskDto> tasks = cleaningTaskRepo.findTasks(status, keywords, page, size);
-        tasks.forEach(this::setProcess);
-        return tasks;
-    }
-
-    private void setProcess(CleaningTaskDto task) {
-        int[] count = cleaningResultRepo.countByInstanceId(task.getId());
-        task.setProgress(CleaningProcess.of(task.getFileCount(), count[0], count[1]));
-    }
-
-    public int countTasks(String status, String keywords) {
-        return cleaningTaskRepo.findTasks(status, keywords, null, null).size();
-    }
-
-    @Transactional
-    public CleaningTaskDto createTask(CreateCleaningTaskRequest request) {
-        cleanTaskValidator.checkNameDuplication(request.getName());
-        cleanTaskValidator.checkInputAndOutput(request.getInstance());
-
-        ExecutorType executorType = cleanTaskValidator.checkAndGetExecutorType(request.getInstance());
-
-        Dataset destDataset;
-        if (StringUtils.isNotBlank(request.getDestDatasetId())) {
-            destDataset = datasetService.getDataset(request.getDestDatasetId());
-        } else {
-            CreateDatasetRequest createDatasetRequest = new CreateDatasetRequest();
-            createDatasetRequest.setName(request.getDestDatasetName());
-            createDatasetRequest.setDatasetType(DatasetType.valueOf(request.getDestDatasetType()));
-            createDatasetRequest.setStatus("ACTIVE");
-            destDataset = datasetService.createDataset(createDatasetRequest);
-        }
-        Dataset srcDataset = datasetService.getDataset(request.getSrcDatasetId());
-
-        CleaningTaskDto task = new CleaningTaskDto();
-        task.setName(request.getName());
-        task.setDescription(request.getDescription());
-        task.setStatus(CleaningTaskStatusEnum.PENDING);
-        String taskId = UUID.randomUUID().toString();
-        task.setId(taskId);
-        task.setSrcDatasetId(request.getSrcDatasetId());
-        task.setSrcDatasetName(request.getSrcDatasetName());
-        task.setDestDatasetId(destDataset.getId());
-        task.setDestDatasetName(destDataset.getName());
-        task.setBeforeSize(srcDataset.getSizeBytes());
-        task.setFileCount(srcDataset.getFileCount().intValue());
-        cleaningTaskRepo.insertTask(task);
-        // Record the lineage relationship
-        addCleaningToGraph(srcDataset, task, destDataset);
-
-        operatorInstanceRepo.insertInstance(taskId, request.getInstance());
-        operatorRepo.incrementUsageCount(request.getInstance().stream()
-                .map(OperatorInstanceDto::getId)
-                .collect(Collectors.toList()));
-
-        prepareTask(task, request.getInstance(), executorType);
-        scanDataset(taskId, request.getSrcDatasetId());
-
-        return task;
-    }
-
-    private void addCleaningToGraph(Dataset srcDataset, CleaningTaskDto task, Dataset destDataset) {
-        LineageNode fromNode = new LineageNode();
-        fromNode.setId(srcDataset.getId());
-        fromNode.setName(srcDataset.getName());
-        fromNode.setDescription(srcDataset.getDescription());
-        fromNode.setNodeType(NodeType.DATASET);
-
-        LineageNode toNode = new LineageNode();
-        toNode.setId(destDataset.getId());
-        toNode.setName(destDataset.getName());
-        toNode.setDescription(destDataset.getDescription());
-        toNode.setNodeType(NodeType.DATASET);
-
-        LineageEdge edge = new LineageEdge();
-        edge.setProcessId(task.getId());
-        edge.setName(task.getName());
-        edge.setDescription(task.getDescription());
-        edge.setEdgeType(EdgeType.DATA_CLEANING);
-        edge.setFromNodeId(fromNode.getId());
-        edge.setToNodeId(toNode.getId());
-
-        lineageService.generateGraph(fromNode, edge, toNode);
-    }
-
-    public CleaningTaskDto getTask(String taskId) {
-        CleaningTaskDto task = cleaningTaskRepo.findTaskById(taskId);
-        setProcess(task);
-        task.setInstance(operatorInstanceRepo.findOperatorByInstanceId(taskId));
-        return task;
-    }
-
-    public List<CleaningResultDto> getTaskResults(String taskId) {
-        return cleaningResultRepo.findByInstanceId(taskId);
-    }
-
-    public List<CleaningTaskLog> getTaskLog(String taskId, int retryCount) {
-        cleanTaskValidator.checkTaskId(taskId);
-        String logPath = FLOW_PATH + "/" + taskId + "/output.log";
-        if (retryCount > 0) {
-            logPath += "."
+ retryCount; - } - try (Stream lines = Files.lines(Paths.get(logPath))) { - List logs = new ArrayList<>(); - AtomicReference lastLevel = new AtomicReference<>("INFO"); - lines.forEach(line -> { - lastLevel.set(getLogLevel(line, lastLevel.get())); - CleaningTaskLog log = new CleaningTaskLog(); - log.setLevel(lastLevel.get()); - log.setMessage(line); - logs.add(log); - }); - return logs; - } catch (IOException e) { - log.error("Fail to read log file {}", logPath, e); - return Collections.emptyList(); - } - } - - private String getLogLevel(String logLine, String defaultLevel) { - if (logLine == null || logLine.trim().isEmpty()) { - return defaultLevel; - } - - Matcher stdMatcher = STANDARD_LEVEL_PATTERN.matcher(logLine); - if (stdMatcher.find()) { - return stdMatcher.group(1).toUpperCase(); - } - - Matcher exMatcher = EXCEPTION_SUFFIX_PATTERN.matcher(logLine); - if (exMatcher.find()) { - String match = exMatcher.group(1).toUpperCase(); - if ("WARNING".equals(match)) return "WARN"; - if ("ERROR".equals(match) || "EXCEPTION".equals(match)) return "ERROR"; - } - return defaultLevel; - } - - @Transactional - public void deleteTask(String taskId) { - cleanTaskValidator.checkTaskId(taskId); - cleaningTaskRepo.deleteTaskById(taskId); - operatorInstanceRepo.deleteByInstanceId(taskId); - cleaningResultRepo.deleteByInstanceId(taskId); - try { - FileUtils.deleteDirectory(new File(FLOW_PATH + "/" + taskId)); - } catch (IOException e) { - log.warn("Can't delete flow path with task id: {}.", taskId, e); - } - } - - public void executeTask(String taskId) { - List succeed = cleaningResultRepo.findByInstanceId(taskId, "COMPLETED"); - Set succeedSet = succeed.stream().map(CleaningResultDto::getSrcFileId).collect(Collectors.toSet()); - CleaningTaskDto task = cleaningTaskRepo.findTaskById(taskId); - scanDataset(taskId, task.getSrcDatasetId(), succeedSet); - cleaningResultRepo.deleteByInstanceId(taskId, "FAILED"); - taskScheduler.executeTask(taskId, task.getRetryCount() + 1); - } - - private void prepareTask(CleaningTaskDto task, List instances, ExecutorType executorType) { - List allOperators = operatorRepo.findAllOperators(); - Map operatorDtoMap = allOperators.stream() - .collect(Collectors.toMap(OperatorDto::getId, Function.identity())); - - TaskProcess process = new TaskProcess(); - process.setInstanceId(task.getId()); - process.setDatasetId(task.getDestDatasetId()); - process.setExecutorType(executorType.getValue()); - process.setDatasetPath(FLOW_PATH + "/" + task.getId() + "/dataset.jsonl"); - process.setExportPath(DATASET_PATH + "/" + task.getDestDatasetId()); - process.setProcess(instances.stream() - .map(instance -> { - OperatorDto operatorDto = operatorDtoMap.get(instance.getId()); - Map stringObjectMap = getDefaultValue(operatorDto); - stringObjectMap.putAll(instance.getOverrides()); - Map runtime = getRuntime(operatorDto); - stringObjectMap.putAll(runtime); - return Map.of(instance.getId(), stringObjectMap); - }) - .toList()); - - ObjectMapper jsonMapper = new ObjectMapper(new YAMLFactory()); - jsonMapper.setPropertyNamingStrategy(PropertyNamingStrategies.SNAKE_CASE); - JsonNode jsonNode = jsonMapper.valueToTree(process); - - DumperOptions options = new DumperOptions(); - options.setIndent(2); - options.setDefaultFlowStyle(DumperOptions.FlowStyle.BLOCK); - Yaml yaml = new Yaml(options); - - File file = new File(FLOW_PATH + "/" + task.getId() + "/process.yaml"); - file.getParentFile().mkdirs(); - - try (FileWriter writer = new FileWriter(file)) { - yaml.dump(jsonMapper.treeToValue(jsonNode, 
Map.class), writer); - } catch (IOException e) { - log.error("Failed to prepare process.yaml.", e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - } - - private Map getDefaultValue(OperatorDto operatorDto) { - if (StringUtils.isBlank(operatorDto.getSettings())) { - return new HashMap<>(); - } - - Map defaultSettings = new HashMap<>(); - try { - Map> settings = OBJECT_MAPPER.readValue(operatorDto.getSettings(), Map.class); - for (Map.Entry> entry : settings.entrySet()) { - String key = entry.getKey(); - Map setting = entry.getValue(); - String type = setting.get("type").toString(); - switch (type) { - case "slider": - case "switch": - case "select": - case "input": - case "radio": - case "checkbox": - if (setting.containsKey("defaultVal")) { - defaultSettings.put(key, setting.get("defaultVal")); - } - break; - case "range": - List rangeDefault = getRangeDefault(setting); - if (CollectionUtils.isNotEmpty(rangeDefault)) { - defaultSettings.put(key, rangeDefault); - } - break; - default: - } - } - return defaultSettings; - } catch (JsonProcessingException e) { - throw BusinessException.of(OperatorErrorCode.SETTINGS_PARSE_FAILED, e.getMessage()); - } - } - - private List getRangeDefault(Map setting) { - List defaultValue = new ArrayList<>(); - Object properties = setting.get("properties"); - if (properties instanceof List list) { - for (Object o : list) { - Map map = OBJECT_MAPPER.convertValue(o, Map.class); - if (map.containsKey("defaultVal")) { - defaultValue.add(map.get("defaultVal")); - } - } - } - return defaultValue; - } - - private Map getRuntime(OperatorDto operatorDto) { - if (StringUtils.isBlank(operatorDto.getRuntime())) { - return new HashMap<>(); - } - try { - return OBJECT_MAPPER.readValue(operatorDto.getRuntime(), Map.class); - } catch (JsonProcessingException e) { - throw BusinessException.of(OperatorErrorCode.SETTINGS_PARSE_FAILED, e.getMessage()); - } - } - - private void scanDataset(String taskId, String srcDatasetId) { - doScan(taskId, srcDatasetId, file -> true); - } - - private void scanDataset(String taskId, String srcDatasetId, Set succeedFiles) { - doScan(taskId, srcDatasetId, file -> !succeedFiles.contains(file.getId())); - } - - private void doScan(String taskId, String srcDatasetId, Predicate filterCondition) { - cleanTaskValidator.checkTaskId(taskId); - String targetFilePath = FLOW_PATH + "/" + taskId + "/dataset.jsonl"; - File targetFile = new File(targetFilePath); - if (targetFile.getParentFile() != null && !targetFile.getParentFile().exists()) { - targetFile.getParentFile().mkdirs(); - } - - int pageNumber = 0; - int pageSize = 500; - try (BufferedWriter writer = new BufferedWriter(new FileWriter(targetFile))) { - PagedResponse datasetFiles; - do { - PagingQuery pageRequest = new PagingQuery(pageNumber, pageSize); - datasetFiles = datasetFileService.getDatasetFiles(srcDatasetId, null, null, null, pageRequest); - if (datasetFiles.getContent().isEmpty()) { - break; - } - for (DatasetFile content : datasetFiles.getContent()) { - if (!filterCondition.test(content)) { - continue; - } - Map fileMap = Map.of( - "fileName", content.getFileName(), - "fileSize", content.getFileSize(), - "filePath", content.getFilePath(), - "fileType", content.getFileType(), - "fileId", content.getId() - ); - writer.write(OBJECT_MAPPER.writeValueAsString(fileMap)); - writer.newLine(); - } - pageNumber++; - } while (pageNumber < datasetFiles.getTotalPages()); - } catch (IOException e) { - log.error("Failed to write dataset.jsonl for taskId: {}", taskId, e); - throw 
BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - } - - public void stopTask(String taskId) { - taskScheduler.stopTask(taskId); - } - - public List getInstanceByTemplateId(String templateId) { - return operatorInstanceRepo.findInstanceByInstanceId(templateId); - } -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/CleaningTemplateService.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/CleaningTemplateService.java deleted file mode 100644 index e7364e48..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/CleaningTemplateService.java +++ /dev/null @@ -1,117 +0,0 @@ -package com.datamate.cleaning.application; - - -import com.datamate.cleaning.domain.repository.CleaningTemplateRepository; -import com.datamate.cleaning.domain.repository.OperatorInstanceRepository; -import com.datamate.cleaning.infrastructure.validator.CleanTaskValidator; -import com.datamate.cleaning.interfaces.dto.*; -import com.datamate.cleaning.domain.model.entity.TemplateWithInstance; -import com.datamate.common.infrastructure.exception.BusinessException; -import com.datamate.operator.application.OperatorService; -import com.datamate.operator.domain.repository.OperatorViewRepository; -import com.datamate.operator.infrastructure.exception.OperatorErrorCode; -import com.datamate.operator.interfaces.dto.OperatorDto; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import lombok.RequiredArgsConstructor; -import org.apache.commons.lang3.StringUtils; -import org.springframework.stereotype.Service; -import org.springframework.transaction.annotation.Transactional; - -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.UUID; -import java.util.function.Function; -import java.util.stream.Collectors; - -@Service -@RequiredArgsConstructor -public class CleaningTemplateService { - private final CleaningTemplateRepository cleaningTemplateRepo; - - private final OperatorInstanceRepository operatorInstanceRepo; - - private final OperatorViewRepository operatorViewRepo; - - private final CleanTaskValidator cleanTaskValidator; - - private final OperatorService operatorService; - - private final ObjectMapper objectMapper = new ObjectMapper(); - - public List getTemplates(String keywords) { - List allOperators = - operatorViewRepo.findOperatorsByCriteria(null, null, null, null, null); - Map operatorsMap = allOperators.stream() - .collect(Collectors.toMap(OperatorDto::getId, Function.identity())); - List allTemplates = cleaningTemplateRepo.findAllTemplates(keywords); - Map> templatesMap = allTemplates.stream() - .collect(Collectors.groupingBy(TemplateWithInstance::getId)); - return templatesMap.entrySet().stream().map(twi -> { - List value = twi.getValue(); - CleaningTemplateDto template = new CleaningTemplateDto(); - template.setId(twi.getKey()); - template.setName(value.getFirst().getName()); - template.setDescription(value.getFirst().getDescription()); - template.setInstance(value.stream().filter(v -> StringUtils.isNotBlank(v.getOperatorId())) - .sorted(Comparator.comparingInt(TemplateWithInstance::getOpIndex)) - .map(v -> { - OperatorDto operator = operatorsMap.get(v.getOperatorId()); - if (StringUtils.isNotBlank(v.getSettingsOverride())) { - try { - operator.setOverrides(objectMapper.readValue(v.getSettingsOverride(), Map.class)); - } catch (JsonProcessingException e) { - 
throw BusinessException.of(OperatorErrorCode.SETTINGS_PARSE_FAILED, e.getMessage()); - } - operatorService.overrideSettings(operator); - } - return operator; - }).toList()); - template.setCreatedAt(value.getFirst().getCreatedAt()); - template.setUpdatedAt(value.getFirst().getUpdatedAt()); - return template; - }).toList(); - } - - @Transactional - public CleaningTemplateDto createTemplate(CreateCleaningTemplateRequest request) { - cleanTaskValidator.checkInputAndOutput(request.getInstance()); - cleanTaskValidator.checkAndGetExecutorType(request.getInstance()); - CleaningTemplateDto template = new CleaningTemplateDto(); - String templateId = UUID.randomUUID().toString(); - template.setId(templateId); - template.setName(request.getName()); - template.setDescription(request.getDescription()); - cleaningTemplateRepo.insertTemplate(template); - - operatorInstanceRepo.insertInstance(templateId, request.getInstance()); - return template; - } - - public CleaningTemplateDto getTemplate(String templateId) { - CleaningTemplateDto template = cleaningTemplateRepo.findTemplateById(templateId); - template.setInstance(operatorInstanceRepo.findOperatorByInstanceId(templateId)); - return template; - } - - @Transactional - public CleaningTemplateDto updateTemplate(String templateId, UpdateCleaningTemplateRequest request) { - CleaningTemplateDto template = cleaningTemplateRepo.findTemplateById(templateId); - if (template == null) { - return null; - } - template.setName(request.getName()); - template.setDescription(request.getDescription()); - cleaningTemplateRepo.updateTemplate(template); - operatorInstanceRepo.deleteByInstanceId(templateId); - operatorInstanceRepo.insertInstance(templateId, request.getInstance()); - return template; - } - - @Transactional - public void deleteTemplate(String templateId) { - cleaningTemplateRepo.deleteTemplate(templateId); - operatorInstanceRepo.deleteByInstanceId(templateId); - } -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/scheduler/CleaningTaskScheduler.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/scheduler/CleaningTaskScheduler.java deleted file mode 100644 index 92df8457..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/application/scheduler/CleaningTaskScheduler.java +++ /dev/null @@ -1,44 +0,0 @@ -package com.datamate.cleaning.application.scheduler; - -import com.datamate.cleaning.infrastructure.httpclient.RuntimeClient; -import com.datamate.cleaning.common.enums.CleaningTaskStatusEnum; -import com.datamate.cleaning.domain.repository.CleaningTaskRepository; -import com.datamate.cleaning.interfaces.dto.CleaningTaskDto; -import lombok.RequiredArgsConstructor; -import org.springframework.stereotype.Service; - -import java.time.LocalDateTime; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; - -@Service -@RequiredArgsConstructor -public class CleaningTaskScheduler { - private final CleaningTaskRepository cleaningTaskRepo; - - private final RuntimeClient runtimeClient; - - private final ExecutorService taskExecutor = Executors.newFixedThreadPool(5); - - public void executeTask(String taskId, int retryCount) { - taskExecutor.submit(() -> submitTask(taskId, retryCount)); - } - - private void submitTask(String taskId, int retryCount) { - CleaningTaskDto task = new CleaningTaskDto(); - task.setId(taskId); - task.setStatus(CleaningTaskStatusEnum.RUNNING); - task.setStartedAt(LocalDateTime.now()); - 
-        task.setRetryCount(retryCount);
-        cleaningTaskRepo.updateTask(task);
-        runtimeClient.submitTask(taskId);
-    }
-
-    public void stopTask(String taskId) {
-        runtimeClient.stopTask(taskId);
-        CleaningTaskDto task = new CleaningTaskDto();
-        task.setId(taskId);
-        task.setStatus(CleaningTaskStatusEnum.STOPPED);
-        cleaningTaskRepo.updateTask(task);
-    }
-}
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/common/enums/CleaningTaskStatusEnum.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/common/enums/CleaningTaskStatusEnum.java
deleted file mode 100644
index 458ed266..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/common/enums/CleaningTaskStatusEnum.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package com.datamate.cleaning.common.enums;
-
-import com.datamate.common.infrastructure.exception.BusinessException;
-import com.datamate.common.infrastructure.exception.SystemErrorCode;
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonValue;
-
-public enum CleaningTaskStatusEnum {
-    PENDING("PENDING"),
-
-    RUNNING("RUNNING"),
-
-    COMPLETED("COMPLETED"),
-
-    STOPPED("STOPPED"),
-
-    FAILED("FAILED");
-
-    private final String value;
-
-    CleaningTaskStatusEnum(String value) {
-        this.value = value;
-    }
-
-    @JsonValue
-    public String getValue() {
-        return value;
-    }
-
-    @JsonCreator
-    public static CleaningTaskStatusEnum fromValue(String value) {
-        for (CleaningTaskStatusEnum b : CleaningTaskStatusEnum.values()) {
-            if (b.value.equals(value)) {
-                return b;
-            }
-        }
-        throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
-    }
-}
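The `@JsonValue`/`@JsonCreator` pair above fixes the wire format of the status. A minimal round-trip sketch (it compiles only against the removed enum, and is shown to document the behaviour the API relied on):

```java
import com.datamate.cleaning.common.enums.CleaningTaskStatusEnum;
import com.fasterxml.jackson.databind.ObjectMapper;

// Round-trip sketch: @JsonValue writes the raw string, @JsonCreator funnels
// deserialization through fromValue(), so an unknown status fails fast
// instead of silently becoming null.
public class StatusRoundTrip {
    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new ObjectMapper();

        String json = mapper.writeValueAsString(CleaningTaskStatusEnum.RUNNING);
        System.out.println(json); // "RUNNING"

        CleaningTaskStatusEnum parsed = mapper.readValue(json, CleaningTaskStatusEnum.class);
        System.out.println(parsed); // RUNNING

        // An unmapped value throws: fromValue() raises the project's
        // BusinessException, which Jackson wraps in a deserialization error.
        // mapper.readValue("\"ARCHIVED\"", CleaningTaskStatusEnum.class);
    }
}
```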
"算子执行器不匹配"); - - private final String code; - private final String message; -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/TaskProcess.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/TaskProcess.java deleted file mode 100644 index 4cd61a25..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/TaskProcess.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.datamate.cleaning.domain.model; - -import lombok.Getter; -import lombok.Setter; - -import java.util.List; -import java.util.Map; - - -@Getter -@Setter -public class TaskProcess { - private String instanceId; - - private String datasetId; - - private String datasetPath; - - private String exportPath; - - private String executorType; - - private List>> process; -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/CleaningResult.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/CleaningResult.java deleted file mode 100644 index 16fbad59..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/CleaningResult.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.datamate.cleaning.domain.model.entity; - -import com.baomidou.mybatisplus.annotation.TableName; -import lombok.Getter; -import lombok.Setter; - -@Getter -@Setter -@TableName(value = "t_clean_result", autoResultMap = true) -public class CleaningResult { - private String instanceId; - - private String srcFileId; - - private String destFileId; - - private String srcName; - - private String destName; - - private String srcType; - - private String destType; - - private long srcSize; - - private long destSize; - - private String status; - - private String result; -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/CleaningTask.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/CleaningTask.java deleted file mode 100644 index a612cbe8..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/CleaningTask.java +++ /dev/null @@ -1,45 +0,0 @@ -package com.datamate.cleaning.domain.model.entity; - -import com.baomidou.mybatisplus.annotation.TableName; -import com.datamate.cleaning.common.enums.CleaningTaskStatusEnum; -import com.datamate.common.domain.model.base.BaseEntity; -import lombok.Getter; -import lombok.Setter; - -import java.time.LocalDateTime; - -/** - * CleaningTask - */ - -@Getter -@Setter -@TableName(value = "t_clean_task", autoResultMap = true) -public class CleaningTask extends BaseEntity { - private String name; - - private String description; - - private CleaningTaskStatusEnum status; - - private String srcDatasetId; - - private String srcDatasetName; - - private String destDatasetId; - - private String destDatasetName; - - private Long beforeSize; - - private Long afterSize; - - private Integer fileCount; - - private Integer retryCount; - - private LocalDateTime startedAt; - - private LocalDateTime finishedAt; -} - diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/CleaningTemplate.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/CleaningTemplate.java deleted file mode 100644 index 1486edec..00000000 --- 
a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/CleaningTemplate.java +++ /dev/null @@ -1,26 +0,0 @@ -package com.datamate.cleaning.domain.model.entity; - -import com.baomidou.mybatisplus.annotation.TableId; -import com.baomidou.mybatisplus.annotation.TableName; -import lombok.Getter; -import lombok.Setter; - -import java.time.LocalDateTime; - -@Getter -@Setter -@TableName(value = "t_clean_template", autoResultMap = true) -public class CleaningTemplate { - @TableId - private String id; - - private String name; - - private String description; - - private LocalDateTime createdAt; - - private LocalDateTime updatedAt; - - private String createdBy; -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/OperatorInstance.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/OperatorInstance.java deleted file mode 100644 index 3d4ccea8..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/OperatorInstance.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.datamate.cleaning.domain.model.entity; - -import com.baomidou.mybatisplus.annotation.TableName; -import lombok.Getter; -import lombok.Setter; - -@Getter -@Setter -@TableName(value = "t_operator_instance", autoResultMap = true) -public class OperatorInstance { - private String instanceId; - - private String operatorId; - - private int opIndex; - - private String settingsOverride; -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/TemplateWithInstance.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/TemplateWithInstance.java deleted file mode 100644 index 3df00e6c..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/model/entity/TemplateWithInstance.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.datamate.cleaning.domain.model.entity; - -import lombok.Getter; -import lombok.Setter; - -import java.time.LocalDateTime; - - -@Getter -@Setter -public class TemplateWithInstance { - private String id; - - private String name; - - private String description; - - private LocalDateTime createdAt; - - private LocalDateTime updatedAt; - - private String operatorId; - - private Integer opIndex; - - private String settingsOverride; -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/repository/CleaningResultRepository.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/repository/CleaningResultRepository.java deleted file mode 100644 index b7358398..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/repository/CleaningResultRepository.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.datamate.cleaning.domain.repository; - - -import com.baomidou.mybatisplus.extension.repository.IRepository; -import com.datamate.cleaning.domain.model.entity.CleaningResult; -import com.datamate.cleaning.interfaces.dto.CleaningResultDto; - -import java.util.List; - -public interface CleaningResultRepository extends IRepository { - void deleteByInstanceId(String instanceId); - - void deleteByInstanceId(String instanceId, String status); - - int[] countByInstanceId(String instanceId); - - List findByInstanceId(String instanceId); - - List findByInstanceId(String instanceId, String status); -} diff 
--git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/repository/CleaningTaskRepository.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/repository/CleaningTaskRepository.java deleted file mode 100644 index e8aeb1f4..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/repository/CleaningTaskRepository.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.datamate.cleaning.domain.repository; - -import com.baomidou.mybatisplus.extension.repository.IRepository; -import com.datamate.cleaning.domain.model.entity.CleaningTask; -import com.datamate.cleaning.interfaces.dto.CleaningTaskDto; - -import java.util.List; - -public interface CleaningTaskRepository extends IRepository { - List findTasks(String status, String keywords, Integer page, Integer size); - - CleaningTaskDto findTaskById(String taskId); - - void insertTask(CleaningTaskDto task); - - void updateTask(CleaningTaskDto task); - - void deleteTaskById(String taskId); - - boolean isNameExist(String name); -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/repository/CleaningTemplateRepository.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/repository/CleaningTemplateRepository.java deleted file mode 100644 index 72b60fcf..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/repository/CleaningTemplateRepository.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.datamate.cleaning.domain.repository; - -import com.baomidou.mybatisplus.extension.repository.IRepository; -import com.datamate.cleaning.domain.model.entity.TemplateWithInstance; -import com.datamate.cleaning.domain.model.entity.CleaningTemplate; -import com.datamate.cleaning.interfaces.dto.CleaningTemplateDto; - -import java.util.List; - -public interface CleaningTemplateRepository extends IRepository { - List findAllTemplates(String keywords); - - CleaningTemplateDto findTemplateById(String templateId); - - void insertTemplate(CleaningTemplateDto template); - - void updateTemplate(CleaningTemplateDto template); - - void deleteTemplate(String templateId); -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/repository/OperatorInstanceRepository.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/repository/OperatorInstanceRepository.java deleted file mode 100644 index 9d1900ec..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/domain/repository/OperatorInstanceRepository.java +++ /dev/null @@ -1,18 +0,0 @@ -package com.datamate.cleaning.domain.repository; - -import com.baomidou.mybatisplus.extension.repository.IRepository; -import com.datamate.cleaning.interfaces.dto.OperatorInstanceDto; -import com.datamate.cleaning.domain.model.entity.OperatorInstance; -import com.datamate.operator.interfaces.dto.OperatorDto; - -import java.util.List; - -public interface OperatorInstanceRepository extends IRepository { - void insertInstance(String instanceId, List instances); - - void deleteByInstanceId(String instanceId); - - List findOperatorByInstanceId(String instanceId); - - List findInstanceByInstanceId(String instanceId); -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/CleaningResultConverter.java 
b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/CleaningResultConverter.java deleted file mode 100644 index b8866a01..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/CleaningResultConverter.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.datamate.cleaning.infrastructure.converter; - -import com.datamate.cleaning.domain.model.entity.CleaningResult; -import com.datamate.cleaning.interfaces.dto.CleaningResultDto; -import org.mapstruct.Mapper; -import org.mapstruct.factory.Mappers; - -import java.util.List; - -@Mapper -public interface CleaningResultConverter { - CleaningResultConverter INSTANCE = Mappers.getMapper(CleaningResultConverter.class); - - List convertEntityToDto(List cleaningResult); -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/CleaningTaskConverter.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/CleaningTaskConverter.java deleted file mode 100644 index 01da42c0..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/CleaningTaskConverter.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.datamate.cleaning.infrastructure.converter; - -import com.datamate.cleaning.domain.model.entity.CleaningTask; -import com.datamate.cleaning.interfaces.dto.CleaningTaskDto; -import org.mapstruct.Mapper; -import org.mapstruct.factory.Mappers; - -import java.util.List; - -@Mapper -public interface CleaningTaskConverter { - CleaningTaskConverter INSTANCE = Mappers.getMapper(CleaningTaskConverter.class); - - CleaningTaskDto fromEntityToDto(CleaningTask source); - - List fromEntityToDto(List source); - - CleaningTask fromDtoToEntity(CleaningTaskDto source); -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/CleaningTemplateConverter.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/CleaningTemplateConverter.java deleted file mode 100644 index a77c5362..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/CleaningTemplateConverter.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.datamate.cleaning.infrastructure.converter; - -import com.datamate.cleaning.domain.model.entity.CleaningTemplate; -import com.datamate.cleaning.interfaces.dto.CleaningTemplateDto; -import org.mapstruct.Mapper; -import org.mapstruct.factory.Mappers; - -@Mapper -public interface CleaningTemplateConverter { - CleaningTemplateConverter INSTANCE = Mappers.getMapper(CleaningTemplateConverter.class); - - CleaningTemplate fromDtoToEntity(CleaningTemplateDto dto); - - CleaningTemplateDto fromEntityToDto(CleaningTemplate entity); -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/OperatorInstanceConverter.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/OperatorInstanceConverter.java deleted file mode 100644 index 3cb47f32..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/converter/OperatorInstanceConverter.java +++ /dev/null @@ -1,72 +0,0 @@ -package com.datamate.cleaning.infrastructure.converter; - - -import com.datamate.cleaning.domain.model.entity.OperatorInstance; -import 
com.datamate.cleaning.interfaces.dto.OperatorInstanceDto; -import com.datamate.common.infrastructure.exception.BusinessException; -import com.datamate.common.infrastructure.exception.SystemErrorCode; -import com.datamate.operator.domain.model.OperatorView; -import com.datamate.operator.interfaces.dto.OperatorDto; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.mapstruct.Mapper; -import org.mapstruct.Mapping; -import org.mapstruct.Named; -import org.mapstruct.factory.Mappers; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; - -@Mapper -public interface OperatorInstanceConverter { - OperatorInstanceConverter INSTANCE = Mappers.getMapper(OperatorInstanceConverter.class); - - ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - - @Mapping(target = "settingsOverride", source = "overrides", qualifiedByName = "mapToString") - @Mapping(target = "operatorId", source = "id") - OperatorInstance fromDtoToEntity(OperatorInstanceDto instance); - - @Mapping(target = "overrides", source = "settingsOverride", qualifiedByName = "stringToMap") - @Mapping(target = "id", source = "operatorId") - OperatorInstanceDto fromEntityToDto(OperatorInstance instance); - - List fromEntityToDtoList(List instance); - - @Named("mapToString") - static String mapToString(Map objects) { - try { - return OBJECT_MAPPER.writeValueAsString(objects); - } catch (JsonProcessingException e) { - throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR); - } - } - - @Named("stringToMap") - static Map stringToMap(String json) { - if (json == null) { - return Collections.emptyMap(); - } - try { - return OBJECT_MAPPER.readValue(json, new TypeReference<>() {}); - } catch (JsonProcessingException e) { - throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR); - } - } - - @Mapping(target = "categories", source = "categories", qualifiedByName = "stringToList") - OperatorDto fromEntityToDto(OperatorView operator); - - List fromEntityToDto(List operator); - - @Named("stringToList") - default List stringToList(String input) { - if (input == null || input.isEmpty()) { - return Collections.emptyList(); - } - return Arrays.stream(input.split(",")).toList(); - } -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/httpclient/RuntimeClient.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/httpclient/RuntimeClient.java deleted file mode 100644 index 0c713563..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/httpclient/RuntimeClient.java +++ /dev/null @@ -1,68 +0,0 @@ -package com.datamate.cleaning.infrastructure.httpclient; - -import com.datamate.common.infrastructure.exception.BusinessException; -import com.datamate.common.infrastructure.exception.SystemErrorCode; -import lombok.extern.slf4j.Slf4j; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.stereotype.Component; - -import java.io.IOException; -import java.net.URI; -import java.net.http.HttpClient; -import java.net.http.HttpRequest; -import java.net.http.HttpResponse; -import java.text.MessageFormat; -import java.time.Duration; - -@Slf4j -@Component -public class RuntimeClient { - private final String CREATE_TASK_URL = "/api/task/{0}/submit"; - - private final String STOP_TASK_URL = "/api/task/{0}/stop"; - - 
@Value("${runtime.protocol:http}") - private String protocol; - - @Value("${runtime.host:datamate-runtime}") - private String host; - - @Value("${runtime.port:8081}") - private int port; - - private final HttpClient CLIENT = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(10)).build(); - - public void submitTask(String taskId) { - send(MessageFormat.format(getRequestUrl(CREATE_TASK_URL), taskId)); - } - - public void stopTask(String taskId) { - send(MessageFormat.format(getRequestUrl(STOP_TASK_URL), taskId)); - } - - private String getRequestUrl(String url) { - return protocol + "://" + host + ":" + port + url; - } - - private void send(String url) { - HttpRequest request = HttpRequest.newBuilder() - .uri(URI.create(url)) - .timeout(Duration.ofSeconds(30)) - .header("Content-Type", "application/json") - .POST(HttpRequest.BodyPublishers.noBody()) - .build(); - - try { - HttpResponse response = CLIENT.send(request, HttpResponse.BodyHandlers.ofString()); - int statusCode = response.statusCode(); - - if (statusCode < 200 || statusCode >= 300) { - log.error("Request failed with status code: {}", statusCode); - throw BusinessException.of(SystemErrorCode.SYSTEM_BUSY); - } - } catch (IOException | InterruptedException e) { - log.error("Error occurred while making the request.", e); - throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR); - } - } -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/Impl/CleaningResultRepositoryImpl.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/Impl/CleaningResultRepositoryImpl.java deleted file mode 100644 index 0ed303b7..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/Impl/CleaningResultRepositoryImpl.java +++ /dev/null @@ -1,58 +0,0 @@ -package com.datamate.cleaning.infrastructure.persistence.Impl; - -import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; -import com.baomidou.mybatisplus.extension.repository.CrudRepository; -import com.datamate.cleaning.common.enums.CleaningTaskStatusEnum; -import com.datamate.cleaning.domain.model.entity.CleaningResult; -import com.datamate.cleaning.domain.repository.CleaningResultRepository; -import com.datamate.cleaning.infrastructure.converter.CleaningResultConverter; -import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningResultMapper; -import com.datamate.cleaning.interfaces.dto.CleaningResultDto; -import lombok.RequiredArgsConstructor; -import org.apache.commons.lang3.StringUtils; -import org.springframework.stereotype.Repository; - -import java.util.List; - -@Repository -@RequiredArgsConstructor -public class CleaningResultRepositoryImpl extends CrudRepository - implements CleaningResultRepository { - private final CleaningResultMapper mapper; - - @Override - public void deleteByInstanceId(String instanceId) { - deleteByInstanceId(instanceId, null); - } - - @Override - public void deleteByInstanceId(String instanceId, String status) { - LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper<>(); - queryWrapper.eq(CleaningResult::getInstanceId, instanceId) - .eq(StringUtils.isNotBlank(status), CleaningResult::getStatus, status); - mapper.delete(queryWrapper); - } - - @Override - public int[] countByInstanceId(String instanceId) { - LambdaQueryWrapper lambdaWrapper = new LambdaQueryWrapper<>(); - lambdaWrapper.eq(CleaningResult::getInstanceId, instanceId); - List cleaningResults = 
mapper.selectList(lambdaWrapper); - int succeed = Math.toIntExact(cleaningResults.stream() - .filter(result -> - StringUtils.equals(result.getStatus(), CleaningTaskStatusEnum.COMPLETED.getValue())) - .count()); - return new int[] {succeed, cleaningResults.size() - succeed}; - } - - public List findByInstanceId(String instanceId) { - return findByInstanceId(instanceId, null); - } - - public List findByInstanceId(String instanceId, String status) { - LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper<>(); - queryWrapper.eq(CleaningResult::getInstanceId, instanceId) - .eq(StringUtils.isNotBlank(status), CleaningResult::getStatus, status); - return CleaningResultConverter.INSTANCE.convertEntityToDto(mapper.selectList(queryWrapper)); - } -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/Impl/CleaningTaskRepositoryImpl.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/Impl/CleaningTaskRepositoryImpl.java deleted file mode 100644 index a8b35580..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/Impl/CleaningTaskRepositoryImpl.java +++ /dev/null @@ -1,65 +0,0 @@ -package com.datamate.cleaning.infrastructure.persistence.Impl; - -import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; -import com.baomidou.mybatisplus.core.metadata.IPage; -import com.baomidou.mybatisplus.extension.plugins.pagination.Page; -import com.baomidou.mybatisplus.extension.repository.CrudRepository; -import com.datamate.cleaning.domain.model.entity.CleaningTask; -import com.datamate.cleaning.domain.repository.CleaningTaskRepository; -import com.datamate.cleaning.infrastructure.converter.CleaningTaskConverter; -import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTaskMapper; -import com.datamate.cleaning.interfaces.dto.CleaningTaskDto; -import lombok.RequiredArgsConstructor; -import org.apache.commons.lang3.StringUtils; -import org.springframework.stereotype.Repository; - -import java.util.List; - - -@Repository -@RequiredArgsConstructor -public class CleaningTaskRepositoryImpl extends CrudRepository - implements CleaningTaskRepository { - private final CleaningTaskMapper mapper; - - public List findTasks(String status, String keywords, Integer page, Integer size) { - LambdaQueryWrapper lambdaWrapper = new LambdaQueryWrapper<>(); - lambdaWrapper.eq(StringUtils.isNotBlank(status), CleaningTask::getStatus, status); - if (StringUtils.isNotBlank(keywords)) { - lambdaWrapper.and(w -> - w.like(CleaningTask::getName, keywords) - .or() - .like(CleaningTask::getDescription, keywords)); - } - lambdaWrapper.orderByDesc(CleaningTask::getCreatedAt); - if (size != null && page != null) { - Page queryPage = new Page<>(page + 1, size); - IPage resultPage = mapper.selectPage(queryPage, lambdaWrapper); - return CleaningTaskConverter.INSTANCE.fromEntityToDto(resultPage.getRecords()); - } else { - return CleaningTaskConverter.INSTANCE.fromEntityToDto(mapper.selectList(lambdaWrapper)); - } - } - - public CleaningTaskDto findTaskById(String taskId) { - return CleaningTaskConverter.INSTANCE.fromEntityToDto(mapper.selectById(taskId)); - } - - public void insertTask(CleaningTaskDto task) { - mapper.insert(CleaningTaskConverter.INSTANCE.fromDtoToEntity(task)); - } - - public void updateTask(CleaningTaskDto task) { - mapper.updateById(CleaningTaskConverter.INSTANCE.fromDtoToEntity(task)); - } - - public void 
deleteTaskById(String taskId) { - mapper.deleteById(taskId); - } - - public boolean isNameExist(String name) { - LambdaQueryWrapper queryWrapper = new LambdaQueryWrapper<>(); - queryWrapper.eq(CleaningTask::getName, name); - return mapper.exists(queryWrapper); - } -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/Impl/CleaningTemplateRepositoryImpl.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/Impl/CleaningTemplateRepositoryImpl.java deleted file mode 100644 index 2afc8618..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/Impl/CleaningTemplateRepositoryImpl.java +++ /dev/null @@ -1,56 +0,0 @@ -package com.datamate.cleaning.infrastructure.persistence.Impl; - - -import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; -import com.baomidou.mybatisplus.extension.repository.CrudRepository; -import com.datamate.cleaning.domain.model.entity.TemplateWithInstance; -import com.datamate.cleaning.domain.model.entity.CleaningTemplate; -import com.datamate.cleaning.domain.repository.CleaningTemplateRepository; -import com.datamate.cleaning.infrastructure.converter.CleaningTemplateConverter; -import com.datamate.cleaning.infrastructure.persistence.mapper.CleaningTemplateMapper; -import com.datamate.cleaning.interfaces.dto.CleaningTemplateDto; -import lombok.RequiredArgsConstructor; -import org.apache.commons.lang3.StringUtils; -import org.springframework.stereotype.Repository; - -import java.util.List; - - -@Repository -@RequiredArgsConstructor -public class CleaningTemplateRepositoryImpl extends CrudRepository - implements CleaningTemplateRepository { - private final CleaningTemplateMapper mapper; - - @Override - public List findAllTemplates(String keywords) { - QueryWrapper queryWrapper = new QueryWrapper<>(); - if (StringUtils.isNotBlank(keywords)) { - queryWrapper.like("name", keywords) - .or() - .like("description", keywords); - } - queryWrapper.orderByDesc("created_at"); - return mapper.findAllTemplates(queryWrapper); - } - - @Override - public CleaningTemplateDto findTemplateById(String templateId) { - return CleaningTemplateConverter.INSTANCE.fromEntityToDto(mapper.selectById(templateId)); - } - - @Override - public void insertTemplate(CleaningTemplateDto template) { - mapper.insert(CleaningTemplateConverter.INSTANCE.fromDtoToEntity(template)); - } - - @Override - public void updateTemplate(CleaningTemplateDto template) { - mapper.updateById(CleaningTemplateConverter.INSTANCE.fromDtoToEntity(template)); - } - - @Override - public void deleteTemplate(String templateId) { - mapper.deleteById(templateId); - } -} diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/Impl/OperatorInstanceRepositoryImpl.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/Impl/OperatorInstanceRepositoryImpl.java deleted file mode 100644 index 813f1e6d..00000000 --- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/Impl/OperatorInstanceRepositoryImpl.java +++ /dev/null @@ -1,53 +0,0 @@ -package com.datamate.cleaning.infrastructure.persistence.Impl; - -import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; -import com.baomidou.mybatisplus.extension.repository.CrudRepository; -import 
-import com.datamate.cleaning.infrastructure.converter.OperatorInstanceConverter;
-import com.datamate.cleaning.interfaces.dto.OperatorInstanceDto;
-import com.datamate.cleaning.domain.model.entity.OperatorInstance;
-import com.datamate.cleaning.domain.repository.OperatorInstanceRepository;
-import com.datamate.cleaning.infrastructure.persistence.mapper.OperatorInstanceMapper;
-import com.datamate.operator.interfaces.dto.OperatorDto;
-import lombok.RequiredArgsConstructor;
-import org.springframework.stereotype.Repository;
-
-import java.util.ArrayList;
-import java.util.List;
-
-@Repository
-@RequiredArgsConstructor
-public class OperatorInstanceRepositoryImpl extends CrudRepository<OperatorInstanceMapper, OperatorInstance>
-    implements OperatorInstanceRepository {
-    private final OperatorInstanceMapper mapper;
-
-    @Override
-    public void insertInstance(String instanceId, List<OperatorInstanceDto> instances) {
-        List<OperatorInstance> operatorInstances = new ArrayList<>();
-        for (int i = 0; i < instances.size(); i++) {
-            OperatorInstance operatorInstance = OperatorInstanceConverter.INSTANCE.fromDtoToEntity(instances.get(i));
-            operatorInstance.setInstanceId(instanceId);
-            operatorInstance.setOpIndex(i + 1);
-            operatorInstances.add(operatorInstance);
-        }
-        mapper.insert(operatorInstances);
-    }
-
-    @Override
-    public void deleteByInstanceId(String instanceId) {
-        LambdaQueryWrapper<OperatorInstance> lambdaWrapper = new LambdaQueryWrapper<>();
-        lambdaWrapper.eq(OperatorInstance::getInstanceId, instanceId);
-        mapper.delete(lambdaWrapper);
-    }
-
-    public List<OperatorDto> findOperatorByInstanceId(String instanceId) {
-        return OperatorInstanceConverter.INSTANCE.fromEntityToDto(mapper.findOperatorByInstanceId(instanceId));
-    }
-
-    @Override
-    public List<OperatorInstanceDto> findInstanceByInstanceId(String instanceId) {
-        LambdaQueryWrapper<OperatorInstance> lambdaWrapper = new LambdaQueryWrapper<>();
-        lambdaWrapper.eq(OperatorInstance::getInstanceId, instanceId)
-            .orderByAsc(OperatorInstance::getOpIndex);
-        return OperatorInstanceConverter.INSTANCE.fromEntityToDtoList(mapper.selectList(lambdaWrapper));
-    }
-}
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningResultMapper.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningResultMapper.java
deleted file mode 100644
index 455de26e..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningResultMapper.java
+++ /dev/null
@@ -1,11 +0,0 @@
-package com.datamate.cleaning.infrastructure.persistence.mapper;
-
-import com.baomidou.mybatisplus.core.mapper.BaseMapper;
-import com.datamate.cleaning.domain.model.entity.CleaningResult;
-import com.datamate.common.infrastructure.config.IgnoreDataScopeAnnotation;
-import org.apache.ibatis.annotations.Mapper;
-
-@Mapper
-@IgnoreDataScopeAnnotation
-public interface CleaningResultMapper extends BaseMapper<CleaningResult> {
-}
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningTaskMapper.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningTaskMapper.java
deleted file mode 100644
index dd144b91..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningTaskMapper.java
+++ /dev/null
@@ -1,9 +0,0 @@
-package com.datamate.cleaning.infrastructure.persistence.mapper;
-
-import com.baomidou.mybatisplus.core.mapper.BaseMapper;
-import com.datamate.cleaning.domain.model.entity.CleaningTask;
-import org.apache.ibatis.annotations.Mapper;
-
-@Mapper
-public interface CleaningTaskMapper extends BaseMapper<CleaningTask> {
-}
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningTemplateMapper.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningTemplateMapper.java
deleted file mode 100644
index be5fca71..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/CleaningTemplateMapper.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package com.datamate.cleaning.infrastructure.persistence.mapper;
-
-import com.baomidou.mybatisplus.core.conditions.Wrapper;
-import com.baomidou.mybatisplus.core.mapper.BaseMapper;
-import com.baomidou.mybatisplus.core.toolkit.Constants;
-import com.datamate.cleaning.domain.model.entity.TemplateWithInstance;
-import com.datamate.cleaning.domain.model.entity.CleaningTemplate;
-import com.datamate.common.infrastructure.config.IgnoreDataScopeAnnotation;
-import org.apache.ibatis.annotations.Mapper;
-import org.apache.ibatis.annotations.Param;
-import org.apache.ibatis.annotations.Select;
-
-import java.util.List;
-
-@Mapper
-@IgnoreDataScopeAnnotation
-public interface CleaningTemplateMapper extends BaseMapper<CleaningTemplate> {
-    @Select("SELECT t.id AS id, name, description, created_at, updated_at, created_by, operator_id, op_index, " +
-        "settings_override FROM t_clean_template t LEFT JOIN t_operator_instance o ON t.id = o.instance_id " +
-        "${ew.customSqlSegment}")
-    List<TemplateWithInstance> findAllTemplates(@Param(Constants.WRAPPER) Wrapper<CleaningTemplate> queryWrapper);
-}
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/OperatorInstanceMapper.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/OperatorInstanceMapper.java
deleted file mode 100644
index 01c838fa..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/persistence/mapper/OperatorInstanceMapper.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package com.datamate.cleaning.infrastructure.persistence.mapper;
-
-import com.baomidou.mybatisplus.core.mapper.BaseMapper;
-import com.datamate.cleaning.domain.model.entity.OperatorInstance;
-import com.datamate.common.infrastructure.config.IgnoreDataScopeAnnotation;
-import com.datamate.operator.domain.model.OperatorView;
-import org.apache.ibatis.annotations.Mapper;
-import org.apache.ibatis.annotations.Select;
-
-import java.util.List;
-
-
-@Mapper
-@IgnoreDataScopeAnnotation
-public interface OperatorInstanceMapper extends BaseMapper<OperatorInstance> {
-    @Select("SELECT o.operator_id as id, o.operator_name as name, o.description, o.version, o.inputs, o.outputs, " +
-        "o.runtime, o.settings, o.created_at, o.updated_at, " +
-        "STRING_AGG(CAST(category_id AS TEXT), ',' ORDER BY o.created_at DESC) AS categories " +
-        "FROM t_operator_instance toi " +
-        "LEFT JOIN v_operator o ON toi.operator_id = o.operator_id " +
-        "WHERE toi.instance_id = #{instanceId} " +
-        "GROUP BY o.operator_id, o.operator_name, o.description, o.version, o.inputs, o.outputs, o.runtime, " +
-        "    o.settings, o.created_at, o.updated_at, toi.op_index " +
-        "ORDER BY toi.op_index")
-    List<OperatorView> findOperatorByInstanceId(String instanceId);
-}
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/validator/CleanTaskValidator.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/validator/CleanTaskValidator.java
deleted file mode 100644
index 82e48c7e..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/infrastructure/validator/CleanTaskValidator.java
+++ /dev/null
@@ -1,84 +0,0 @@
-package com.datamate.cleaning.infrastructure.validator;
-
-import com.datamate.cleaning.common.enums.ExecutorType;
-import com.datamate.cleaning.common.exception.CleanErrorCode;
-import com.datamate.cleaning.domain.repository.CleaningTaskRepository;
-import com.datamate.cleaning.interfaces.dto.OperatorInstanceDto;
-import com.datamate.common.infrastructure.exception.BusinessException;
-import com.datamate.common.infrastructure.exception.SystemErrorCode;
-import com.datamate.common.setting.application.SysParamApplicationService;
-import com.datamate.operator.domain.contants.OperatorConstant;
-import lombok.RequiredArgsConstructor;
-import org.apache.commons.collections4.CollectionUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.springframework.stereotype.Component;
-
-import java.util.List;
-import java.util.Locale;
-import java.util.regex.Pattern;
-
-
-@Component
-@RequiredArgsConstructor
-public class CleanTaskValidator {
-    private final CleaningTaskRepository cleaningTaskRepo;
-
-    private final SysParamApplicationService sysParamApplicationService;
-
-    private final Pattern UUID_PATTERN = Pattern.compile(
-        "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
-    );
-
-    public void checkNameDuplication(String name) {
-        if (cleaningTaskRepo.isNameExist(name)) {
-            throw BusinessException.of(CleanErrorCode.DUPLICATE_TASK_NAME);
-        }
-    }
-
-    public void checkInputAndOutput(List<OperatorInstanceDto> operators) {
-        if (operators == null || operators.size() <= 1) {
-            return;
-        }
-        for (int i = 1; i < operators.size(); i++) {
-            OperatorInstanceDto front = operators.get(i - 1);
-            OperatorInstanceDto back = operators.get(i);
-            if (StringUtils.equals(front.getOutputs(), back.getInputs()) || StringUtils.equalsAny("multimodal",
-                front.getOutputs(), back.getOutputs())) {
-                continue;
-            }
-            throw BusinessException.of(CleanErrorCode.IN_AND_OUT_NOT_MATCH,
-                String.format(Locale.ROOT, "ops(name: [%s, %s]) inputs and outputs do not match",
-                    front.getName(), back.getName()));
-        }
-    }
-
-    public void checkTaskId(String id) {
-        if (id == null || !UUID_PATTERN.matcher(id).matches()) {
-            throw BusinessException.of(SystemErrorCode.INVALID_PARAMETER);
-        }
-    }
-
-    public ExecutorType checkAndGetExecutorType(List<OperatorInstanceDto> operators) {
-        if (operators == null || operators.isEmpty()) {
-            throw BusinessException.of(CleanErrorCode.OPERATOR_LIST_EMPTY);
-        }
-        for (int i = 1; i < operators.size(); i++) {
-            OperatorInstanceDto front = operators.get(i - 1);
-            OperatorInstanceDto back = operators.get(i);
-            boolean frontHas = CollectionUtils.isNotEmpty(front.getCategories())
-                && front.getCategories().contains(OperatorConstant.CATEGORY_DATA_JUICER_ID);
-            boolean backHas = CollectionUtils.isNotEmpty(back.getCategories())
-                && back.getCategories().contains(OperatorConstant.CATEGORY_DATA_JUICER_ID);
-            if (frontHas == backHas) {
-                continue;
-            }
-            throw BusinessException.of(CleanErrorCode.EXECUTOR_NOT_MATCH,
-                String.format(Locale.ROOT, "ops(name: [%s, %s]) executor does not match",
-                    front.getName(), back.getName()));
-        }
-        if (operators.getFirst().getCategories().contains(OperatorConstant.CATEGORY_DATA_JUICER_ID)) {
-            return ExecutorType.fromValue(sysParamApplicationService.getParamByKey("DATA_JUICER_EXECUTOR"));
-        }
-        return ExecutorType.DATAMATE;
-    }
-}
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningProcess.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningProcess.java
deleted file mode 100644
index 0f5c7f3e..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningProcess.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package com.datamate.cleaning.interfaces.dto;
-
-
-import lombok.Getter;
-import lombok.Setter;
-
-import java.math.BigDecimal;
-import java.math.RoundingMode;
-
-/**
- * CleaningProcess
- */
-
-@Getter
-@Setter
-public class CleaningProcess {
-    private Float process;
-
-    private Float successRate;
-
-    private Integer totalFileNum;
-
-    private Integer succeedFileNum;
-
-    private Integer failedFileNum;
-
-    private Integer finishedFileNum;
-
-    public CleaningProcess(int totalFileNum, int succeedFileNum, int failedFileNum) {
-        this.totalFileNum = totalFileNum;
-        this.succeedFileNum = succeedFileNum;
-        this.failedFileNum = failedFileNum;
-        this.finishedFileNum = succeedFileNum + failedFileNum;
-        if (totalFileNum == 0) {
-            this.process = 0.0f;
-        } else {
-            this.process = BigDecimal.valueOf(finishedFileNum * 100L)
-                .divide(BigDecimal.valueOf(totalFileNum), 2, RoundingMode.HALF_UP).floatValue();
-        }
-        if (finishedFileNum == 0) {
-            this.successRate = 0f;
-        } else {
-            this.successRate = BigDecimal.valueOf(succeedFileNum * 100L)
-                .divide(BigDecimal.valueOf(finishedFileNum), 2, RoundingMode.HALF_UP).floatValue();
-        }
-    }
-
-    public static CleaningProcess of(int totalFileNum, int succeedFileNum, int failedFileNum) {
-        return new CleaningProcess(totalFileNum, succeedFileNum, failedFileNum);
-    }
-}
-
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningResultDto.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningResultDto.java
deleted file mode 100644
index 151abe0e..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningResultDto.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package com.datamate.cleaning.interfaces.dto;
-
-import lombok.Getter;
-import lombok.Setter;
-
-@Getter
-@Setter
-public class CleaningResultDto {
-    private String instanceId;
-
-    private String srcFileId;
-
-    private String destFileId;
-
-    private String srcName;
-
-    private String destName;
-
-    private String srcType;
-
-    private String destType;
-
-    private long srcSize;
-
-    private long destSize;
-
-    private String status;
-
-    private String result;
-}
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTaskDto.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTaskDto.java
deleted file mode 100644
index bc1274f1..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTaskDto.java
+++ /dev/null
@@ -1,60 +0,0 @@
-package com.datamate.cleaning.interfaces.dto;
-
-import com.datamate.cleaning.common.enums.CleaningTaskStatusEnum;
-
-import java.time.LocalDateTime;
-import java.util.List;
-
-import com.datamate.operator.interfaces.dto.OperatorDto;
-import lombok.Getter;
-import lombok.Setter;
-import org.springframework.format.annotation.DateTimeFormat;
-
-/**
- * CleaningTask
- */
-
-@Getter
-@Setter
-public class CleaningTaskDto {
-
-    private String id;
-
-    private String name;
-
-    private String description;
-
-    private String srcDatasetId;
-
-    private String srcDatasetName;
-
-    private String destDatasetId;
-
-    private String destDatasetName;
-
-    private Long beforeSize;
-
-    private Long afterSize;
-
-    private Integer fileCount;
-
-    private Integer retryCount;
-
-    private CleaningTaskStatusEnum status;
-
-    private String templateId;
-
-    private List<OperatorDto> instance;
-
-    private CleaningProcess progress;
-
-    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
-    private LocalDateTime createdAt;
-
-    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
-    private LocalDateTime startedAt;
-
-    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
-    private LocalDateTime finishedAt;
-}
-
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTaskLog.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTaskLog.java
deleted file mode 100644
index b5d45ea6..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTaskLog.java
+++ /dev/null
@@ -1,12 +0,0 @@
-package com.datamate.cleaning.interfaces.dto;
-
-import lombok.Getter;
-import lombok.Setter;
-
-@Getter
-@Setter
-public class CleaningTaskLog {
-    private String level;
-
-    private String message;
-}
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTemplateDto.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTemplateDto.java
deleted file mode 100644
index 29c71028..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CleaningTemplateDto.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package com.datamate.cleaning.interfaces.dto;
-
-import java.time.LocalDateTime;
-import java.util.ArrayList;
-import java.util.List;
-
-import com.datamate.operator.interfaces.dto.OperatorDto;
-import lombok.Getter;
-import lombok.Setter;
-import org.springframework.format.annotation.DateTimeFormat;
-
-/**
- * CleaningTemplate
- */
-
-@Getter
-@Setter
-public class CleaningTemplateDto {
-
-    private String id;
-
-    private String name;
-
-    private String description;
-
-    private List<OperatorDto> instance = new ArrayList<>();
-
-    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
-    private LocalDateTime createdAt;
-
-    @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME)
-    private LocalDateTime updatedAt;
-}
-
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CreateCleaningTaskRequest.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CreateCleaningTaskRequest.java
deleted file mode 100644
index 09aefdfe..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CreateCleaningTaskRequest.java
+++ /dev/null
@@ -1,46 +0,0 @@
-package com.datamate.cleaning.interfaces.dto;
-
-import java.util.ArrayList;
-import java.util.List;
-
-
-import lombok.Getter;
-import lombok.Setter;
-import org.springaicommunity.mcp.annotation.McpToolParam;
-
-/**
- * CreateCleaningTaskRequest
- */
-
-@Getter
-@Setter
-public class CreateCleaningTaskRequest {
-    @McpToolParam(description = "Name of the cleaning task to create")
-    private String name;
-
-    @McpToolParam(description = "Description of the cleaning task to create")
-    private String description;
-
-    @McpToolParam(description = "ID of the source dataset used by the cleaning task")
-    private String srcDatasetId;
-
-    @McpToolParam(description = "Name of the source dataset used by the cleaning task")
-    private String srcDatasetName;
-
-    @McpToolParam(description = "ID of the target dataset the cleaning task writes to", required = false)
-    private String destDatasetId;
-
-    @McpToolParam(description = "Name of the target dataset the cleaning task writes to; if destDatasetId is empty, a new dataset is created.")
-    private String destDatasetName;
-
-    @McpToolParam(description = "Type of the target dataset created by the cleaning task; one of TEXT/IMAGE/VIDEO/AUDIO/OTHER")
-    private String destDatasetType;
-
-    @McpToolParam(description = "ID of the template used by the cleaning task; either this or the instance parameter must be provided, and this one takes precedence", required = false)
-    private String templateId;
-
-    @McpToolParam(description = "List of operators used by the cleaning task; either this or the templateId parameter must be provided. " +
-        "Note: a single task can only use operators of a single vendor and cannot mix them, e.g. all DataMate operators or all DataJuicer operators.", required = false)
-    private List<OperatorInstanceDto> instance = new ArrayList<>();
-}
-
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CreateCleaningTemplateRequest.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CreateCleaningTemplateRequest.java
deleted file mode 100644
index 11dd8b49..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/CreateCleaningTemplateRequest.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package com.datamate.cleaning.interfaces.dto;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import lombok.Getter;
-import lombok.Setter;
-
-/**
- * CreateCleaningTemplateRequest
- */
-
-@Getter
-@Setter
-public class CreateCleaningTemplateRequest {
-
-    private String name;
-
-    private String description;
-
-    private List<OperatorInstanceDto> instance = new ArrayList<>();
-}
-
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/OperatorInstanceDto.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/OperatorInstanceDto.java
deleted file mode 100644
index eb89caa1..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/OperatorInstanceDto.java
+++ /dev/null
@@ -1,37 +0,0 @@
-package com.datamate.cleaning.interfaces.dto;
-
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-
-import lombok.Getter;
-import lombok.Setter;
-import org.springaicommunity.mcp.annotation.McpToolParam;
-
-/**
- * OperatorInstance
- */
-
-@Getter
-@Setter
-public class OperatorInstanceDto {
-    @McpToolParam(description = "Operator ID")
-    private String id;
-
-    @McpToolParam(description = "Operator name")
-    private String name;
-
-    @McpToolParam(description = "Operator input type; one of text/image/audio/video/multimodal")
-    private String inputs;
-
-    @McpToolParam(description = "Operator output type; one of text/image/audio/video/multimodal")
-    private String outputs;
-
-    @McpToolParam(description = "List of all category IDs the operator belongs to.", required = false)
-    private List<String> categories;
-
-    @McpToolParam(description = "Parameters to override for the operator", required = false)
-    private Map<String, Object> overrides = new HashMap<>();
-}
-
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/UpdateCleaningTemplateRequest.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/UpdateCleaningTemplateRequest.java
deleted file mode 100644
index 753d62be..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/dto/UpdateCleaningTemplateRequest.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package com.datamate.cleaning.interfaces.dto;
-
-import java.util.ArrayList;
-import java.util.List;
-
-
-import lombok.Getter;
-import lombok.Setter;
-
-/**
- * UpdateCleaningTemplateRequest
- */
-
-@Getter
-@Setter
-public class UpdateCleaningTemplateRequest {
-
-    private String id;
-
-    private String name;
-
-    private String description;
-
-    private List<OperatorInstanceDto> instance = new ArrayList<>();
-}
-
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/rest/CleaningTaskController.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/rest/CleaningTaskController.java
deleted file mode 100644
index fc7d1b51..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/rest/CleaningTaskController.java
+++ /dev/null
@@ -1,87 +0,0 @@
-package com.datamate.cleaning.interfaces.rest;
-
-import com.datamate.cleaning.application.CleaningTaskService;
-import com.datamate.cleaning.application.scheduler.CleaningTaskScheduler;
-import com.datamate.cleaning.interfaces.dto.*;
-import com.datamate.common.interfaces.PagedResponse;
-import lombok.RequiredArgsConstructor;
-import org.apache.commons.lang3.StringUtils;
-import org.springaicommunity.mcp.annotation.McpTool;
-import org.springaicommunity.mcp.annotation.McpToolParam;
-import org.springframework.web.bind.annotation.*;
-
-import java.util.List;
-
-
-@RestController
-@RequestMapping("/cleaning/tasks")
-@RequiredArgsConstructor
-public class CleaningTaskController {
-    private final CleaningTaskService cleaningTaskService;
-
-    private final CleaningTaskScheduler taskScheduler;
-
-    @GetMapping
-    public PagedResponse<CleaningTaskDto> cleaningTasksGet(
-        @RequestParam("page") Integer page,
-        @RequestParam("size") Integer size, @RequestParam(value = "status", required = false) String status,
-        @RequestParam(value = "keyword", required = false) String keyword) {
-        List<CleaningTaskDto> tasks = cleaningTaskService.getTasks(status, keyword, page, size);
-        int count = cleaningTaskService.countTasks(status, keyword);
-        int totalPages = (count + size + 1) / size;
-        return PagedResponse.of(tasks, page, count, totalPages);
-    }
-
-    @PostMapping
-    @McpTool(name = "create_cleaning_task", description = "Create a cleaning task from a template ID or a list of operators.")
-    public CleaningTaskDto cleaningTasksPost(@McpToolParam(description = "Request body for creating a task; the parameters must be wrapped in a request object.")
-        @RequestBody CreateCleaningTaskRequest request) {
-        if (request.getInstance().isEmpty() && StringUtils.isNotBlank(request.getTemplateId())) {
-            request.setInstance(cleaningTaskService.getInstanceByTemplateId(request.getTemplateId()));
-        }
-        CleaningTaskDto task = cleaningTaskService.createTask(request);
-        taskScheduler.executeTask(task.getId(), 0);
-        return task;
-    }
-
-    @PostMapping("/{taskId}/stop")
-    public String cleaningTasksStop(@PathVariable("taskId") String taskId) {
-        cleaningTaskService.stopTask(taskId);
-        return taskId;
-    }
-
-    @PostMapping("/{taskId}/execute")
-    public String cleaningTasksStart(@PathVariable("taskId") String taskId) {
-        cleaningTaskService.executeTask(taskId);
-        return taskId;
-    }
-
-    @GetMapping("/{taskId}")
-    public CleaningTaskDto cleaningTasksTaskIdGet(@PathVariable("taskId") String taskId) {
-        return cleaningTaskService.getTask(taskId);
-    }
-
-    @DeleteMapping("/{taskId}")
-    public String cleaningTasksTaskIdDelete(@PathVariable("taskId") String taskId) {
-        cleaningTaskService.deleteTask(taskId);
-        return taskId;
-    }
-
-    @DeleteMapping
-    public void cleaningTasksDelete(@RequestParam List<String> taskIds) {
-        for (String taskId : taskIds) {
-            cleaningTaskService.deleteTask(taskId);
-        }
-    }
-
-    @GetMapping("/{taskId}/result")
-    public List<CleaningResultDto> cleaningTasksTaskIdGetResult(@PathVariable("taskId") String taskId) {
-        return cleaningTaskService.getTaskResults(taskId);
-    }
-
-    @GetMapping("/{taskId}/log/{retryCount}")
-    public List<CleaningTaskLog> cleaningTasksTaskIdGetLog(@PathVariable("taskId") String taskId,
-        @PathVariable("retryCount") int retryCount) {
-        return cleaningTaskService.getTaskLog(taskId, retryCount);
-    }
-}
diff --git a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/rest/CleaningTemplateController.java b/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/rest/CleaningTemplateController.java
deleted file mode 100644
index de9cc1ba..00000000
--- a/backend/services/data-cleaning-service/src/main/java/com/datamate/cleaning/interfaces/rest/CleaningTemplateController.java
+++ /dev/null
@@ -1,78 +0,0 @@
-package com.datamate.cleaning.interfaces.rest;
-
-import com.datamate.cleaning.application.CleaningTemplateService;
-import com.datamate.cleaning.interfaces.dto.CleaningTemplateDto;
-import com.datamate.cleaning.interfaces.dto.CreateCleaningTemplateRequest;
-import com.datamate.cleaning.interfaces.dto.UpdateCleaningTemplateRequest;
-import com.datamate.common.interfaces.PagedResponse;
-import lombok.RequiredArgsConstructor;
-import org.springaicommunity.mcp.annotation.McpTool;
-import org.springaicommunity.mcp.annotation.McpToolParam;
-import org.springframework.web.bind.annotation.DeleteMapping;
-import org.springframework.web.bind.annotation.GetMapping;
-import org.springframework.web.bind.annotation.PathVariable;
-import org.springframework.web.bind.annotation.PostMapping;
-import org.springframework.web.bind.annotation.PutMapping;
-import org.springframework.web.bind.annotation.RequestBody;
-import org.springframework.web.bind.annotation.RequestMapping;
-import org.springframework.web.bind.annotation.RequestParam;
-import org.springframework.web.bind.annotation.RestController;
-
-import java.util.Comparator;
-import java.util.List;
-
-
-@RestController
-@RequestMapping("/cleaning/templates")
-@RequiredArgsConstructor
-public class CleaningTemplateController {
-    private final CleaningTemplateService cleaningTemplateService;
-
-    @GetMapping
-    @McpTool(name = "query_cleaning_template", description = "Query the list of cleaning templates")
-    public PagedResponse<CleaningTemplateDto> cleaningTemplatesGet(
-        @RequestParam(value = "page", required = false)
-        @McpToolParam(description = "Page number, starting from 0", required = false) Integer page,
-        @RequestParam(value = "size", required = false)
-        @McpToolParam(description = "Page size", required = false) Integer size,
-        @RequestParam(value = "keyword", required = false)
-        @McpToolParam(description = "Keyword, matched against name and description", required = false) String keyword) {
-        List<CleaningTemplateDto> templates = cleaningTemplateService.getTemplates(keyword);
-        if (page == null || size == null) {
-            return PagedResponse.of(templates.stream()
-                .sorted(Comparator.comparing(CleaningTemplateDto::getCreatedAt).reversed()).toList());
-        }
-        int count = templates.size();
-        int totalPages = (count + size + 1) / size;
-        List<CleaningTemplateDto> limitTemplates = templates.stream()
-            .sorted(Comparator.comparing(CleaningTemplateDto::getCreatedAt).reversed())
-            .skip((long) page * size)
-            .limit(size).toList();
-        return PagedResponse.of(limitTemplates, page, count, totalPages);
-    }
-
-    @PostMapping
-    public CleaningTemplateDto cleaningTemplatesPost(
-        @RequestBody CreateCleaningTemplateRequest request) {
-        return cleaningTemplateService.createTemplate(request);
-    }
-
-    @GetMapping("/{templateId}")
-    public CleaningTemplateDto cleaningTemplatesTemplateIdGet(
-        @PathVariable("templateId") String templateId) {
-        return cleaningTemplateService.getTemplate(templateId);
-    }
-
-    @PutMapping("/{templateId}")
-    public CleaningTemplateDto cleaningTemplatesTemplateIdPut(
-        @PathVariable("templateId") String templateId, @RequestBody UpdateCleaningTemplateRequest request) {
-        return cleaningTemplateService.updateTemplate(templateId, request);
-    }
-
-    @DeleteMapping("/{templateId}")
-    public String cleaningTemplatesTemplateIdDelete(
-        @PathVariable("templateId") String templateId) {
-        cleaningTemplateService.deleteTemplate(templateId);
-        return templateId;
-    }
-}
diff --git a/backend/services/main-application/pom.xml b/backend/services/main-application/pom.xml
index d49f05b0..4ddc5198 100644
--- a/backend/services/main-application/pom.xml
+++ b/backend/services/main-application/pom.xml
@@ -56,16 +56,6 @@
             <artifactId>data-management-service</artifactId>
             <version>${project.version}</version>
         </dependency>
-        <dependency>
-            <groupId>com.datamate</groupId>
-            <artifactId>operator-market-service</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>com.datamate</groupId>
-            <artifactId>data-cleaning-service</artifactId>
-            <version>${project.version}</version>
-        </dependency>
diff --git a/backend/services/operator-market-service/pom.xml b/backend/services/operator-market-service/pom.xml
deleted file mode 100644
index 503b1501..00000000
--- a/backend/services/operator-market-service/pom.xml
+++ /dev/null
@@ -1,81 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-    <modelVersion>4.0.0</modelVersion>
-
-    <parent>
-        <groupId>com.datamate</groupId>
-        <artifactId>services</artifactId>
-        <version>1.0.0-SNAPSHOT</version>
-        <relativePath>../pom.xml</relativePath>
-    </parent>
-
-    <artifactId>operator-market-service</artifactId>
-    <name>Operator Market Service</name>
-    <description>Operator market service</description>
-
-    <dependencies>
-        <dependency>
-            <groupId>com.datamate</groupId>
-            <artifactId>domain-common</artifactId>
-            <version>${project.version}</version>
-        </dependency>
-        <dependency>
-            <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-starter-web</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-starter-data-redis</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.springframework.boot</groupId>
-            <artifactId>spring-boot-starter-test</artifactId>
-            <scope>test</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.springframework.cloud</groupId>
-            <artifactId>spring-cloud-starter-openfeign</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.springdoc</groupId>
-            <artifactId>springdoc-openapi-starter-webmvc-ui</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.openapitools</groupId>
-            <artifactId>jackson-databind-nullable</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>jakarta.validation</groupId>
-            <artifactId>jakarta.validation-api</artifactId>
-        </dependency>
-        <dependency>
-            <groupId>org.projectlombok</groupId>
-            <artifactId>lombok</artifactId>
-            <scope>provided</scope>
-        </dependency>
-        <dependency>
-            <groupId>org.apache.commons</groupId>
-            <artifactId>commons-compress</artifactId>
-            <version>1.26.1</version>
-        </dependency>
-
-        <dependency>
-            <groupId>org.mapstruct</groupId>
-            <artifactId>mapstruct-processor</artifactId>
-            <version>${mapstruct.version}</version>
-            <scope>provided</scope>
-        </dependency>
-    </dependencies>
-
-    <build>
-        <plugins>
-            <plugin>
-                <groupId>org.springframework.boot</groupId>
-                <artifactId>spring-boot-maven-plugin</artifactId>
-            </plugin>
-        </plugins>
-    </build>
-</project>
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/OperatorMarketServiceConfiguration.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/OperatorMarketServiceConfiguration.java
deleted file mode 100644
index 1e0e5c7f..00000000
--- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/OperatorMarketServiceConfiguration.java
+++ /dev/null
@@ -1,23 +0,0 @@
-package com.datamate.operator;
-
-import org.springframework.boot.autoconfigure.domain.EntityScan;
-import org.springframework.context.annotation.ComponentScan;
-import org.springframework.context.annotation.Configuration;
-import org.springframework.scheduling.annotation.EnableAsync;
-import org.springframework.scheduling.annotation.EnableScheduling;
-
-/**
- * Operator Market Service Configuration
- * Configuration class for the operator market service - versions, installation, ratings, repositories
- */
-@Configuration
-@EnableAsync
-@EnableScheduling
-@EntityScan(basePackages = "com.datamate.operator.domain.model")
-@ComponentScan(basePackages = {
-    "com.datamate.operator"
-})
-public class OperatorMarketServiceConfiguration {
-    // Configuration class for the service packaged and consumed as a JAR
-}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/application/CategoryService.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/application/CategoryService.java
deleted file mode 100644
index 271db76f..00000000
--- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/application/CategoryService.java
+++ /dev/null
@@ -1,68 +0,0 @@
-package com.datamate.operator.application;
-
-
-import com.datamate.operator.domain.repository.CategoryRelationRepository;
-import com.datamate.operator.domain.repository.CategoryRepository;
-import com.datamate.operator.interfaces.dto.CategoryDto;
-import com.datamate.operator.interfaces.dto.CategoryRelationDto;
-import com.datamate.operator.interfaces.dto.CategoryTreeResponse;
-import lombok.RequiredArgsConstructor;
-import org.apache.commons.lang3.StringUtils;
-import org.springframework.stereotype.Service;
-
-import java.time.LocalDateTime;
-import java.util.*;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.function.Function;
-import java.util.stream.Collectors;
-
-@Service
-@RequiredArgsConstructor
-public class CategoryService {
-
-
-    private final CategoryRepository categoryRepo;
-
-    private final CategoryRelationRepository categoryRelationRepo;
-
-    public List<CategoryTreeResponse> getAllCategories() {
-        List<CategoryDto> allCategories = categoryRepo.findAllCategories();
-        List<CategoryRelationDto> allRelations = categoryRelationRepo.findAllRelation();
-
-        Map<String, Integer> relationMap = allRelations.stream()
-            .collect(Collectors.groupingBy(
-                CategoryRelationDto::getCategoryId,
-                Collectors.collectingAndThen(Collectors.counting(), Math::toIntExact)));
-
-        Map<String, CategoryDto> nameMap = allCategories.stream()
-            .collect(Collectors.toMap(CategoryDto::getId, Function.identity()));
-        Map<String, List<CategoryDto>> groupedByParentId = allCategories.stream()
-            .filter(relation -> !StringUtils.equals(relation.getParentId(), "0"))
-            .collect(Collectors.groupingBy(CategoryDto::getParentId));
-
-        return groupedByParentId.entrySet().stream()
-            .sorted(categoryComparator(nameMap))
-            .map(entry -> {
-                String parentId = entry.getKey();
-                List<CategoryDto> group = entry.getValue();
-                CategoryTreeResponse response = new CategoryTreeResponse();
-                response.setId(parentId);
-                response.setName(nameMap.get(parentId).getName());
-                AtomicInteger totalCount = new AtomicInteger();
-                response.setCategories(group.stream().peek(category -> {
-                    category.setCount(relationMap.getOrDefault(category.getId(), 0));
-                    totalCount.getAndAdd(relationMap.getOrDefault(category.getId(), 0));
-                }).sorted(Comparator.comparing(CategoryDto::getCreatedAt)).toList());
-                response.setCount(totalCount.get());
-                return response;
-            }).collect(Collectors.toCollection(ArrayList::new));
-    }
-
-    private Comparator<Map.Entry<String, List<CategoryDto>>> categoryComparator(Map<String, CategoryDto> categoryMap) {
-        return (entry1, entry2) -> {
-            LocalDateTime index1 = categoryMap.get(entry1.getKey()).getCreatedAt();
-            LocalDateTime index2 = categoryMap.get(entry2.getKey()).getCreatedAt();
-            return index1.compareTo(index2);
-        };
-    }
-}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/application/OperatorService.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/application/OperatorService.java
deleted file mode 100644
index 2cb6592f..00000000
--- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/application/OperatorService.java
+++ /dev/null
@@ -1,305 +0,0 @@
-package com.datamate.operator.application;
-
-import com.datamate.common.domain.model.ChunkUploadPreRequest;
-import com.datamate.common.domain.service.FileService;
-import com.datamate.common.infrastructure.exception.BusinessException;
-import com.datamate.common.infrastructure.exception.SystemErrorCode;
-import com.datamate.operator.domain.contants.OperatorConstant;
-import com.datamate.operator.domain.repository.OperatorReleaseRepository;
-import com.datamate.operator.infrastructure.converter.OperatorConverter;
-import com.datamate.operator.domain.model.OperatorView;
-import com.datamate.operator.domain.repository.CategoryRelationRepository;
-import com.datamate.operator.domain.repository.OperatorRepository;
-import com.datamate.operator.domain.repository.OperatorViewRepository;
-import com.datamate.operator.infrastructure.exception.OperatorErrorCode;
-import com.datamate.operator.infrastructure.parser.ParserHolder;
-import com.datamate.operator.interfaces.dto.OperatorDto;
-import com.datamate.operator.interfaces.dto.OperatorReleaseDto;
-import com.datamate.operator.interfaces.dto.UploadOperatorRequest;
-import com.fasterxml.jackson.core.JsonProcessingException;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.apache.commons.collections4.CollectionUtils;
-import org.apache.commons.collections4.MapUtils;
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.StringUtils;
-import org.springframework.beans.factory.annotation.Value;
-import org.springframework.core.io.Resource;
-import org.springframework.core.io.UrlResource;
-import org.springframework.stereotype.Service;
-import org.springframework.transaction.annotation.Transactional;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.MalformedURLException;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.time.LocalDateTime;
-import java.util.*;
-import java.util.stream.Collectors;
-import java.util.stream.Stream;
-
-@Service
-@Slf4j
-@RequiredArgsConstructor
-public class OperatorService {
-    private final OperatorRepository operatorRepo;
-
-    private final OperatorViewRepository operatorViewRepo;
-
-    private final CategoryRelationRepository relationRepo;
-
-    private final OperatorReleaseRepository operatorReleaseRepo;
-
-    private final ParserHolder parserHolder;
-
-    private final FileService fileService;
-
-    private final ObjectMapper objectMapper = new ObjectMapper();
-
-    @Value("${operator.base.path:/operators}")
-    private String operatorBasePath;
-
-    public List<OperatorDto> getOperators(Integer page, Integer size, List<List<String>> categories,
-        String keyword, Boolean isStar) {
-        return operatorViewRepo.findOperatorsByCriteria(page, size, keyword, categories, isStar);
-    }
-
-    public int getOperatorsCount(List<List<String>> categories, String keyword, Boolean isStar) {
-        return operatorViewRepo.countOperatorsByCriteria(keyword, categories, isStar);
-    }
-
-    public OperatorDto getOperatorById(String id) {
-        OperatorView operator = operatorViewRepo.findOperatorById(id);
-        OperatorDto operatorDto = OperatorConverter.INSTANCE.fromEntityToDto(operator);
-        if (StringUtils.isNotBlank(operatorDto.getFileName())) {
-            String filePath = getExtractPath(getStem(operatorDto.getFileName()));
-            String requirements = filePath + "/requirements.txt";
-            operatorDto.setRequirements(readRequirements(requirements));
-            operatorDto.setReadme(getReadmeContent(filePath));
-        }
-        operatorDto.setFileName(null);
-        operatorDto.setReleases(operatorReleaseRepo.findAllByOperatorId(id));
-        return operatorDto;
-    }
-
-    @Transactional
-    public OperatorDto createOperator(OperatorDto req) {
-        overrideSettings(req);
-        operatorRepo.insertOperator(req);
-        relationRepo.batchInsert(req.getId(), req.getCategories());
-        if (CollectionUtils.isNotEmpty(req.getReleases())) {
-            OperatorReleaseDto release = req.getReleases().getFirst();
-            release.setId(req.getId());
-            release.setVersion(req.getVersion());
-            release.setReleaseDate(LocalDateTime.now());
-            operatorReleaseRepo.insertOperatorRelease(release);
-        }
-        parserHolder.extractTo(getFileType(req.getFileName()), getUploadPath(req.getFileName()),
-            getExtractPath(getStem(req.getFileName())));
-        return getOperatorById(req.getId());
-    }
-
-    @Transactional
-    public OperatorDto updateOperator(String id, OperatorDto req) {
-        OperatorDto operator = getOperatorById(id);
-        overrideSettings(req);
-        operatorRepo.updateOperator(req);
-        if (StringUtils.isNotBlank(req.getFileName()) && CollectionUtils.isNotEmpty(req.getCategories())) {
-            relationRepo.batchUpdate(id, req.getCategories());
-        }
-        if (CollectionUtils.isNotEmpty(req.getReleases())) {
-            OperatorReleaseDto release = req.getReleases().getFirst();
-            release.setId(req.getId());
-            release.setVersion(req.getVersion());
-            release.setReleaseDate(LocalDateTime.now());
-            if (StringUtils.equals(operator.getVersion(), req.getVersion())) {
-                operatorReleaseRepo.updateOperatorRelease(release);
-            } else {
-                operatorReleaseRepo.insertOperatorRelease(release);
-            }
-        }
-        if (StringUtils.isNotBlank(req.getFileName())) {
-            parserHolder.extractTo(getFileType(req.getFileName()), getUploadPath(req.getFileName()),
-                getExtractPath(getStem(req.getFileName())));
-        }
-        return getOperatorById(id);
-    }
-
-    @Transactional
-    public void deleteOperator(String id) {
-        if (operatorRepo.operatorInTemplateOrRunning(id)) {
-            throw BusinessException.of(OperatorErrorCode.OPERATOR_IN_INSTANCE);
-        }
-        if (relationRepo.operatorIsPredefined(id)) {
-            throw BusinessException.of(OperatorErrorCode.CANT_DELETE_PREDEFINED_OPERATOR);
-        }
-        OperatorView operator = operatorViewRepo.findOperatorById(id);
-        operatorRepo.deleteOperator(id);
-        relationRepo.deleteByOperatorId(id);
-        operatorReleaseRepo.deleteOperatorRelease(id);
-        FileUtils.deleteQuietly(new File(getExtractPath(getStem(operator.getFileName()))));
-    }
-
-    public OperatorDto uploadOperator(String fileName) {
-        return parserHolder.parseYamlFromArchive(getFileType(fileName), new File(getUploadPath(fileName)),
-            OperatorConstant.YAML_PATH);
-    }
-
-    public String preUpload() {
-        ChunkUploadPreRequest request = ChunkUploadPreRequest.builder().build();
-        request.setUploadPath(operatorBasePath + File.separator + "upload");
-        request.setTotalFileNum(1);
-        request.setServiceId(OperatorConstant.SERVICE_ID);
-        return fileService.preUpload(request);
-    }
-
-    public void chunkUpload(UploadOperatorRequest request) {
-        fileService.chunkUpload(OperatorConverter.INSTANCE.toChunkRequest(request));
-    }
-
-    private String getFileType(String fileName) {
-        return fileName.substring(fileName.lastIndexOf('.') + 1);
-    }
-
-    private String getStem(String fileName) {
-        return fileName.substring(0, fileName.lastIndexOf('.'));
-    }
-
-    private String getUploadPath(String fileName) {
-        return operatorBasePath + File.separator + "upload" + File.separator + fileName;
-    }
-
-    private String getExtractPath(String fileName) {
-        return operatorBasePath + File.separator + "extract" + File.separator + fileName;
-    }
-
-    public void overrideSettings(OperatorDto operatorDto) {
-        if (StringUtils.isBlank(operatorDto.getSettings()) || MapUtils.isEmpty(operatorDto.getOverrides())) {
-            return;
-        }
-        try {
-            Map<String, Map<String, Object>> settings = objectMapper.readValue(operatorDto.getSettings(), Map.class);
-            for (Map.Entry<String, Object> entry : operatorDto.getOverrides().entrySet()) {
-                String key = entry.getKey();
-                if (!settings.containsKey(key)) {
-                    continue;
-                }
-                Object value = entry.getValue();
-                Map<String, Object> setting = settings.get(key);
-                String type = setting.get("type").toString();
-                switch (type) {
-                    case "slider":
-                    case "switch":
-                    case "select":
-                    case "input":
-                    case "radio":
-                        setting.put("defaultVal", value);
-                        break;
-                    case "checkbox":
-                        setting.put("defaultVal", convertObjectToListString(value));
-                        break;
-                    case "range":
-                        updateProperties(setting, value);
-                    default:
-                }
-                settings.put(key, setting);
-            }
-            operatorDto.setSettings(objectMapper.writeValueAsString(settings));
-        } catch (JsonProcessingException e) {
-            throw BusinessException.of(OperatorErrorCode.SETTINGS_PARSE_FAILED, e.getMessage());
-        }
-    }
-
-    public Resource downloadExampleOperator(File file) {
-        try {
-            Resource resource = new UrlResource(file.toURI());
-            if (resource.exists()) {
-                return resource;
-            } else {
-                throw BusinessException.of(SystemErrorCode.RESOURCE_NOT_FOUND);
-            }
-        } catch (MalformedURLException ex) {
-            log.error("File not found: {}", file.getName(), ex);
-            throw BusinessException.of(SystemErrorCode.RESOURCE_NOT_FOUND);
-        }
-    }
-
-    private String convertObjectToListString(Object object) {
-        if (object == null) {
-            return null;
-        } else if (object instanceof List<?> list) {
-            List<String> result = new ArrayList<>();
-            for (Object item : list) {
-                result.add(String.valueOf(item));
-            }
-            return String.join(",", result);
-        } else {
-            return object.toString();
-        }
-    }
-
-    private void updateProperties(Map<String, Object> setting, Object value) {
-        List<Object> defaultValue = new ArrayList<>();
-        if (value instanceof List) {
-            defaultValue.addAll((List<?>) value);
-        }
-
-        Object properties = setting.get("properties");
-        if (properties instanceof List<?> list) {
-            if (defaultValue.size() != list.size()) {
-                return;
-            }
-            List<Map<String, Object>> result = new ArrayList<>();
-            for (int i = 0; i < list.size(); i++) {
-                Map<String, Object> map = objectMapper.convertValue(list.get(i), Map.class);
-                map.put("defaultVal", defaultValue.get(i));
-                result.add(map);
-            }
-            setting.put("properties", result);
-        }
-    }
-
-    private List<String> readRequirements(String filePath) {
-        Path path = Paths.get(filePath);
-        if (!Files.exists(path) || !Files.isRegularFile(path)) {
-            log.warn("requirements file does not exist or the path is wrong: {}", filePath);
-            return Collections.emptyList();
-        }
-
-        List<String> requirements = new ArrayList<>();
-        try (Stream<String> lines = Files.lines(path)) {
-            requirements = lines.map(String::trim)
-                .filter(line -> !line.isEmpty())
-                .filter(line -> !line.startsWith("#"))
-                .collect(Collectors.toList());
-        } catch (IOException e) {
-            log.warn("Failed to read the requirements file: {}", e.getMessage());
-        }
-        return requirements;
-    }
-
-    private String getReadmeContent(String directoryPath) {
-        Path dir = Paths.get(directoryPath);
-        if (!Files.exists(dir) || !Files.isDirectory(dir)) {
-            System.err.println("Directory does not exist: " + directoryPath);
-            return null;
-        }
-        List<String> candidateNames = Arrays.asList("README.md", "readme.md", "Readme.md");
-        for (String fileName : candidateNames) {
-            Path filePath = dir.resolve(fileName);
-            if (Files.exists(filePath) && Files.isRegularFile(filePath)) {
-                try {
-                    byte[] bytes = Files.readAllBytes(filePath);
-                    return new String(bytes, StandardCharsets.UTF_8);
-                } catch (IOException e) {
-                    log.warn("Found the file but failed to read it: {}, error: {}", filePath, e.getMessage());
-                }
-            }
-        }
-        return "";
-    }
-}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/contants/OperatorConstant.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/contants/OperatorConstant.java
deleted file mode 100644
index 947b41d2..00000000
--- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/contants/OperatorConstant.java
+++ /dev/null
@@ -1,54 +0,0 @@
-package com.datamate.operator.domain.contants;
-
-import java.util.HashMap;
-import java.util.Map;
-
-public class OperatorConstant {
-    public static String SERVICE_ID = "operator";
-
-    public static String YAML_PATH = "metadata.yml";
-
-    public static String CATEGORY_PYTHON = "python";
-
-    public static String CATEGORY_PYTHON_ID = "9eda9d5d-072b-499b-916c-797a0a8750e1";
-
-    public static String CATEGORY_JAVA = "java";
-
-    public static String CATEGORY_JAVA_ID = "b5bfc548-8ef6-417c-b8a6-a4197c078249";
-
-    public static String CATEGORY_CUSTOMIZED_ID = "ec2cdd17-8b93-4a81-88c4-ac9e98d10757";
-
-    public static String CATEGORY_TEXT_ID = "d8a5df7a-52a9-42c2-83c4-01062e60f597";
-
-    public static String CATEGORY_IMAGE_ID = "de36b61c-9e8a-4422-8c31-d30585c7100f";
-
-    public static String CATEGORY_AUDIO_ID = "42dd9392-73e4-458c-81ff-41751ada47b5";
-
-    public static String CATEGORY_VIDEO_ID = "a233d584-73c8-4188-ad5d-8f7c8dda9c27";
-
-    public static String CATEGORY_ALL_ID = "4d7dbd77-0a92-44f3-9056-2cd62d4a71e4";
-
-    public static String CATEGORY_STAR_ID = "51847c24-bba9-11f0-888b-5b143cb738aa";
-
-    public static String CATEGORY_PREDEFINED_ID = "96a3b07a-3439-4557-a835-525faad60ca3";
-
-    public static String CATEGORY_DATAMATE_ID = "431e7798-5426-4e1a-aae6-b9905a836b34";
-
-    public static String CATEGORY_DATA_JUICER_ID = "79b385b4-fde8-4617-bcba-02a176938996";
-
-    public static String CATEGORY_OTHER_VENDOR_ID = "f00eaa3e-96c1-4de4-96cd-9848ef5429ec";
-
-    public static Map<String, String> CATEGORY_MAP = new HashMap<>();
-
-    static {
-        CATEGORY_MAP.put(CATEGORY_PYTHON, CATEGORY_PYTHON_ID);
-        CATEGORY_MAP.put(CATEGORY_JAVA, CATEGORY_JAVA_ID);
-        CATEGORY_MAP.put("text", CATEGORY_TEXT_ID);
-        CATEGORY_MAP.put("image", CATEGORY_IMAGE_ID);
-        CATEGORY_MAP.put("audio", CATEGORY_AUDIO_ID);
-        CATEGORY_MAP.put("video", CATEGORY_VIDEO_ID);
-        CATEGORY_MAP.put("all", CATEGORY_ALL_ID);
-        CATEGORY_MAP.put("datamate", CATEGORY_DATAMATE_ID);
-        CATEGORY_MAP.put("data-juicer", CATEGORY_DATA_JUICER_ID);
-    }
-}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/Category.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/Category.java
deleted file mode 100644
index cd52f1a8..00000000
--- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/Category.java
+++ /dev/null
@@ -1,24 +0,0 @@
-package com.datamate.operator.domain.model;
-
-import com.baomidou.mybatisplus.annotation.TableName;
-import lombok.Getter;
-import lombok.Setter;
-
-import java.time.LocalDateTime;
-
-@Setter
-@Getter
-@TableName(value = "t_operator_category", autoResultMap = true)
-public class Category {
-    private String id;
-
-    private String name;
-
-    private String value;
-
-    private String type;
-
-    private String parentId;
-
-    private LocalDateTime createdAt;
-}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/CategoryRelation.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/CategoryRelation.java
deleted file mode 100644
index 9d40cb92..00000000
--- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/CategoryRelation.java
+++ /dev/null
@@ -1,16 +0,0 @@
-package com.datamate.operator.domain.model;
-
-import com.baomidou.mybatisplus.annotation.TableName;
-import lombok.AllArgsConstructor;
-import lombok.Getter;
-import lombok.Setter;
-
-@Setter
-@Getter
-@AllArgsConstructor
-@TableName(value = "t_operator_category_relation", autoResultMap = true)
-public class CategoryRelation {
-    private String categoryId;
-
-    private String operatorId;
-}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/Operator.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/Operator.java
deleted file mode 100644
index 9959f007..00000000
--- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/Operator.java
+++ /dev/null
@@ -1,38 +0,0 @@
-package com.datamate.operator.domain.model;
-
-import com.baomidou.mybatisplus.annotation.TableName;
-import com.datamate.common.domain.model.base.BaseEntity;
-import lombok.Getter;
-import lombok.Setter;
-
-import java.time.LocalDateTime;
-
-@Getter
-@Setter
-@TableName(value = "t_operator")
-public class Operator extends BaseEntity {
-    private String name;
-
-    private String description;
-
-    private String version;
-
-    private String inputs;
-
-    private String outputs;
-
-    private String runtime;
-
-    private String settings;
-
-    private String fileName;
-
-    private Long fileSize;
-
-    private String metrics;
-
-    private Integer usageCount;
-
-    private Boolean isStar;
-}
-
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/OperatorRelease.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/OperatorRelease.java
deleted file mode 100644
index 77bd244e..00000000
--- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/OperatorRelease.java
+++ /dev/null
@@ -1,24 +0,0 @@
-package com.datamate.operator.domain.model;
-
-import com.baomidou.mybatisplus.annotation.TableField;
-import com.baomidou.mybatisplus.annotation.TableName;
-import com.datamate.common.infrastructure.config.PgJsonTypeHandler;
-import lombok.Getter;
-import lombok.Setter;
-
-import java.time.LocalDateTime;
-import java.util.List;
-
-@Getter
-@Setter
-@TableName(value = "t_operator_release", autoResultMap = true)
-public class OperatorRelease {
-    private String id;
-
-    private String version;
-
-    private LocalDateTime releaseDate;
-
-    @TableField(typeHandler = PgJsonTypeHandler.class)
-    private List<String> changelog;
-}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/OperatorView.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/OperatorView.java
deleted file mode 100644
index 7e589601..00000000
--- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/model/OperatorView.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package com.datamate.operator.domain.model;
-
-import com.baomidou.mybatisplus.annotation.TableField;
-import com.baomidou.mybatisplus.annotation.TableName;
-import lombok.Getter;
-import lombok.Setter;
-
-import java.time.LocalDateTime;
-
-@Getter
-@Setter
-@TableName(value = "v_operator")
-public class OperatorView {
-    @TableField(value = "operator_id")
-    private String id;
-
-    @TableField(value = "operator_name")
-    private String name;
-
-    private String description;
-
-    private String version;
-
-    private String inputs;
-
-    private String outputs;
-
-    private String categories;
-
-    private String runtime;
-
-    private String settings;
-
-    private String fileName;
-
-    private Long fileSize;
-
-    private String metrics;
-
-    private Integer usageCount;
-
-    private Boolean isStar;
-
-    private LocalDateTime createdAt;
-
-    private LocalDateTime updatedAt;
-}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/CategoryRelationRepository.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/CategoryRelationRepository.java
deleted file mode 100644
index 146363b5..00000000
--- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/CategoryRelationRepository.java
+++ /dev/null
@@ -1,20 +0,0 @@
-package com.datamate.operator.domain.repository;
-
-import com.baomidou.mybatisplus.extension.repository.IRepository;
-import com.datamate.operator.domain.model.CategoryRelation;
-import com.datamate.operator.interfaces.dto.CategoryRelationDto;
-
-import java.util.List;
-
-public interface CategoryRelationRepository extends IRepository<CategoryRelation> {
-
-    List<CategoryRelationDto> findAllRelation();
-
-    void batchInsert(String operatorId, List<String> categories);
-
-    void batchUpdate(String operatorId, List<String> categories);
-
-    void deleteByOperatorId(String operatorId);
-
-    boolean operatorIsPredefined(String operatorId);
-}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/CategoryRepository.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/CategoryRepository.java
deleted file mode 100644
index d0409989..00000000
--- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/CategoryRepository.java
+++ /dev/null
@@ -1,11 +0,0 @@
-package com.datamate.operator.domain.repository;
-
-import com.baomidou.mybatisplus.extension.repository.IRepository;
-import com.datamate.operator.domain.model.Category;
-import com.datamate.operator.interfaces.dto.CategoryDto;
-
-import java.util.List;
-
-public interface CategoryRepository extends IRepository<Category> {
-    List<CategoryDto> findAllCategories();
-}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/OperatorReleaseRepository.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/OperatorReleaseRepository.java
deleted file mode 100644
index 3ada0039..00000000
--- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/OperatorReleaseRepository.java
+++ /dev/null
@@ -1,17 +0,0 @@
-package com.datamate.operator.domain.repository;
-
-import com.baomidou.mybatisplus.extension.repository.IRepository;
-import com.datamate.operator.domain.model.OperatorRelease;
-import com.datamate.operator.interfaces.dto.OperatorReleaseDto;
-
-import java.util.List;
-
-public interface OperatorReleaseRepository extends IRepository<OperatorRelease> {
-    List<OperatorReleaseDto> findAllByOperatorId(String operatorId);
-
-    void insertOperatorRelease(OperatorReleaseDto operatorRelease);
-
-    void updateOperatorRelease(OperatorReleaseDto operatorRelease);
-
-    void deleteOperatorRelease(String operatorId);
-}
b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/OperatorRepository.java deleted file mode 100644 index 3555a147..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/OperatorRepository.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.datamate.operator.domain.repository; - -import com.baomidou.mybatisplus.extension.repository.IRepository; -import com.datamate.operator.domain.model.Operator; -import com.datamate.operator.interfaces.dto.OperatorDto; - -import java.util.List; - -public interface OperatorRepository extends IRepository<Operator> { - List<OperatorDto> findAllOperators(); - - void updateOperator(OperatorDto operator); - - void insertOperator(OperatorDto operator); - - void deleteOperator(String id); - - int countOperatorByStar(boolean isStar); - - boolean operatorInTemplateOrRunning(String operatorId); - - void incrementUsageCount(List<String> operatorIds); -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/OperatorViewRepository.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/OperatorViewRepository.java deleted file mode 100644 index 6e24a472..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/domain/repository/OperatorViewRepository.java +++ /dev/null @@ -1,16 +0,0 @@ -package com.datamate.operator.domain.repository; - -import com.baomidou.mybatisplus.extension.repository.IRepository; -import com.datamate.operator.domain.model.OperatorView; -import com.datamate.operator.interfaces.dto.OperatorDto; - -import java.util.List; - -public interface OperatorViewRepository extends IRepository<OperatorView> { - List<OperatorDto> findOperatorsByCriteria(Integer page, Integer size, String keyword, - List<List<String>> categories, Boolean isStar); - - int countOperatorsByCriteria(String keyword, List<List<String>> categories, Boolean isStar); - - OperatorView findOperatorById(String id); -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/CategoryConverter.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/CategoryConverter.java deleted file mode 100644 index 6aef958c..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/CategoryConverter.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.datamate.operator.infrastructure.converter; - -import com.datamate.operator.domain.model.Category; -import com.datamate.operator.interfaces.dto.CategoryDto; -import org.mapstruct.Mapper; -import org.mapstruct.factory.Mappers; - -import java.util.List; - -@Mapper -public interface CategoryConverter { - CategoryConverter INSTANCE = Mappers.getMapper(CategoryConverter.class); - - List<CategoryDto> fromEntityToDto (List<Category> dto); -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/CategoryRelationConverter.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/CategoryRelationConverter.java deleted file mode 100644 index f20b79e2..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/CategoryRelationConverter.java +++ /dev/null @@ -1,15 +0,0 @@ -package com.datamate.operator.infrastructure.converter; - -import com.datamate.operator.interfaces.dto.CategoryRelationDto; -import com.datamate.operator.domain.model.CategoryRelation; -import org.mapstruct.Mapper; -import org.mapstruct.factory.Mappers; - -import java.util.List; - -@Mapper -public interface CategoryRelationConverter { - CategoryRelationConverter INSTANCE = Mappers.getMapper(CategoryRelationConverter.class); - - List<CategoryRelationDto> fromEntityToDto (List<CategoryRelation> dto); -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/OperatorConverter.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/OperatorConverter.java deleted file mode 100644 index f531ce38..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/OperatorConverter.java +++ /dev/null @@ -1,39 +0,0 @@ -package com.datamate.operator.infrastructure.converter; - -import com.datamate.common.domain.model.ChunkUploadRequest; -import com.datamate.operator.domain.model.Operator; -import com.datamate.operator.domain.model.OperatorView; -import com.datamate.operator.interfaces.dto.OperatorDto; -import com.datamate.operator.interfaces.dto.UploadOperatorRequest; -import org.mapstruct.Mapper; -import org.mapstruct.Mapping; -import org.mapstruct.Named; -import org.mapstruct.factory.Mappers; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -@Mapper -public interface OperatorConverter { - OperatorConverter INSTANCE = Mappers.getMapper(OperatorConverter.class); - - @Mapping(target = "categories", source = "categories", qualifiedByName = "stringToList") - OperatorDto fromEntityToDto(OperatorView operator); - - List<OperatorDto> fromEntityViewToDto(List<OperatorView> operator); - - List<OperatorDto> fromEntityToDto(List<Operator> operator); - - @Named("stringToList") - static List<String> stringToList(String input) { - if (input == null || input.isEmpty()) { - return Collections.emptyList(); - } - return Arrays.stream(input.split(",")).map(String::valueOf).toList(); - } - - Operator fromDtoToEntity(OperatorDto operator); - - ChunkUploadRequest toChunkRequest(UploadOperatorRequest request); -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/OperatorReleaseConverter.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/OperatorReleaseConverter.java deleted file mode 100644 index e03eda49..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/converter/OperatorReleaseConverter.java +++ /dev/null @@ -1,17 +0,0 @@ -package com.datamate.operator.infrastructure.converter; - -import com.datamate.operator.domain.model.OperatorRelease; -import com.datamate.operator.interfaces.dto.OperatorReleaseDto; -import org.mapstruct.Mapper; -import org.mapstruct.factory.Mappers; - -import java.util.List; - -@Mapper -public interface OperatorReleaseConverter { - OperatorReleaseConverter INSTANCE = Mappers.getMapper(OperatorReleaseConverter.class); - - List<OperatorReleaseDto> fromEntityToDto(List<OperatorRelease> dto); - - OperatorRelease fromDtoToEntity(OperatorReleaseDto dto); -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/exception/OperatorErrorCode.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/exception/OperatorErrorCode.java deleted file mode 100644 index 4a767680..00000000 --- 
a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/exception/OperatorErrorCode.java +++ /dev/null @@ -1,27 +0,0 @@ -package com.datamate.operator.infrastructure.exception; - -import com.datamate.common.infrastructure.exception.ErrorCode; -import lombok.AllArgsConstructor; -import lombok.Getter; - -@Getter -@AllArgsConstructor -public enum OperatorErrorCode implements ErrorCode { - /** - * Unsupported file type - */ - UNSUPPORTED_FILE_TYPE("op.0001", "不支持的文件类型"), - - YAML_NOT_FOUND("op.0002", "算子中缺少元数据文件"), - - FIELD_NOT_FOUND("op.0003", "缺少必要的字段"), - - SETTINGS_PARSE_FAILED("op.0004", "settings字段解析失败"), - - OPERATOR_IN_INSTANCE("op.0005", "算子已被编排在模板或未完成的任务中"), - - CANT_DELETE_PREDEFINED_OPERATOR("op.0006", "预置算子无法删除"); - - private final String code; - private final String message; -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/parser/AbstractParser.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/parser/AbstractParser.java deleted file mode 100644 index a9f9d3c5..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/parser/AbstractParser.java +++ /dev/null @@ -1,89 +0,0 @@ -package com.datamate.operator.infrastructure.parser; - -import com.datamate.common.infrastructure.exception.BusinessException; -import com.datamate.common.infrastructure.exception.SystemErrorCode; -import com.datamate.operator.domain.contants.OperatorConstant; -import com.datamate.operator.infrastructure.exception.OperatorErrorCode; -import com.datamate.operator.interfaces.dto.OperatorDto; -import com.datamate.operator.interfaces.dto.OperatorReleaseDto; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import org.yaml.snakeyaml.LoaderOptions; -import org.yaml.snakeyaml.Yaml; -import org.yaml.snakeyaml.constructor.SafeConstructor; - -import java.io.File; -import java.io.InputStream; -import java.util.*; - -public abstract class AbstractParser { - protected ObjectMapper objectMapper = new ObjectMapper(); - - protected OperatorDto parseYaml(InputStream yamlContent) { - Yaml yaml = new Yaml(new SafeConstructor(new LoaderOptions())); - Map<String, Object> content = yaml.load(yamlContent); - OperatorDto operator = new OperatorDto(); - operator.setId(toStringIfNotNull(content.get("raw_id"))); - operator.setName(toStringIfNotNull(content.get("name"))); - operator.setDescription(toStringIfNotNull(content.get("description"))); - operator.setVersion(toStringIfNotNull(content.get("version"))); - operator.setInputs(toStringIfNotNull(content.get("inputs"))); - operator.setOutputs(toStringIfNotNull(content.get("outputs"))); - operator.setRuntime(toJsonIfNotNull(content.get("runtime"))); - operator.setSettings(toJsonIfNotNull(content.get("settings"))); - operator.setMetrics(toJsonIfNotNull(content.get("metrics"))); - Object changelog = content.get("release"); - OperatorReleaseDto operatorReleaseDto = new OperatorReleaseDto(); - if (changelog instanceof List) { - operatorReleaseDto.setChangelog((List<String>) changelog); - } else { - operatorReleaseDto.setChangelog(Collections.emptyList()); - } - operator.setReleases(List.of(operatorReleaseDto)); - List<String> categories = new ArrayList<>(); - categories.add(OperatorConstant.CATEGORY_MAP.get(toLowerCaseIfNotNull(content.get("language")))); - categories.add(OperatorConstant.CATEGORY_MAP.get(toLowerCaseIfNotNull(content.get("modal")))); - 
categories.add(OperatorConstant.CATEGORY_MAP.getOrDefault(toLowerCaseIfNotNull(content.get("vendor")), - OperatorConstant.CATEGORY_OTHER_VENDOR_ID)); - categories.add(OperatorConstant.CATEGORY_CUSTOMIZED_ID); - operator.setCategories(categories); - return operator; - } - - /** - * Read the yaml file at the given path inside the archive and parse it into the target type - * @param archive archive path (zip or tar) - * @param entryPath file path inside the archive, e.g. "config/app.yaml" or "./config/app.yaml" - * @return the parsed object - */ - public abstract OperatorDto parseYamlFromArchive(File archive, String entryPath); - - /** - * Extract the archive into the target directory (preserving relative paths) - * @param archive archive path - * @param targetDir target directory - */ - public abstract void extractTo(File archive, String targetDir); - - private String toStringIfNotNull(Object obj) { - if (obj == null) { - throw BusinessException.of(OperatorErrorCode.FIELD_NOT_FOUND); - } - return obj.toString(); - } - - private String toLowerCaseIfNotNull(Object obj) { - if (obj == null) { - throw BusinessException.of(OperatorErrorCode.FIELD_NOT_FOUND); - } - return obj.toString().toLowerCase(Locale.ROOT); - } - - private String toJsonIfNotNull(Object obj) { - try { - return obj == null ? null : objectMapper.writeValueAsString(obj); - } catch (JsonProcessingException e) { - throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR, e.getMessage()); - } - } -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/parser/ParserHolder.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/parser/ParserHolder.java deleted file mode 100644 index fa52b3e9..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/parser/ParserHolder.java +++ /dev/null @@ -1,64 +0,0 @@ -package com.datamate.operator.infrastructure.parser; - -import com.datamate.common.infrastructure.exception.BusinessException; -import com.datamate.common.infrastructure.exception.SystemErrorCode; -import com.datamate.operator.infrastructure.exception.OperatorErrorCode; -import com.datamate.operator.interfaces.dto.OperatorDto; -import jakarta.annotation.PostConstruct; -import org.apache.commons.io.FileUtils; -import org.springframework.stereotype.Component; - -import java.io.File; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; - -@Component -public class ParserHolder { - // Holds parsers: key is the parser type identifier (e.g. "zip" or "tar"), value is the parser instance - private final Map<String, AbstractParser> parserMap = new ConcurrentHashMap<>(); - - // Register a parser (can be called at startup) - public void registerParser(String type, AbstractParser parser) { - if (type == null || parser == null) { - throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR); - } - parserMap.put(type, parser); - } - - // Get a parser by type (may be null) - public AbstractParser getParser(String type) { - return parserMap.get(type); - } - - // Convenience delegate: read an entry from an archive of the given type and parse it into the target type - public OperatorDto parseYamlFromArchive(String type, File archive, String entryPath) { - AbstractParser parser = getParser(type); - if (parser == null) { - throw BusinessException.of(OperatorErrorCode.UNSUPPORTED_FILE_TYPE, - "No parser registered for type: " + type); - } - return parser.parseYamlFromArchive(archive, entryPath); - } - - // Convenience delegate: extract an archive of the given type into the target directory - public void extractTo(String type, File archive, String targetDir) { - AbstractParser parser = getParser(type); - if (parser == null) { - throw BusinessException.of(OperatorErrorCode.UNSUPPORTED_FILE_TYPE, - "No parser registered for type: " + type); - } - parser.extractTo(archive, targetDir); - FileUtils.deleteQuietly(archive); - } - - public void extractTo(String type, String sourceDir, String targetDir) { - extractTo(type, new File(sourceDir), targetDir); - } - - @PostConstruct - public void init() { - // Register the zip and tar parsers; the keys can be adjusted as needed (e.g. "zip"/"tar") - registerParser("zip", new ZipParser()); - registerParser("tar", new TarParser()); - } -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/parser/TarParser.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/parser/TarParser.java deleted file mode 100644 index b363025e..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/parser/TarParser.java +++ /dev/null @@ -1,77 +0,0 @@ -package com.datamate.operator.infrastructure.parser; - -import com.datamate.common.infrastructure.exception.BusinessException; -import com.datamate.common.infrastructure.exception.SystemErrorCode; -import com.datamate.operator.infrastructure.exception.OperatorErrorCode; -import com.datamate.operator.interfaces.dto.OperatorDto; -import org.apache.commons.compress.archivers.tar.TarArchiveEntry; -import org.apache.commons.compress.archivers.tar.TarArchiveInputStream; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Objects; - -public class TarParser extends AbstractParser { - - @Override - public OperatorDto parseYamlFromArchive(File archive, String entryPath) { - // Allow the entry path with or without a leading "./" - String normalized = entryPath.startsWith("./") ? entryPath.substring(2) : entryPath; - try (InputStream fis = Files.newInputStream(archive.toPath()); - TarArchiveInputStream tis = new TarArchiveInputStream(fis)) { - TarArchiveEntry entry; - while ((entry = tis.getNextEntry()) != null) { - String name = entry.getName(); - if (Objects.equals(name, entryPath) || Objects.equals(name, normalized)) { - // Parse the current entry's content into the target type with SnakeYAML - return parseYaml(tis); - } - } - } catch (IOException e) { - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR, e.getMessage()); - } - throw BusinessException.of(OperatorErrorCode.YAML_NOT_FOUND, "Entry not found in tar: " + entryPath); - } - - @Override - public void extractTo(File archive, String targetDir) { - Path targetPath = Paths.get(targetDir); - try (InputStream fis = Files.newInputStream(archive.toPath()); - TarArchiveInputStream tis = new TarArchiveInputStream(fis)) { - Files.createDirectories(targetPath); - TarArchiveEntry entry; - while ((entry = tis.getNextEntry()) != null) { - String entryName = entry.getName(); - // Strip a possible leading "./" - if (entryName.startsWith("./")) { - entryName = entryName.substring(2); - } - - Path resolved = targetPath.resolve(entryName).toAbsolutePath().normalize(); - if (!resolved.startsWith(targetPath.toAbsolutePath().normalize())) { - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR, "Bad tar entry: " + entryName); - } - - if (entry.isDirectory()) { - Files.createDirectories(resolved); - } else { - Files.createDirectories(resolved.getParent()); - try (OutputStream os = Files.newOutputStream(resolved)) { - byte[] buffer = new byte[8192]; - int len; - while ((len = tis.read(buffer)) != -1) { - os.write(buffer, 0, len); - } - } - } - } - } catch (IOException e) { - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR, e.getMessage()); - } - } -}
diff --git 
a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/parser/ZipParser.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/parser/ZipParser.java deleted file mode 100644 index f02bc25d..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/parser/ZipParser.java +++ /dev/null @@ -1,77 +0,0 @@ -package com.datamate.operator.infrastructure.parser; - -import com.datamate.common.infrastructure.exception.BusinessException; -import com.datamate.common.infrastructure.exception.SystemErrorCode; -import com.datamate.operator.infrastructure.exception.OperatorErrorCode; -import com.datamate.operator.interfaces.dto.OperatorDto; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.file.Files; -import java.nio.file.LinkOption; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.Enumeration; -import java.util.zip.ZipEntry; -import java.util.zip.ZipFile; - -public class ZipParser extends AbstractParser { - - @Override - public OperatorDto parseYamlFromArchive(File archive, String entryPath) { - try (ZipFile zipFile = new ZipFile(archive)) { - // Allow the entry path with or without a leading "./" - String normalized = entryPath.startsWith("./") ? entryPath.substring(2) : entryPath; - ZipEntry entry = zipFile.getEntry(entryPath); - if (entry == null) { - entry = zipFile.getEntry(normalized); - } - if (entry == null) { - throw BusinessException.of(OperatorErrorCode.YAML_NOT_FOUND, "Entry not found in zip: " + entryPath); - } - try (InputStream is = zipFile.getInputStream(entry)) { - // Parse into the target type with SnakeYAML - return parseYaml(is); - } - } catch (IOException e) { - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR, e.getMessage()); - } - } - - @Override - public void extractTo(File archive, String targetDir) { - Path targetPath = Paths.get(targetDir); - try (ZipFile zipFile = new ZipFile(archive)) { - Files.createDirectories(targetPath); - Enumeration<? extends ZipEntry> entries = zipFile.entries(); - while (entries.hasMoreElements()) { - ZipEntry entry = entries.nextElement(); - String entryName = entry.getName(); - - // Prevent Zip Slip: ensure the extraction path stays under targetDir - Path resolved = targetPath.resolve(entryName).toAbsolutePath().normalize(); - if (!resolved.startsWith(targetPath.toAbsolutePath().normalize())) { - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR, "Bad zip entry: " + entryName); - } - - if (entry.isDirectory()) { - Files.createDirectories(resolved); - } else { - Files.createDirectories(resolved.getParent()); - try (InputStream is = zipFile.getInputStream(entry); - OutputStream os = Files.newOutputStream(resolved)) { - byte[] buffer = new byte[8192]; - int len; - while ((len = is.read(buffer)) != -1) { - os.write(buffer, 0, len); - } - } - } - } - } catch (IOException e) { - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR, e.getMessage()); - } - } -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/CategoryRelationRepositoryImpl.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/CategoryRelationRepositoryImpl.java deleted file mode 100644 index 9701e208..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/CategoryRelationRepositoryImpl.java +++ /dev/null @@ -1,60 +0,0 @@ -package 
com.datamate.operator.infrastructure.persistence.Impl; - -import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; -import com.baomidou.mybatisplus.extension.repository.CrudRepository; -import com.datamate.operator.domain.contants.OperatorConstant; -import com.datamate.operator.domain.model.CategoryRelation; -import com.datamate.operator.domain.repository.CategoryRelationRepository; -import com.datamate.operator.infrastructure.converter.CategoryRelationConverter; -import com.datamate.operator.infrastructure.persistence.mapper.CategoryRelationMapper; -import com.datamate.operator.interfaces.dto.CategoryRelationDto; -import lombok.RequiredArgsConstructor; -import org.springframework.stereotype.Repository; - -import java.util.List; - -@Repository -@RequiredArgsConstructor -public class CategoryRelationRepositoryImpl extends CrudRepository<CategoryRelationMapper, CategoryRelation> - implements CategoryRelationRepository { - private final CategoryRelationMapper mapper; - - @Override - public List<CategoryRelationDto> findAllRelation() { - return CategoryRelationConverter.INSTANCE.fromEntityToDto(mapper.selectList(null)); - } - - @Override - public void batchInsert(String operatorId, List<String> categories) { - List<CategoryRelation> categoryRelations = categories.stream() - .map(category -> new CategoryRelation(category, operatorId)) - .toList(); - mapper.insert(categoryRelations); - } - - @Override - public void batchUpdate(String operatorId, List<String> categories) { - List<CategoryRelation> categoryRelations = categories.stream() - .map(category -> new CategoryRelation(category, operatorId)) - .toList(); - LambdaQueryWrapper<CategoryRelation> queryWrapper = new LambdaQueryWrapper<>(); - queryWrapper.eq(CategoryRelation::getOperatorId, operatorId); - mapper.delete(queryWrapper); - mapper.insert(categoryRelations); - } - - @Override - public void deleteByOperatorId(String operatorId) { - LambdaQueryWrapper<CategoryRelation> queryWrapper = new LambdaQueryWrapper<>(); - queryWrapper.eq(CategoryRelation::getOperatorId, operatorId); - mapper.delete(queryWrapper); - } - - @Override - public boolean operatorIsPredefined(String operatorId) { - LambdaQueryWrapper<CategoryRelation> queryWrapper = new LambdaQueryWrapper<>(); - queryWrapper.eq(CategoryRelation::getOperatorId, operatorId) - .eq(CategoryRelation::getCategoryId, OperatorConstant.CATEGORY_PREDEFINED_ID); - return this.exists(queryWrapper); - } -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/CategoryRepositoryImpl.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/CategoryRepositoryImpl.java deleted file mode 100644 index 14d5c7c0..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/CategoryRepositoryImpl.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.datamate.operator.infrastructure.persistence.Impl; - -import com.baomidou.mybatisplus.extension.repository.CrudRepository; -import com.datamate.operator.domain.model.Category; -import com.datamate.operator.domain.repository.CategoryRepository; -import com.datamate.operator.infrastructure.converter.CategoryConverter; -import com.datamate.operator.infrastructure.persistence.mapper.CategoryMapper; -import com.datamate.operator.interfaces.dto.CategoryDto; -import lombok.RequiredArgsConstructor; -import org.springframework.stereotype.Repository; - -import java.util.List; - -@Repository -@RequiredArgsConstructor -public class CategoryRepositoryImpl extends CrudRepository<CategoryMapper, Category> implements CategoryRepository { - private final CategoryMapper mapper; - - - @Override - public List<CategoryDto> findAllCategories() { - return CategoryConverter.INSTANCE.fromEntityToDto(mapper.selectList(null)); - } -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorReleaseRepositoryImpl.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorReleaseRepositoryImpl.java deleted file mode 100644 index 59b98db2..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorReleaseRepositoryImpl.java +++ /dev/null @@ -1,46 +0,0 @@ -package com.datamate.operator.infrastructure.persistence.Impl; - -import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; -import com.baomidou.mybatisplus.extension.repository.CrudRepository; -import com.datamate.operator.domain.model.OperatorRelease; -import com.datamate.operator.domain.repository.OperatorReleaseRepository; -import com.datamate.operator.infrastructure.converter.OperatorReleaseConverter; - -import com.datamate.operator.infrastructure.persistence.mapper.OperatorReleaseMapper; -import com.datamate.operator.interfaces.dto.OperatorReleaseDto; -import lombok.RequiredArgsConstructor; -import org.springframework.stereotype.Repository; - -import java.util.List; - -@Repository -@RequiredArgsConstructor -public class OperatorReleaseRepositoryImpl extends CrudRepository<OperatorReleaseMapper, OperatorRelease> implements OperatorReleaseRepository { - private final OperatorReleaseMapper mapper; - - public List<OperatorReleaseDto> findAllByOperatorId(String operatorId) { - QueryWrapper<OperatorRelease> queryWrapper = new QueryWrapper<>(); - queryWrapper.eq("id", operatorId) - .orderByDesc("release_date"); - return OperatorReleaseConverter.INSTANCE.fromEntityToDto(mapper.selectList(queryWrapper)); - } - - @Override - public void insertOperatorRelease(OperatorReleaseDto operatorReleaseDto) { - mapper.insert(OperatorReleaseConverter.INSTANCE.fromDtoToEntity(operatorReleaseDto)); - } - - @Override - public void updateOperatorRelease(OperatorReleaseDto operatorReleaseDto) { - QueryWrapper<OperatorRelease> queryWrapper = new QueryWrapper<>(); - queryWrapper.eq("id", operatorReleaseDto.getId()) - .eq("version", operatorReleaseDto.getVersion()); - mapper.update(OperatorReleaseConverter.INSTANCE.fromDtoToEntity(operatorReleaseDto), queryWrapper); - } - - @Override - public void deleteOperatorRelease(String operatorId) { - mapper.delete(new QueryWrapper<OperatorRelease>().eq("id", operatorId)); - } - -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorRepositoryImpl.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorRepositoryImpl.java deleted file mode 100644 index 7b43869b..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorRepositoryImpl.java +++ /dev/null @@ -1,65 +0,0 @@ -package com.datamate.operator.infrastructure.persistence.Impl; - -import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; -import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; -import com.baomidou.mybatisplus.extension.repository.CrudRepository; -import com.datamate.operator.infrastructure.converter.OperatorConverter; -import com.datamate.operator.domain.model.Operator; -import com.datamate.operator.domain.repository.OperatorRepository; -import 
com.datamate.operator.infrastructure.persistence.mapper.OperatorMapper; -import com.datamate.operator.interfaces.dto.OperatorDto; -import lombok.RequiredArgsConstructor; -import org.springframework.stereotype.Repository; - -import java.util.Collections; -import java.util.List; - -@Repository -@RequiredArgsConstructor -public class OperatorRepositoryImpl extends CrudRepository<OperatorMapper, Operator> implements OperatorRepository { - private final OperatorMapper mapper; - - @Override - public List<OperatorDto> findAllOperators() { - return OperatorConverter.INSTANCE.fromEntityToDto(mapper.selectList(null)); - } - - @Override - public void updateOperator(OperatorDto operator) { - mapper.updateById(OperatorConverter.INSTANCE.fromDtoToEntity(operator)); - } - - @Override - public void insertOperator(OperatorDto operator) { - mapper.insert(OperatorConverter.INSTANCE.fromDtoToEntity(operator)); - } - - @Override - public void deleteOperator(String id) { - mapper.deleteById(id); - } - - @Override - public int countOperatorByStar(boolean isStar) { - LambdaQueryWrapper<Operator> queryWrapper = new LambdaQueryWrapper<>(); - queryWrapper.eq(Operator::getIsStar, isStar); - return Math.toIntExact(mapper.selectCount(queryWrapper)); - } - - @Override - public boolean operatorInTemplateOrRunning(String operatorId) { - return mapper.operatorInTemplate(operatorId) > 0 && mapper.operatorInUnstopTask(operatorId) > 0; - } - - @Override - public void incrementUsageCount(List<String> operatorIds) { - if (operatorIds == null || operatorIds.isEmpty()) { - return; - } - Collections.sort(operatorIds); - LambdaUpdateWrapper<Operator> updateWrapper = new LambdaUpdateWrapper<>(); - updateWrapper.in(Operator::getId, operatorIds) - .setSql("usage_count = usage_count + 1"); - this.update(updateWrapper); - } -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorViewRepositoryImpl.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorViewRepositoryImpl.java deleted file mode 100644 index 54a77775..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/Impl/OperatorViewRepositoryImpl.java +++ /dev/null @@ -1,86 +0,0 @@ -package com.datamate.operator.infrastructure.persistence.Impl; - -import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; -import com.baomidou.mybatisplus.core.metadata.IPage; -import com.baomidou.mybatisplus.core.toolkit.Wrappers; -import com.baomidou.mybatisplus.extension.plugins.pagination.Page; -import com.baomidou.mybatisplus.extension.repository.CrudRepository; -import com.datamate.operator.domain.model.OperatorView; -import com.datamate.operator.domain.repository.OperatorViewRepository; -import com.datamate.operator.infrastructure.converter.OperatorConverter; -import com.datamate.operator.infrastructure.persistence.mapper.OperatorViewMapper; -import com.datamate.operator.interfaces.dto.OperatorDto; -import io.micrometer.common.util.StringUtils; -import lombok.RequiredArgsConstructor; -import org.apache.commons.collections4.CollectionUtils; -import org.springframework.stereotype.Repository; - -import java.util.List; -import java.util.stream.Collectors; - -@Repository -@RequiredArgsConstructor -public class OperatorViewRepositoryImpl extends CrudRepository<OperatorViewMapper, OperatorView> implements OperatorViewRepository { - private final OperatorViewMapper mapper; - - @Override - public List<OperatorDto> findOperatorsByCriteria(Integer page, Integer size, String keyword, - List<List<String>> categories, Boolean isStar) { - QueryWrapper<OperatorView> queryWrapper = getQueryWrapper(keyword, categories, isStar); - - Page<OperatorView> queryPage; - if (size != null && page != null) { - queryPage = new Page<>(page + 1, size); - } else { - queryPage = new Page<>(1, -1); - } - IPage<OperatorView> operators = mapper.findOperatorsByCriteria(queryPage, queryWrapper); - - return OperatorConverter.INSTANCE.fromEntityViewToDto(operators.getRecords()); - } - - @Override - public int countOperatorsByCriteria(String keyword, List<List<String>> categories, Boolean isStar) { - QueryWrapper<OperatorView> queryWrapper = getQueryWrapper(keyword, categories, isStar); - Integer count = mapper.countOperatorsByCriteria(queryWrapper); - return count != null ? count : 0; - } - - @Override - public OperatorView findOperatorById(String id) { - return mapper.findOperatorById(id); - } - - private QueryWrapper<OperatorView> getQueryWrapper(String keyword, List<List<String>> categories, Boolean isStar) { - QueryWrapper<OperatorView> queryWrapper = Wrappers.query(); - queryWrapper.eq(isStar != null, "is_star", isStar); - if (StringUtils.isNotEmpty(keyword)) { - queryWrapper.and(w -> - w.apply("operator_name ILIKE {0}", "%" + keyword + "%") - .or() - .apply("description ILIKE {0}", "%" + keyword + "%")); - } - StringBuilder havingSql = new StringBuilder(); - if (CollectionUtils.isNotEmpty(categories)) { - queryWrapper.in("category_id", categories.stream().flatMap(List::stream).toList()); - int index = 0; - for (List<String> category : categories) { - if (index > 0) { - havingSql.append(" AND "); - } - havingSql.append("SUM(CASE WHEN CAST(category_id AS TEXT) IN ("); - havingSql.append(category.stream() - .map(id -> "'" + id + "'") - .collect(Collectors.joining(","))); - havingSql.append(") THEN 1 ELSE 0 END) > 0"); - index++; - } - } - - queryWrapper.groupBy("operator_id", "operator_name", "description", "version", "inputs", "outputs", - "runtime", "settings", "is_star", "file_size", "usage_count", "created_at", "updated_at", "created_by", "updated_by") - .having(!havingSql.isEmpty(), havingSql.toString()) - .orderByDesc("created_at"); - return queryWrapper; - } -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/CategoryMapper.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/CategoryMapper.java deleted file mode 100644 index ee7e1068..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/CategoryMapper.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.datamate.operator.infrastructure.persistence.mapper; - -import com.baomidou.mybatisplus.core.mapper.BaseMapper; -import com.datamate.common.infrastructure.config.IgnoreDataScopeAnnotation; -import com.datamate.operator.domain.model.Category; -import org.apache.ibatis.annotations.Mapper; - -@Mapper -@IgnoreDataScopeAnnotation -public interface CategoryMapper extends BaseMapper<Category> { -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/CategoryRelationMapper.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/CategoryRelationMapper.java deleted file mode 100644 index 9237fcaf..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/CategoryRelationMapper.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.datamate.operator.infrastructure.persistence.mapper; - -import 
com.baomidou.mybatisplus.core.mapper.BaseMapper; -import com.datamate.common.infrastructure.config.IgnoreDataScopeAnnotation; -import com.datamate.operator.domain.model.CategoryRelation; -import org.apache.ibatis.annotations.Mapper; - -@Mapper -@IgnoreDataScopeAnnotation -public interface CategoryRelationMapper extends BaseMapper<CategoryRelation> { -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorMapper.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorMapper.java deleted file mode 100644 index 95ef7e1a..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorMapper.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.datamate.operator.infrastructure.persistence.mapper; - -import com.baomidou.mybatisplus.core.mapper.BaseMapper; -import com.datamate.common.infrastructure.config.IgnoreDataScopeAnnotation; -import com.datamate.operator.domain.model.Operator; -import org.apache.ibatis.annotations.Mapper; -import org.apache.ibatis.annotations.Select; - -@Mapper -public interface OperatorMapper extends BaseMapper<Operator> { - - @IgnoreDataScopeAnnotation - @Select("SELECT count(1) FROM t_operator_instance oi JOIN t_clean_template t ON oi.instance_id = t.id " + - "WHERE oi.operator_id = #{operatorId}") - int operatorInTemplate(String operatorId); - - @IgnoreDataScopeAnnotation - @Select("SELECT count(1) FROM t_operator_instance oi JOIN t_clean_task t ON oi.instance_id = t.id " + - "WHERE oi.operator_id = #{operatorId} AND t.status != 'COMPLETED'") - int operatorInUnstopTask(String operatorId); -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorReleaseMapper.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorReleaseMapper.java deleted file mode 100644 index de9023d6..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorReleaseMapper.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.datamate.operator.infrastructure.persistence.mapper; - -import com.baomidou.mybatisplus.core.mapper.BaseMapper; -import com.datamate.common.infrastructure.config.IgnoreDataScopeAnnotation; -import com.datamate.operator.domain.model.OperatorRelease; -import org.apache.ibatis.annotations.Mapper; - -@Mapper -@IgnoreDataScopeAnnotation -public interface OperatorReleaseMapper extends BaseMapper<OperatorRelease> { -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorViewMapper.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorViewMapper.java deleted file mode 100644 index 99ed5e4f..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/infrastructure/persistence/mapper/OperatorViewMapper.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.datamate.operator.infrastructure.persistence.mapper; - -import com.baomidou.mybatisplus.core.conditions.Wrapper; -import com.baomidou.mybatisplus.core.mapper.BaseMapper; -import com.baomidou.mybatisplus.core.metadata.IPage; -import com.baomidou.mybatisplus.core.toolkit.Constants; -import com.datamate.operator.domain.model.OperatorView; -import org.apache.ibatis.annotations.Mapper; -import org.apache.ibatis.annotations.Param; -import org.apache.ibatis.annotations.Select; - - -@Mapper -public interface OperatorViewMapper extends BaseMapper<OperatorView> { - @Select("SELECT operator_id AS id, operator_name AS name, description, version, inputs, outputs, runtime, " + - "settings, is_star, file_size, usage_count, created_at, updated_at, created_by, updated_by, " + - "STRING_AGG(CAST(category_id AS TEXT), ',' ORDER BY created_at DESC) AS categories " + - "FROM v_operator ${ew.customSqlSegment}") - IPage<OperatorView> findOperatorsByCriteria(IPage<OperatorView> page, - @Param(Constants.WRAPPER) Wrapper<OperatorView> queryWrapper); - - @Select("SELECT COUNT(1) FROM (SELECT operator_id AS id, operator_name AS name, description, version, inputs, outputs, runtime, " + - "settings, is_star, file_size, usage_count, created_at, updated_at, created_by, updated_by FROM v_operator ${ew.customSqlSegment}) AS t") - Integer countOperatorsByCriteria(@Param(Constants.WRAPPER) Wrapper<OperatorView> queryWrapper); - - @Select("SELECT operator_id AS id, operator_name AS name, description, version, inputs, outputs, runtime, " + - "settings, is_star, file_name, file_size, usage_count, metrics, created_at, updated_at, created_by, updated_by, " + - "STRING_AGG(category_name, ',' ORDER BY created_at DESC) AS categories " + - "FROM v_operator WHERE operator_id = #{id} " + - "GROUP BY operator_id, operator_name, description, version, inputs, outputs, runtime, settings, is_star, " + - "file_name, file_size, usage_count, metrics, created_at, updated_at, created_by, updated_by") - OperatorView findOperatorById(@Param("id") String id); -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryDto.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryDto.java deleted file mode 100644 index bd29df01..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryDto.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.datamate.operator.interfaces.dto; - -import lombok.Getter; -import lombok.Setter; - -import java.time.LocalDateTime; - -@Setter -@Getter -public class CategoryDto { - private String id; - - private String name; - - private String value; - - private long count; - - private String type; - - private String parentId; - - private LocalDateTime createdAt; -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryRelationDto.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryRelationDto.java deleted file mode 100644 index e573d6b6..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryRelationDto.java +++ /dev/null @@ -1,12 +0,0 @@ -package com.datamate.operator.interfaces.dto; - -import lombok.Getter; -import lombok.Setter; - -@Setter -@Getter -public class CategoryRelationDto { - private String categoryId; - - private String operatorId; -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryTreePagedResponse.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryTreePagedResponse.java deleted file mode 100644 index cc584b2f..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryTreePagedResponse.java +++ /dev/null @@ -1,24 +0,0 
@@ -package com.datamate.operator.interfaces.dto; - -import com.datamate.common.interfaces.PagedResponse; -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.Setter; - -import java.util.List; - -@Getter -@Setter -@AllArgsConstructor -public class CategoryTreePagedResponse extends PagedResponse<CategoryTreeResponse> { - Integer starCount; - - public CategoryTreePagedResponse(List<CategoryTreeResponse> content, Integer starCount) { - super(content); - this.starCount = starCount; - } - - public static CategoryTreePagedResponse of(List<CategoryTreeResponse> content, Integer starCount) { - return new CategoryTreePagedResponse(content, starCount); - } -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryTreeResponse.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryTreeResponse.java deleted file mode 100644 index 4e80506f..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/CategoryTreeResponse.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.datamate.operator.interfaces.dto; - -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; - -import java.util.ArrayList; -import java.util.List; - - -@Getter -@Setter -@NoArgsConstructor -public class CategoryTreeResponse { - private String id; - - private String name; - - private Integer count; - - private List<CategoryTreeResponse> categories = new ArrayList<>(); -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/LabelDto.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/LabelDto.java deleted file mode 100644 index 4b811151..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/LabelDto.java +++ /dev/null @@ -1,13 +0,0 @@ -package com.datamate.operator.interfaces.dto; - -import lombok.Getter; -import lombok.Setter; - -@Getter -@Setter -public class LabelDto { - - private String id; - - private String name; -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/OperatorDto.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/OperatorDto.java deleted file mode 100644 index bc431426..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/OperatorDto.java +++ /dev/null @@ -1,60 +0,0 @@ -package com.datamate.operator.interfaces.dto; - -import lombok.Getter; -import lombok.Setter; -import org.springframework.format.annotation.DateTimeFormat; - -import java.time.LocalDateTime; -import java.util.List; -import java.util.Map; - -/** - * OperatorDto - */ - -@Getter -@Setter -public class OperatorDto { - private String id; - - private String name; - - private String description; - - private String version; - - private String inputs; - - private String outputs; - - private List<String> categories; - - private String runtime; - - private String settings; - - private Map<String, Object> overrides; - - private String fileName; - - private Long fileSize; - - private String metrics; - - private Integer usageCount; - - private Boolean isStar; - - private List<String> requirements; - - private String readme; - - private List<OperatorReleaseDto> releases; - - @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME) - private LocalDateTime createdAt; - - @DateTimeFormat(iso = DateTimeFormat.ISO.DATE_TIME) - private LocalDateTime updatedAt; -} - 
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/OperatorReleaseDto.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/OperatorReleaseDto.java deleted file mode 100644 index 8d0ba2c5..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/OperatorReleaseDto.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.datamate.operator.interfaces.dto; - -import lombok.Getter; -import lombok.Setter; - -import java.time.LocalDateTime; -import java.util.List; - -@Getter -@Setter -public class OperatorReleaseDto { - private String id; - - private String version; - - private LocalDateTime releaseDate; - - private List<String> changelog; -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/OperatorsListPostRequest.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/OperatorsListPostRequest.java deleted file mode 100644 index 3c107a3b..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/OperatorsListPostRequest.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.datamate.operator.interfaces.dto; - -import java.util.ArrayList; -import java.util.List; - - -import com.datamate.common.interfaces.PagingQuery; -import lombok.Getter; -import lombok.Setter; -import org.springaicommunity.mcp.annotation.McpToolParam; - -/** - * OperatorsListPostRequest - */ - -@Getter -@Setter -public class OperatorsListPostRequest extends PagingQuery { - @McpToolParam(description = "算子分类id列表,每个父分类下的id放到一个列表中,最后汇总成一个大的列表", required = false) - private List<List<String>> categories = new ArrayList<>(); - - @McpToolParam(description = "算子关键词,支持查询算子名称和算子描述关键词查询", required = false) - private String keyword; - - @McpToolParam(description = "算子关联的标签名称,当前暂不支持", required = false) - private String labelName; - - @McpToolParam(description = "算子是否被收藏", required = false) - private Boolean isStar; -} - 
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/UploadOperatorRequest.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/UploadOperatorRequest.java deleted file mode 100644 index c6ebb611..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/dto/UploadOperatorRequest.java +++ /dev/null @@ -1,34 +0,0 @@ -package com.datamate.operator.interfaces.dto; - -import lombok.Getter; -import lombok.Setter; -import org.springframework.web.multipart.MultipartFile; - -/** - * File upload request - * Wraps the request parameters for chunked file uploads; supports uploading large files in chunks - */ -@Getter -@Setter -public class UploadOperatorRequest { - /** ID returned by pre-upload, used to correlate chunks of the same task */ - private String reqId; - - /** File number, identifying the position of this file within a batch upload */ - private int fileNo; - - /** File name */ - private String fileName; - - /** Total number of chunks for the file */ - private int totalChunkNum; - - /** Current chunk number, starting from 1 */ - private int chunkNo; - - /** Uploaded file chunk content */ - private MultipartFile file; - - /** Checksum of the file chunk (hex string), used to verify file integrity */ - private String checkSumHex; -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/rest/CategoryController.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/rest/CategoryController.java deleted file mode 100644 index df51fe63..00000000 --- 
a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/rest/CategoryController.java +++ /dev/null @@ -1,32 +0,0 @@ -package com.datamate.operator.interfaces.rest; - -import com.datamate.common.interfaces.PagedResponse; -import com.datamate.operator.application.CategoryService; -import com.datamate.operator.domain.repository.OperatorRepository; -import com.datamate.operator.interfaces.dto.CategoryTreePagedResponse; -import com.datamate.operator.interfaces.dto.CategoryTreeResponse; -import lombok.RequiredArgsConstructor; -import org.springaicommunity.mcp.annotation.McpTool; -import org.springframework.web.bind.annotation.GetMapping; -import org.springframework.web.bind.annotation.RequestMapping; -import org.springframework.web.bind.annotation.RestController; - -import java.util.List; - - -@RestController -@RequestMapping("/categories") -@RequiredArgsConstructor -public class CategoryController { - private final CategoryService categoryService; - - private final OperatorRepository operatorRepo; - - @GetMapping("/tree") - @McpTool(name = "query_category_tree", - description = "算子树状分类查询,获取包含分组维度(如语言、模态)及资源统计数量的分页层级分类数据。") - public PagedResponse<CategoryTreeResponse> categoryTreeGet() { - List<CategoryTreeResponse> allCategories = categoryService.getAllCategories(); - return CategoryTreePagedResponse.of(allCategories, operatorRepo.countOperatorByStar(true)); - } -}
diff --git a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/rest/OperatorController.java b/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/rest/OperatorController.java deleted file mode 100644 index df50cb79..00000000 --- a/backend/services/operator-market-service/src/main/java/com/datamate/operator/interfaces/rest/OperatorController.java +++ /dev/null @@ -1,92 +0,0 @@ -package com.datamate.operator.interfaces.rest; - -import com.datamate.common.infrastructure.common.IgnoreResponseWrap; -import com.datamate.common.interfaces.PagedResponse; -import com.datamate.operator.application.OperatorService; -import com.datamate.operator.interfaces.dto.OperatorDto; -import com.datamate.operator.interfaces.dto.OperatorsListPostRequest; -import com.datamate.operator.interfaces.dto.UploadOperatorRequest; -import lombok.RequiredArgsConstructor; -import org.springaicommunity.mcp.annotation.McpTool; -import org.springframework.core.io.Resource; -import org.springframework.http.HttpHeaders; -import org.springframework.http.HttpStatus; -import org.springframework.http.MediaType; -import org.springframework.http.ResponseEntity; -import org.springframework.web.bind.annotation.*; - -import java.io.File; -import java.util.List; - -@RestController -@RequestMapping("/operators") -@RequiredArgsConstructor -public class OperatorController { - private final OperatorService operatorService; - - @PostMapping("/list") - @McpTool(name = "query_operator_list", description = "根据参数查询算子列表") - public PagedResponse<OperatorDto> operatorsListPost(@RequestBody OperatorsListPostRequest request) { - List<List<String>> categories = request.getCategories(); - List<OperatorDto> responses = operatorService.getOperators(request.getPage(), request.getSize(), - categories, request.getKeyword(), request.getIsStar()); - int count = operatorService.getOperatorsCount(categories, request.getKeyword(), request.getIsStar()); - int totalPages = (count + request.getSize() + 1) / request.getSize(); - return PagedResponse.of(responses, request.getPage(), count, totalPages); - } - - @GetMapping("/{id}") - public OperatorDto operatorsIdGet(@PathVariable("id") String id) { - return operatorService.getOperatorById(id); - } - - @PutMapping("/{id}") - public OperatorDto operatorsIdPut(@PathVariable("id") String id, - @RequestBody OperatorDto updateOperatorRequest) { - return operatorService.updateOperator(id, updateOperatorRequest); - } - - @PostMapping("/create") - public OperatorDto operatorsCreatePost(@RequestBody OperatorDto createOperatorRequest) { - return operatorService.createOperator(createOperatorRequest); - } - - @PostMapping("/upload") - public OperatorDto operatorsUploadPost(@RequestBody UploadOperatorRequest request) { - return operatorService.uploadOperator(request.getFileName()); - } - - @PostMapping(value = "/upload/pre-upload", produces = MediaType.APPLICATION_JSON_VALUE) - public String preUpload() { - return operatorService.preUpload(); - } - - @PostMapping("/upload/chunk") - public void chunkUpload(@ModelAttribute UploadOperatorRequest request) { - operatorService.chunkUpload(request); - } - - @DeleteMapping("/{id}") - public void operatorDelete(@PathVariable("id") String id) { - operatorService.deleteOperator(id); - } - - @IgnoreResponseWrap - @GetMapping(value = "/examples/download", produces = MediaType.APPLICATION_OCTET_STREAM_VALUE + ";charset=UTF-8") - public ResponseEntity<Resource> downloadDatasetFileById() { - try { - File file = new File("/opt/backend/test_operator.tar"); - Resource resource = operatorService.downloadExampleOperator(file); - return ResponseEntity.ok() - .contentType(MediaType.APPLICATION_OCTET_STREAM) - .header(HttpHeaders.ACCESS_CONTROL_EXPOSE_HEADERS, HttpHeaders.CONTENT_DISPOSITION) - .header(HttpHeaders.CONTENT_DISPOSITION, - "attachment; filename=\"" + file.getName() + "\"") - .body(resource); - } catch (IllegalArgumentException e) { - return ResponseEntity.status(HttpStatus.NOT_FOUND).build(); - } catch (Exception e) { - return ResponseEntity.status(HttpStatus.INTERNAL_SERVER_ERROR).build(); - } - } -}
diff --git a/backend/services/pom.xml b/backend/services/pom.xml index f103535b..df00fede 100644 --- a/backend/services/pom.xml +++ b/backend/services/pom.xml @@ -20,8 +20,6 @@ <module>data-management-service</module> - <module>operator-market-service</module> - <module>data-cleaning-service</module> <module>rag-indexer-service</module>
diff --git a/deployment/docker/datamate/docker-compose.yml b/deployment/docker/datamate/docker-compose.yml index 9d9e0fec..e2d97e41 100644 --- a/deployment/docker/datamate/docker-compose.yml +++ b/deployment/docker/datamate/docker-compose.yml @@ -35,6 +35,8 @@ services: - flow_volume:/flow - log_volume:/var/log/datamate - graph_data_volume:/data/rag_storage + - operator-upload-volume:/operators/upload + - operator-runtime-volume:/operators/extract networks: [ datamate ] depends_on: - datamate-database
diff --git a/deployment/helm/datamate/values.yaml b/deployment/helm/datamate/values.yaml index 9ba25700..1044b809 100644 --- a/deployment/helm/datamate/values.yaml +++ b/deployment/helm/datamate/values.yaml @@ -143,6 +143,7 @@ backend-python: - *datasetVolume - *flowVolume - *logVolume + - *operatorVolume volumeMounts: - name: dataset-volume mountPath: /dataset @@ -150,6 +151,8 @@ backend-python: mountPath: /flow - name: log-volume mountPath: /var/log/datamate + - name: operator-volume + mountPath: /operators gateway: env:
diff --git a/frontend/public/config/error-code.json b/frontend/public/config/error-code.json index 17137f5e..8270f4db 100644 --- a/frontend/public/config/error-code.json +++ b/frontend/public/config/error-code.json @@ -1,20 +1,25 @@ { + "0": "成功", + "cleaning.0001": "清洗任务不存在", + "cleaning.0002": "清洗任务名称重复", + "cleaning.0003": 
"清洗模板不存在", + "cleaning.0004": "清洗模板名称重复", + "cleaning.0005": "算子输入输出类型不匹配", + "cleaning.0006": "执行器类型无效", + "cleaning.0007": "数据集不存在", + "cleaning.0008": "文件系统错误", + "cleaning.0009": "设置解析错误", + "cleaning.0010": "任务ID不能为空", + "operator.0001": "算子不存在", + "operator.0002": "算子正在使用中", + "operator.0003": "无法删除预置算子", + "operator.0004": "不支持的文件类型", + "operator.0005": "解析算子包失败", + "operator.0006": "缺少必要的字段", "400": "请求参数错误", "401": "登录已过期,请重新登录", "403": "没有权限访问该资源", "404": "请求的资源不存在", "500": "服务器内部错误,请稍后重试", - "502": "网关错误", - "op.0001": "不支持的文件类型", - "op.0002": "算子中缺少元数据文件", - "op.0003": "缺少必要的字段", - "op.0004": "settings字段解析失败", - "op.0005": "算子ID已存在", - "op.0006": "算子名称已存在", - "op.0007": "算子已被编排在模板或未完成的任务中", - "op.0008": "预置算子无法删除", - "clean.0001": "清洗任务名称重复", - "clean.0002": "任务列表为空", - "clean.0003": "算子输入输出不匹配", - "clean.0004": "算子执行器不匹配" + "502": "网关错误" } \ No newline at end of file diff --git a/frontend/src/pages/DataCleansing/Create/components/ParamConfig.tsx b/frontend/src/pages/DataCleansing/Create/components/ParamConfig.tsx index 057f446d..8636b7db 100644 --- a/frontend/src/pages/DataCleansing/Create/components/ParamConfig.tsx +++ b/frontend/src/pages/DataCleansing/Create/components/ParamConfig.tsx @@ -26,11 +26,11 @@ const ParamConfig: React.FC = ({ onParamChange, }) => { if (!param) return null; - let defaultVal: any = param.defaultVal; + let defaultVal: any = operator.overrides?.[paramKey] ?? param.defaultVal; if (param.type === "range") { - defaultVal = Array.isArray(param.defaultVal) - ? param.defaultVal + defaultVal = Array.isArray(defaultVal) + ? defaultVal : [ param?.properties?.[0]?.defaultVal, param?.properties?.[1]?.defaultVal, @@ -217,6 +217,7 @@ const ParamConfig: React.FC = ({ key={paramKey} > - {t("dataCleansing.detail.logTable.nthRun", { num: num })} + {t("dataCleansing.detail.logTable.nthRun", { num: selectedLog })}
diff --git a/frontend/src/pages/DataCleansing/Home/components/TemplateList.tsx b/frontend/src/pages/DataCleansing/Home/components/TemplateList.tsx index b8d8ada1..f967b50e 100644 --- a/frontend/src/pages/DataCleansing/Home/components/TemplateList.tsx +++ b/frontend/src/pages/DataCleansing/Home/components/TemplateList.tsx @@ -39,7 +39,7 @@ export default function TemplateList() { }, { key: "delete", - label: t("dataCleansing.actions.deleteTemplate"), + label: t("dataCleansing.actions.delete"), danger: true, icon: , onClick: deleteTemplate, // implement delete logic diff --git a/frontend/src/pages/OperatorMarket/Detail/components/ChangeLog.tsx b/frontend/src/pages/OperatorMarket/Detail/components/ChangeLog.tsx index 5484a9e1..55c3891b 100644 --- a/frontend/src/pages/OperatorMarket/Detail/components/ChangeLog.tsx +++ b/frontend/src/pages/OperatorMarket/Detail/components/ChangeLog.tsx @@ -22,7 +22,7 @@ export default function ChangeLog({ operator }) { )}
    - {release.changelog.map((change, changeIndex) => ( + {release.changelog?.map((change, changeIndex) => (
• {change} 
diff --git a/frontend/src/pages/OperatorMarket/Home/OperatorMarket.tsx b/frontend/src/pages/OperatorMarket/Home/OperatorMarket.tsx index c22d1bc7..04dcf210 100644 --- a/frontend/src/pages/OperatorMarket/Home/OperatorMarket.tsx +++ b/frontend/src/pages/OperatorMarket/Home/OperatorMarket.tsx @@ -76,14 +76,10 @@ }; const handleDeleteOperator = async (operator: OperatorI) => { - try { - await deleteOperatorByIdUsingDelete(operator.id); - message.success(t("operatorMarket.home.operations.messages.deleteSuccess")); - fetchData(); - await initCategoriesTree(); - } catch (error) { - message.error(t("operatorMarket.home.operations.messages.deleteFailed")); - } + await deleteOperatorByIdUsingDelete(operator.id); + message.success(t("operatorMarket.home.operations.messages.deleteSuccess")); + fetchData(); + await initCategoriesTree(); }; const handleStar = async (operator: OperatorI) => { 
diff --git a/frontend/src/pages/OperatorMarket/operator.const.tsx b/frontend/src/pages/OperatorMarket/operator.const.tsx index 1104a118..75d99cb0 100644 --- a/frontend/src/pages/OperatorMarket/operator.const.tsx +++ b/frontend/src/pages/OperatorMarket/operator.const.tsx @@ -148,10 +148,10 @@ export const mapOperator = (op: OperatorI, t: (key: string) => string) => { label: t("operatorMarket.const.language"), value: "Python", }, - { - label: t("operatorMarket.const.function"), - value: functionLabel, - }, + // { + // label: t("operatorMarket.const.function"), + // value: functionLabel, + // }, ], }; }; @@ -198,4 +198,4 @@ // 4. Format the value and append the unit // parseFloat strips trailing zeros (e.g. "1.20 MB" -> "1.2 MB") return `${parseFloat((bytes / Math.pow(k, i)).toFixed(dm))} ${sizes[i]}`; -}; \ No newline at end of file +}; 
diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index 8bc37239..ebd6935d 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -13,54 +13,25 @@ }, server: { host: "0.0.0.0", - proxy: (() => { - const pythonProxyConfig = { - target: "http://localhost:18000", + proxy: { + "^/api": { + target: "http://localhost:8080", // local backend service address changeOrigin: true, secure: false, - configure: (proxy: { on: (event: string, handler: (arg: unknown) => void) => void }) => { - proxy.on("proxyReq", (proxyReq: unknown) => { - (proxyReq as { removeHeader: (name: string) => void }).removeHeader("referer"); - (proxyReq as { removeHeader: (name: string) => void }).removeHeader("origin"); + rewrite: (path) => path.replace(/^\/api/, "/api"), + configure: (proxy, options) => { + // proxy is an instance of 'http-proxy' + proxy.on("proxyReq", (proxyReq, req, res) => { + // request headers can be modified here + proxyReq.removeHeader("referer"); + proxyReq.removeHeader("origin"); }); - proxy.on("proxyRes", (proxyRes: unknown) => { - const res = proxyRes as { headers: Record }; - delete res.headers["set-cookie"]; - res.headers["cookies"] = ""; + proxy.on("proxyRes", (proxyRes, req, res) => { + delete proxyRes.headers["set-cookie"]; + proxyRes.headers["cookies"] = ""; // clear the cookies header }); }, - }; - - const javaProxyConfig = { - target: "http://localhost:8080", - changeOrigin: true, - secure: false, - configure: (proxy: { on: (event: string, handler: (arg: unknown) => void) => void }) => { - proxy.on("proxyReq", (proxyReq: unknown) => { - (proxyReq as { removeHeader: (name: string) => void }).removeHeader("referer"); - (proxyReq as { removeHeader: (name: string) => void }).removeHeader("origin"); - 
}); - proxy.on("proxyRes", (proxyRes: unknown) => { - const res = proxyRes as { headers: Record }; - delete res.headers["set-cookie"]; - res.headers["cookies"] = ""; - }); - }, - }; - - // Python 服务: rag, synthesis, annotation, evaluation, models - const pythonPaths = ["rag", "synthesis", "annotation", "data-collection", "evaluation", "models"]; - // Java 服务: data-management, knowledge-base - const javaPaths = ["data-management", "knowledge-base", "operators"]; - - const proxy: Record = {}; - for (const p of pythonPaths) { - proxy[`/api/${p}`] = pythonProxyConfig; - } - for (const p of javaPaths) { - proxy[`/api/${p}`] = javaProxyConfig; - } - return proxy; - })(), + }, + }, }, }); diff --git a/runtime/datamate-python/app/core/exception/codes.py b/runtime/datamate-python/app/core/exception/codes.py index d741174b..294e6d56 100644 --- a/runtime/datamate-python/app/core/exception/codes.py +++ b/runtime/datamate-python/app/core/exception/codes.py @@ -86,6 +86,26 @@ def __init__(self): RATIO_ALREADY_EXISTS: Final = ErrorCode("ratio.0003", "Task already exists", 400) RATIO_DELETE_FAILED: Final = ErrorCode("ratio.0004", "Failed to delete task", 500) + # ========== 清洗模块 ========== + CLEANING_TASK_NOT_FOUND: Final = ErrorCode("cleaning.0001", "Cleaning task not found", 404) + CLEANING_NAME_DUPLICATED: Final = ErrorCode("cleaning.0002", "Cleaning task name is duplicated", 400) + CLEANING_TEMPLATE_NOT_FOUND: Final = ErrorCode("cleaning.0003", "Cleaning template not found", 404) + CLEANING_TEMPLATE_NAME_DUPLICATED: Final = ErrorCode("cleaning.0004", "Cleaning template name is duplicated", 400) + CLEANING_INVALID_OPERATOR_INPUT: Final = ErrorCode("cleaning.0005", "Invalid operator input/output types", 400) + CLEANING_INVALID_EXECUTOR_TYPE: Final = ErrorCode("cleaning.0006", "Invalid executor type", 400) + CLEANING_DATASET_NOT_FOUND: Final = ErrorCode("cleaning.0007", "Dataset not found", 404) + CLEANING_FILE_SYSTEM_ERROR: Final = ErrorCode("cleaning.0008", "File system error", 500) + CLEANING_SETTINGS_PARSE_ERROR: Final = ErrorCode("cleaning.0009", "Settings parse error", 400) + CLEANING_TASK_ID_REQUIRED: Final = ErrorCode("cleaning.0010", "Task ID is required", 400) + + # ========== 算子市场模块 ========== + OPERATOR_NOT_FOUND: Final = ErrorCode("operator.0001", "Operator not found", 404) + OPERATOR_IN_INSTANCE: Final = ErrorCode("operator.0002", "Operator is in use", 400) + OPERATOR_CANNOT_DELETE_PREDEFINED: Final = ErrorCode("operator.0003", "Cannot delete predefined operator", 400) + OPERATOR_UNSUPPORTED_FILE_TYPE: Final = ErrorCode("operator.0004", "Unsupported file type", 400) + OPERATOR_PARSE_FAILED: Final = ErrorCode("operator.0005", "Failed to parse operator package", 400) + OPERATOR_FIELD_NOT_FOUND: Final = ErrorCode("operator.0006", "Required field is missing", 400) + # ========== 系统模块 ========== SYSTEM_MODEL_NOT_FOUND: Final = ErrorCode("system.0006", "Model configuration not found", 404) SYSTEM_MODEL_HEALTH_CHECK_FAILED: Final = ErrorCode("system.0007", "Model health check failed", 500) diff --git a/runtime/datamate-python/app/core/exception/middleware.py b/runtime/datamate-python/app/core/exception/middleware.py index 82b03ca2..561d130d 100644 --- a/runtime/datamate-python/app/core/exception/middleware.py +++ b/runtime/datamate-python/app/core/exception/middleware.py @@ -69,7 +69,7 @@ async def dispatch(self, request: Request, call_next): except Exception as exc: # 捕获所有未处理的异常 logger.error( - f"Unhandled exception occurred at {request.method} {request.url.path}", + f"Unhandled exception 
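As a usage note for the new codes: they are raised through the shared BusinessError exception (see the service code later in this patch) and serialized by the exception middleware below. A minimal sketch — the lookup helper and the exact response shape are illustrative assumptions, not part of this change:

from app.core.exception import BusinessError, ErrorCodes

async def require_task(task_repo, db, task_id: str):
    # hypothetical lookup helper; a missing row is reported as code "cleaning.0001",
    # which the middleware is expected to map to an HTTP 404 response
    task = await task_repo.find_task_by_id(db, task_id)
    if not task:
        raise BusinessError(ErrorCodes.CLEANING_TASK_NOT_FOUND, task_id)
    return task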
diff --git a/runtime/datamate-python/app/core/exception/middleware.py b/runtime/datamate-python/app/core/exception/middleware.py index 82b03ca2..561d130d 100644 --- a/runtime/datamate-python/app/core/exception/middleware.py +++ b/runtime/datamate-python/app/core/exception/middleware.py @@ -69,7 +69,7 @@ async def dispatch(self, request: Request, call_next): except Exception as exc: # catch any unhandled exception logger.error( - f"Unhandled exception occurred at {request.method} {request.url.path}", + f"Unhandled exception occurred at {request.method} {request.url.path}", exc, exc_info=True ) return self._error_response( diff --git a/runtime/datamate-python/app/db/models/__init__.py b/runtime/datamate-python/app/db/models/__init__.py index 2b83de26..060e4b64 100644 --- a/runtime/datamate-python/app/db/models/__init__.py +++ b/runtime/datamate-python/app/db/models/__init__.py @@ -21,6 +21,17 @@ EvaluationItem ) +from .operator import ( + Operator, + Category, + CategoryRelation, + OperatorRelease +) + +from .chunk_upload import ( + ChunkUploadPreRequest +) + __all__ = [ "Dataset", "DatasetTag", @@ -32,4 +43,9 @@ "LabelingProject", "EvaluationTask", "EvaluationItem", + "Operator", + "Category", + "CategoryRelation", + "OperatorRelease", + "ChunkUploadPreRequest", ] diff --git a/runtime/datamate-python/app/db/models/chunk_upload.py b/runtime/datamate-python/app/db/models/chunk_upload.py new file mode 100644 index 00000000..e110af98 --- /dev/null +++ b/runtime/datamate-python/app/db/models/chunk_upload.py @@ -0,0 +1,38 @@ +""" +Chunk Upload Database Model +""" +from sqlalchemy import Column, String, Integer, DateTime +from sqlalchemy.sql import func + +from app.db.models.base_entity import Base + + +class ChunkUploadPreRequest(Base): + """Pre-registration record for a chunked upload""" + __tablename__ = "t_chunk_upload_request" + + id = Column(String(36), primary_key=True, comment="Request ID") + total_file_num = Column(Integer, nullable=False, comment="Total number of files") + uploaded_file_num = Column(Integer, nullable=True, comment="Number of files uploaded so far") + upload_path = Column(String(512), nullable=False, comment="Upload path") + timeout = Column(DateTime, nullable=False, comment="Expiry time of the upload request") + service_id = Column(String(64), nullable=True, comment="ID of the service owning this request") + check_info = Column(String(512), nullable=True, comment="Business payload") + + def increment_uploaded_file_num(self): + """Increment the uploaded-file counter""" + if self.uploaded_file_num is None: + self.uploaded_file_num = 1 + else: + self.uploaded_file_num += 1 + + def is_upload_complete(self) -> bool: + """Check whether all expected files have been uploaded""" + return (self.uploaded_file_num is not None and + self.uploaded_file_num == self.total_file_num) + + def is_request_timeout(self) -> bool: + """Check whether the request has expired""" + from datetime import datetime + return self.timeout is not None and datetime.utcnow() > self.timeout
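For orientation, a sketch of how this model's helpers are meant to be driven when one chunk finishes uploading — the session wiring and the surrounding service are assumptions, not part of this change:

from sqlalchemy.ext.asyncio import AsyncSession
from app.db.models.chunk_upload import ChunkUploadPreRequest

async def register_uploaded_file(db: AsyncSession, request_id: str) -> bool:
    # primary-key lookup of the pre-registered upload request
    req = await db.get(ChunkUploadPreRequest, request_id)
    if req is None or req.is_request_timeout():
        return False  # unknown or expired pre-request
    req.increment_uploaded_file_num()
    await db.commit()
    # True once uploaded_file_num reaches total_file_num
    return req.is_upload_complete()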
diff --git a/runtime/datamate-python/app/db/models/cleaning.py b/runtime/datamate-python/app/db/models/cleaning.py new file mode 100644 index 00000000..c2965be9 --- /dev/null +++ b/runtime/datamate-python/app/db/models/cleaning.py @@ -0,0 +1,59 @@ +from sqlalchemy import Column, String, BigInteger, Integer, TIMESTAMP +from app.db.models.base_entity import BaseEntity, Base + + +class CleaningTask(BaseEntity): + """Data cleaning task entity""" + __tablename__ = "t_clean_task" + + id = Column(String(36), primary_key=True, comment="Task ID") + name = Column(String(255), nullable=False, comment="Task name") + description = Column(String(1024), nullable=True, comment="Task description") + status = Column(String(50), nullable=False, default="PENDING", comment="Task status: PENDING, RUNNING, COMPLETED, STOPPED, FAILED") + src_dataset_id = Column(String(36), nullable=False, comment="Source dataset ID") + src_dataset_name = Column(String(255), nullable=False, comment="Source dataset name") + dest_dataset_id = Column(String(36), nullable=True, comment="Destination dataset ID") + dest_dataset_name = Column(String(255), nullable=True, comment="Destination dataset name") + before_size = Column(BigInteger, nullable=True, comment="Data size before cleaning") + after_size = Column(BigInteger, nullable=True, comment="Data size after cleaning") + file_count = Column(Integer, nullable=True, comment="Total file count") + retry_count = Column(Integer, default=0, nullable=False, comment="Retry count") + started_at = Column(TIMESTAMP, nullable=True, comment="Task start time") + finished_at = Column(TIMESTAMP, nullable=True, comment="Task finish time") + + +class CleaningTemplate(BaseEntity): + """Data cleaning template entity""" + __tablename__ = "t_clean_template" + + id = Column(String(36), primary_key=True, comment="Template ID") + name = Column(String(255), nullable=False, comment="Template name") + description = Column(String(1024), nullable=True, comment="Template description") + + +class CleaningResult(Base): + """Data cleaning result entity""" + __tablename__ = "t_clean_result" + + instance_id = Column(String(36), primary_key=True, comment="Instance ID (task or template ID)") + src_file_id = Column(String(36), primary_key=True, comment="Source file ID") + dest_file_id = Column(String(36), nullable=True, comment="Destination file ID") + src_name = Column(String(512), nullable=True, comment="Source file name") + dest_name = Column(String(512), nullable=True, comment="Destination file name") + src_type = Column(String(50), nullable=True, comment="Source file type") + dest_type = Column(String(50), nullable=True, comment="Destination file type") + src_size = Column(BigInteger, nullable=True, comment="Source file size") + dest_size = Column(BigInteger, nullable=True, comment="Destination file size") + status = Column(String(50), nullable=True, comment="Cleaning status: COMPLETED, FAILED, etc.") + result = Column(String(1024), nullable=True, comment="Cleaning result message") + + +class OperatorInstance(Base): + """Operator instance in task or template""" + __tablename__ = "t_operator_instance" + + instance_id = Column(String(36), primary_key=True, comment="Instance ID (task or template ID)") + operator_id = Column(String(36), primary_key=True, comment="Operator ID") + op_index = Column(Integer, nullable=False, comment="Operator execution order") + settings_override = Column(String(4096), nullable=True, comment="Operator settings override (JSON)") + diff --git a/runtime/datamate-python/app/db/models/operator.py b/runtime/datamate-python/app/db/models/operator.py new file mode 100644 index 00000000..57362461 --- /dev/null +++ b/runtime/datamate-python/app/db/models/operator.py @@ -0,0 +1,70 @@ +""" +Operator Market Data Models +""" +from sqlalchemy import Column, String, Integer, Boolean, BigInteger, Text, JSON, TIMESTAMP, Index +from sqlalchemy.sql import func + +from app.db.models.base_entity import Base, BaseEntity + + +class Operator(BaseEntity): + """Operator entity""" + __tablename__ = "t_operator" + + id = Column(String(36), primary_key=True, index=True, comment="Operator ID") + name = Column(String(255), nullable=False, comment="Operator name") + description = Column(Text, nullable=True, comment="Operator description") + version = Column(String(50), nullable=False, comment="Operator version") + inputs = Column(Text, nullable=True, comment="Input definitions (JSON)") + outputs = Column(Text, nullable=True, comment="Output definitions (JSON)") + runtime = Column(Text, nullable=True, comment="Runtime configuration (JSON)") + settings = Column(Text, nullable=True, comment="Operator settings (JSON)") + file_name = Column(String(255), nullable=True, comment="File name") + file_size = Column(BigInteger, nullable=True, comment="File size in bytes") + metrics = Column(Text, nullable=True, comment="Operator metrics (JSON)") + usage_count = Column(Integer, default=0, nullable=False, comment="Usage count") + is_star = Column(Boolean, default=False, nullable=False, comment="Starred flag") + + __table_args__ = ( + Index("idx_is_star", "is_star"), + ) + + +class Category(BaseEntity): + """Operator category entity""" + __tablename__ = "t_operator_category" + + id = Column(String(36), primary_key=True, index=True, comment="Category ID") + name = Column(String(255), nullable=False, comment="Category name") + value = Column(String(255), nullable=True, comment="Category value") + type = Column(String(50), nullable=True, comment="Category type") + parent_id = Column(String(36), nullable=False, default="0", comment="Parent category ID") + + +class CategoryRelation(BaseEntity): + """Operator-category relation entity""" + __tablename__ = "t_operator_category_relation" + + category_id = Column(String(36), primary_key=True, comment="Category ID") + operator_id = Column(String(36), primary_key=True, comment="Operator ID") + + __table_args__ = ( + Index("idx_category_id", "category_id"), + Index("idx_operator_id", "operator_id"), + ) + + +class OperatorRelease(BaseEntity): + """Operator release entity""" + __tablename__ = "t_operator_release" + + id = Column(String(36), primary_key=True, comment="Operator ID") + version = Column(String(50), primary_key=True, comment="Version number") + release_date = Column(TIMESTAMP, nullable=False, default=func.now(), comment="Release time") + changelog = Column(JSON, nullable=True, comment="Changelog entries") + + +# Ignore data scope for operator models +for model in [Operator, Category, CategoryRelation, OperatorRelease]: + model.__ignore_data_scope__ = True diff --git a/runtime/datamate-python/app/module/__init__.py b/runtime/datamate-python/app/module/__init__.py index 7d3c482b..edf8f547 100644 --- a/runtime/datamate-python/app/module/__init__.py +++ b/runtime/datamate-python/app/module/__init__.py @@ -7,6 +7,9 @@ from .evaluation.interface import router as evaluation_router from .collection.interface import router as collection_route from .rag.interface.rag_interface import router as rag_router +from .operator.interface import operator_router +from .operator.interface import category_router +from .cleaning.interface import router as cleaning_router router = APIRouter( prefix="/api" ) @@ -19,5 +22,8 @@ router.include_router(evaluation_router) router.include_router(collection_route) router.include_router(rag_router) +router.include_router(operator_router) +router.include_router(category_router) +router.include_router(cleaning_router) __all__ = ["router"] diff --git a/runtime/datamate-python/app/module/cleaning/__init__.py b/runtime/datamate-python/app/module/cleaning/__init__.py new file mode 100644 index 00000000..7224d83c --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/__init__.py @@ -0,0 +1,50 @@ +from .schema import ( + CleaningTaskStatus, + OperatorInstanceDto, + CleaningProcess, + CleaningTaskDto, + CreateCleaningTaskRequest, + CleaningResultDto, + CleaningTaskLog, + CleaningTemplateDto, + CreateCleaningTemplateRequest, + UpdateCleaningTemplateRequest, +) + +from .repository import ( + CleaningTaskRepository, + CleaningTemplateRepository, + CleaningResultRepository, + OperatorInstanceRepository, +) + +from .service import ( + CleaningTaskValidator, + CleaningTaskScheduler, + CleaningTemplateService, + CleaningTaskService, +) + +from .runtime_client import RuntimeClient + +__all__ = [ + "CleaningTaskStatus", + "OperatorInstanceDto", + "CleaningProcess", + "CleaningTaskDto", + "CreateCleaningTaskRequest", + "CleaningResultDto", + "CleaningTaskLog", + "CleaningTemplateDto", + "CreateCleaningTemplateRequest", + "UpdateCleaningTemplateRequest", + "CleaningTaskRepository", + "CleaningTemplateRepository", + 
"CleaningResultRepository", + "OperatorInstanceRepository", + "CleaningTaskValidator", + "CleaningTaskScheduler", + "CleaningTemplateService", + "CleaningTaskService", + "RuntimeClient", +] diff --git a/runtime/datamate-python/app/module/cleaning/exceptions.py b/runtime/datamate-python/app/module/cleaning/exceptions.py new file mode 100644 index 00000000..85c0718f --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/exceptions.py @@ -0,0 +1,57 @@ +from typing import Optional + + +class CleaningException(Exception): + """Base exception for cleaning module""" + def __init__(self, message: str, details: Optional[dict] = None): + self.message = message + self.details = details + super().__init__(self.message) + + +class CleaningNameDuplicationError(CleaningException): + """Exception raised when cleaning task name is duplicated""" + def __init__(self, name: str): + super().__init__(f"Cleaning task name '{name}' is duplicated") + + +class CleaningTaskNotFoundError(CleaningException): + """Exception raised when cleaning task is not found""" + def __init__(self, task_id: str): + super().__init__(f"Cleaning task '{task_id}' not found") + + +class CleaningTemplateNotFoundError(CleaningException): + """Exception raised when cleaning template is not found""" + def __init__(self, template_id: str): + super().__init__(f"Cleaning template '{template_id}' not found") + + +class InvalidOperatorInputError(CleaningException): + """Exception raised when operator input/output types are invalid""" + def __init__(self, message: str = "Invalid operator input/output types"): + super().__init__(message) + + +class ExecutorTypeError(CleaningException): + """Exception raised when executor type is invalid""" + def __init__(self, message: str = "Invalid executor type"): + super().__init__(message) + + +class DatasetNotFoundError(CleaningException): + """Exception raised when dataset is not found""" + def __init__(self, dataset_id: str): + super().__init__(f"Dataset '{dataset_id}' not found") + + +class FileSystemError(CleaningException): + """Exception raised when file system operations fail""" + def __init__(self, message: str, details: Optional[dict] = None): + super().__init__(f"File system error: {message}", details) + + +class SettingsParseError(CleaningException): + """Exception raised when operator settings parsing fails""" + def __init__(self, message: str, details: Optional[dict] = None): + super().__init__(f"Settings parse error: {message}", details) diff --git a/runtime/datamate-python/app/module/cleaning/interface/__init__.py b/runtime/datamate-python/app/module/cleaning/interface/__init__.py new file mode 100644 index 00000000..a8d5421d --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/interface/__init__.py @@ -0,0 +1,8 @@ +from fastapi import APIRouter + +from .cleaning_task_routes import router as task_router +from .cleaning_template_routes import router as template_router + +router = APIRouter() +router.include_router(task_router) +router.include_router(template_router) diff --git a/runtime/datamate-python/app/module/cleaning/interface/cleaning_task_routes.py b/runtime/datamate-python/app/module/cleaning/interface/cleaning_task_routes.py new file mode 100644 index 00000000..1f8cba2b --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/interface/cleaning_task_routes.py @@ -0,0 +1,234 @@ +from typing import Optional + +from fastapi import APIRouter, Depends +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.logging import get_logger +from app.db.session 
import get_db +from app.module.cleaning.schema import ( + CleaningTaskDto, + CreateCleaningTaskRequest, + CleaningResultDto, + CleaningTaskLog, +) +from app.module.cleaning.service import CleaningTaskService +from app.module.shared.schema import StandardResponse, PaginatedData + +logger = get_logger(__name__) + +router = APIRouter(prefix="/cleaning/tasks", tags=["Cleaning Tasks"]) + + +def _get_operator_service(): + """Get operator service""" + from app.module.operator.service import OperatorService + from app.module.operator.repository import ( + OperatorRepository, + CategoryRelationRepository, + OperatorReleaseRepository, + ) + from app.module.operator.parsers import ParserHolder + from app.module.shared.file_service import FileService + from app.module.shared.chunk_upload_repository import ChunkUploadRepository + + return OperatorService( + operator_repo=OperatorRepository(None), + category_relation_repo=CategoryRelationRepository(None), + operator_release_repo=OperatorReleaseRepository(None), + parser_holder=ParserHolder(), + file_service=FileService(ChunkUploadRepository()), + ) + + +def _get_task_service(db: AsyncSession) -> CleaningTaskService: + """Get cleaning task service instance""" + from app.module.cleaning.service import ( + CleaningTaskScheduler, + CleaningTaskValidator, + ) + from app.module.cleaning.repository import ( + CleaningTaskRepository, + CleaningResultRepository, + OperatorInstanceRepository, + ) + from app.module.cleaning.runtime_client import RuntimeClient + from app.module.dataset.service import DatasetManagementService + from app.module.shared.common.lineage import LineageService + + runtime_client = RuntimeClient() + scheduler = CleaningTaskScheduler( + task_repo=CleaningTaskRepository(None), + runtime_client=runtime_client + ) + operator_service = _get_operator_service() + dataset_service = DatasetManagementService(db) + lineage_service = LineageService(db) + + task_repo = CleaningTaskRepository(None) + + return CleaningTaskService( + task_repo=task_repo, + result_repo=CleaningResultRepository(None), + operator_instance_repo=OperatorInstanceRepository(None), + operator_service=operator_service, + scheduler=scheduler, + validator=CleaningTaskValidator(task_repo=task_repo, template_repo=None), + dataset_service=dataset_service, + lineage_service=lineage_service, + ) + + +@router.get( + "", + response_model=StandardResponse[PaginatedData[CleaningTaskDto]], + summary="查询清洗任务列表", + description="根据参数查询清洗任务列表(支持分页、状态过滤、关键词搜索)", + tags=['mcp'] +) +async def get_cleaning_tasks( + page: int = 0, + size: int = 10, + status: Optional[str] = None, + keyword: Optional[str] = None, + db: AsyncSession = Depends(get_db), +): + """Query cleaning tasks""" + task_service = _get_task_service(db) + + tasks = await task_service.get_tasks(db, status, keyword, page, size) + count = await task_service.count_tasks(db, status, keyword) + total_pages = (count + size - 1) // size if size > 0 else 0 + + return StandardResponse( + code="0", + message="success", + data=PaginatedData( + page=page, + size=size, + total_elements=count, + total_pages=total_pages, + content=tasks, + ) + ) + + +@router.post( + "", + response_model=StandardResponse[CleaningTaskDto], + summary="创建清洗任务", + description="根据模板ID或算子列表创建清洗任务", + tags=['mcp'] +) +async def create_cleaning_task( + request: CreateCleaningTaskRequest, + db: AsyncSession = Depends(get_db), +): + """Create cleaning task""" + task_service = _get_task_service(db) + + task = await task_service.create_task(db, request) + await db.commit() + + 
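# Commit first so the task row is persisted before execution is triggered; + # the initial run below takes retry_count from its initial -1 to 0. + 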
await task_service.execute_task(db, task.id) + await db.commit() + + return StandardResponse(code="0", message="success", data=task) + + +@router.get( + "/{task_id}", + response_model=StandardResponse[CleaningTaskDto], + summary="获取清洗任务详情", + description="根据ID获取清洗任务详细信息" +) +async def get_cleaning_task( + task_id: str, + db: AsyncSession = Depends(get_db), +): + """Get cleaning task by ID""" + task_service = _get_task_service(db) + task = await task_service.get_task(db, task_id) + return StandardResponse(code="0", message="success", data=task) + + +@router.delete( + "/{task_id}", + response_model=StandardResponse[str], + summary="删除清洗任务", + description="删除指定的清洗任务" +) +async def delete_cleaning_task( + task_id: str, + db: AsyncSession = Depends(get_db), +): + """Delete cleaning task""" + task_service = _get_task_service(db) + await task_service.delete_task(db, task_id) + await db.commit() + return StandardResponse(code="0", message="success", data=task_id) + + +@router.post( + "/{task_id}/stop", + response_model=StandardResponse[str], + summary="停止清洗任务", + description="停止正在运行的清洗任务" +) +async def stop_cleaning_task( + task_id: str, + db: AsyncSession = Depends(get_db), +): + """Stop cleaning task""" + task_service = _get_task_service(db) + await task_service.stop_task(db, task_id) + return StandardResponse(code="0", message="success", data=task_id) + + +@router.post( + "/{task_id}/execute", + response_model=StandardResponse[str], + summary="执行清洗任务", + description="重新执行清洗任务" +) +async def execute_cleaning_task( + task_id: str, + db: AsyncSession = Depends(get_db), +): + """Execute cleaning task""" + task_service = _get_task_service(db) + await task_service.execute_task(db, task_id) + await db.commit() + return StandardResponse(code="0", message="success", data=task_id) + + +@router.get( + "/{task_id}/result", + response_model=StandardResponse[list[CleaningResultDto]], + summary="获取清洗任务结果", + description="获取指定清洗任务的执行结果" +) +async def get_cleaning_task_results( + task_id: str, + db: AsyncSession = Depends(get_db), +): + """Get cleaning task results""" + task_service = _get_task_service(db) + results = await task_service.get_task_results(db, task_id) + return StandardResponse(code="0", message="success", data=results) + + +@router.get( + "/{task_id}/log/{retry_count}", + response_model=StandardResponse[list[CleaningTaskLog]], + summary="获取清洗任务日志", + description="获取指定清洗任务的执行日志" +) +async def get_cleaning_task_log( + task_id: str, + retry_count: int, + db: AsyncSession = Depends(get_db), +): + """Get cleaning task log""" + task_service = _get_task_service(db) + logs = await task_service.get_task_log(db, task_id, retry_count) + return StandardResponse(code="0", message="success", data=logs) diff --git a/runtime/datamate-python/app/module/cleaning/interface/cleaning_template_routes.py b/runtime/datamate-python/app/module/cleaning/interface/cleaning_template_routes.py new file mode 100644 index 00000000..102a625e --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/interface/cleaning_template_routes.py @@ -0,0 +1,180 @@ +import math +from typing import Optional + +from fastapi import APIRouter, Depends, Query +from sqlalchemy import select, func +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.logging import get_logger +from app.db.session import get_db +from app.module.cleaning.schema import ( + CleaningTemplateDto, + CreateCleaningTemplateRequest, + UpdateCleaningTemplateRequest, +) +from app.module.cleaning.service import CleaningTemplateService +from 
app.module.shared.schema import StandardResponse, PaginatedData + +logger = get_logger(__name__) + +router = APIRouter(prefix="/cleaning/templates", tags=["Cleaning Templates"]) + + +def _get_operator_service(): + """Get operator service""" + from app.module.operator.service import OperatorService + from app.module.operator.repository import ( + OperatorRepository, + CategoryRelationRepository, + OperatorReleaseRepository, + ) + from app.module.operator.parsers import ParserHolder + from app.module.shared.file_service import FileService + from app.module.shared.chunk_upload_repository import ChunkUploadRepository + + return OperatorService( + operator_repo=OperatorRepository(None), + category_relation_repo=CategoryRelationRepository(None), + operator_release_repo=OperatorReleaseRepository(None), + parser_holder=ParserHolder(), + file_service=FileService(ChunkUploadRepository()), + ) + + +def _get_template_service(db: AsyncSession) -> CleaningTemplateService: + """Get cleaning template service instance""" + from app.module.cleaning.service import CleaningTaskValidator + from app.module.cleaning.repository import ( + CleaningTemplateRepository, + OperatorInstanceRepository, + ) + + operator_service = _get_operator_service() + + template_repo = CleaningTemplateRepository(None) + + return CleaningTemplateService( + template_repo=template_repo, + operator_instance_repo=OperatorInstanceRepository(None), + operator_service=operator_service, + validator=CleaningTaskValidator(task_repo=None, template_repo=template_repo), + ) + + +@router.get( + "", + response_model=StandardResponse[PaginatedData[CleaningTemplateDto]], + summary="查询清洗模板列表", + description="分页查询清洗模板" +) +async def get_cleaning_templates( + page: int = Query(1, description="页码"), + size: int = Query(20, description="每页数量"), + keyword: Optional[str] = Query(None, description="关键词搜索"), + db: AsyncSession = Depends(get_db), +): + """Query cleaning templates with pagination""" + from app.db.models.cleaning import CleaningTemplate + + template_service = _get_template_service(db) + + query = select(CleaningTemplate) + + if keyword: + keyword_pattern = f"%{keyword}%" + query = query.where( + CleaningTemplate.name.ilike(keyword_pattern) | CleaningTemplate.description.ilike(keyword_pattern) + ) + + count_query = select(func.count()).select_from(query.subquery()) + total = (await db.execute(count_query)).scalar_one() + + items = await template_service.get_templates(db, keyword) + + total_pages = math.ceil(total / size) if total > 0 else 0 + + return StandardResponse( + code="0", + message="success", + data=PaginatedData( + content=items, + total_elements=total, + total_pages=total_pages, + page=page, + size=size, + ) + ) + + +@router.post( + "", + response_model=StandardResponse[CleaningTemplateDto], + summary="创建清洗模板", + description="创建新的清洗模板" +) +async def create_cleaning_template( + request: CreateCleaningTemplateRequest, + db: AsyncSession = Depends(get_db), +): + """Create cleaning template""" + template_service = _get_template_service(db) + + template = await template_service.create_template(db, request) + await db.commit() + + return StandardResponse(code="0", message="success", data=template) + + +@router.get( + "/{template_id}", + response_model=StandardResponse[CleaningTemplateDto], + summary="获取清洗模板详情", + description="根据ID获取清洗模板详细信息" +) +async def get_cleaning_template( + template_id: str, + db: AsyncSession = Depends(get_db), +): + """Get cleaning template by ID""" + template_service = _get_template_service(db) + + template = await 
template_service.get_template(db, template_id) + return StandardResponse(code="0", message="success", data=template) + + +@router.put( + "/{template_id}", + response_model=StandardResponse[CleaningTemplateDto], + summary="更新清洗模板", + description="更新清洗模板信息" +) +async def update_cleaning_template( + template_id: str, + request: UpdateCleaningTemplateRequest, + db: AsyncSession = Depends(get_db), +): + """Update cleaning template""" + template_service = _get_template_service(db) + + template = await template_service.update_template(db, template_id, request) + await db.commit() + + return StandardResponse(code="0", message="success", data=template) + + +@router.delete( + "/{template_id}", + response_model=StandardResponse[str], + summary="删除清洗模板", + description="删除指定的清洗模板" +) +async def delete_cleaning_template( + template_id: str, + db: AsyncSession = Depends(get_db), +): + """Delete cleaning template""" + template_service = _get_template_service(db) + await template_service.delete_template(db, template_id) + await db.commit() + + return StandardResponse(code="0", message="success", data=template_id) diff --git a/runtime/datamate-python/app/module/cleaning/repository/__init__.py b/runtime/datamate-python/app/module/cleaning/repository/__init__.py new file mode 100644 index 00000000..f8663a94 --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/repository/__init__.py @@ -0,0 +1,11 @@ +from .cleaning_task_repository import CleaningTaskRepository +from .cleaning_template_repository import CleaningTemplateRepository +from .cleaning_result_repository import CleaningResultRepository +from .operator_instance_repository import OperatorInstanceRepository + +__all__ = [ + "CleaningTaskRepository", + "CleaningTemplateRepository", + "CleaningResultRepository", + "OperatorInstanceRepository", +] diff --git a/runtime/datamate-python/app/module/cleaning/repository/cleaning_result_repository.py b/runtime/datamate-python/app/module/cleaning/repository/cleaning_result_repository.py new file mode 100644 index 00000000..a6aa62e3 --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/repository/cleaning_result_repository.py @@ -0,0 +1,75 @@ +from typing import List, Optional +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import select, delete +from app.db.models.cleaning import CleaningResult +from app.module.cleaning.schema import CleaningResultDto + + +class CleaningResultRepository: + """Repository for cleaning result operations""" + + def __init__(self, model=None): + self.model = model if model else CleaningResult + + async def find_by_instance_id( + self, + db: AsyncSession, + instance_id: str, + status: Optional[str] = None + ) -> List[CleaningResultDto]: + """Query results by instance ID""" + query = select(self.model).where(self.model.instance_id == instance_id) + + if status: + query = query.where(self.model.status == status) + + result = await db.execute(query) + results = result.scalars().all() + + return [ + CleaningResultDto( + instance_id=res.instance_id, + src_file_id=res.src_file_id, + dest_file_id=res.dest_file_id, + src_name=res.src_name, + dest_name=res.dest_name, + src_type=res.src_type, + dest_type=res.dest_type, + src_size=res.src_size, + dest_size=res.dest_size, + status=res.status, + result=res.result + ) + for res in results + ] + + async def count_by_instance_id( + self, + db: AsyncSession, + instance_id: str + ) -> tuple[int, int]: + """Count results by instance ID (completed, failed)""" + total_query = 
select(self.model).where(self.model.instance_id == instance_id) + completed_query = total_query.where(self.model.status == "COMPLETED") + failed_query = total_query.where(self.model.status == "FAILED") + + completed = len((await db.execute(completed_query)).scalars().all()) + failed = len((await db.execute(failed_query)).scalars().all()) + + return (completed, failed) + + async def delete_by_instance_id( self, db: AsyncSession, instance_id: str, status: Optional[str] = None ) -> None: + """Delete results by instance ID""" + query = delete(self.model).where(self.model.instance_id == instance_id) + + if status: + query = query.where(self.model.status == status) + + await db.execute(query) + await db.flush() diff --git a/runtime/datamate-python/app/module/cleaning/repository/cleaning_task_repository.py b/runtime/datamate-python/app/module/cleaning/repository/cleaning_task_repository.py new file mode 100644 index 00000000..7c83d9a2 --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/repository/cleaning_task_repository.py @@ -0,0 +1,140 @@ +from typing import List, Optional +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import select, delete, func +from app.db.models.cleaning import CleaningTask +from app.module.cleaning.schema import CleaningTaskDto + + +class CleaningTaskRepository: + """Repository for cleaning task operations""" + + def __init__(self, model=None): + self.model = model if model else CleaningTask + + async def find_tasks( self, db: AsyncSession, status: Optional[str] = None, keyword: Optional[str] = None, page: Optional[int] = None, size: Optional[int] = None ) -> List[CleaningTaskDto]: + """Query cleaning tasks""" + query = select(self.model) + + if status: + query = query.where(self.model.status == status) + + if keyword: + keyword_pattern = f"%{keyword}%" + query = query.where( self.model.name.ilike(keyword_pattern) | self.model.description.ilike(keyword_pattern) ) + + query = query.order_by(self.model.created_at.desc()) + + if page is not None and size is not None: + offset = max((page - 1) * size, 0) + query = query.offset(offset).limit(size) + + result = await db.execute(query) + tasks = result.scalars().all() + + return [ CleaningTaskDto( id=task.id, name=task.name, description=task.description, status=task.status, src_dataset_id=task.src_dataset_id, src_dataset_name=task.src_dataset_name, dest_dataset_id=task.dest_dataset_id, dest_dataset_name=task.dest_dataset_name, before_size=task.before_size, after_size=task.after_size, file_count=task.file_count, retry_count=task.retry_count, started_at=task.started_at, finished_at=task.finished_at, created_at=task.created_at ) for task in tasks ] + + async def find_task_by_id(self, db: AsyncSession, task_id: str) -> Optional[CleaningTaskDto]: + """Query task by ID""" + query = select(self.model).where(self.model.id == task_id) + result = await db.execute(query) + task = result.scalar_one_or_none() + + if not task: + return None + + return CleaningTaskDto( id=task.id, name=task.name, description=task.description, status=task.status, src_dataset_id=task.src_dataset_id, src_dataset_name=task.src_dataset_name, dest_dataset_id=task.dest_dataset_id, dest_dataset_name=task.dest_dataset_name, before_size=task.before_size, after_size=task.after_size, file_count=task.file_count, retry_count=task.retry_count, started_at=task.started_at, 
finished_at=task.finished_at, + created_at=task.created_at + ) + + async def insert_task(self, db: AsyncSession, task: CleaningTaskDto) -> None: + """Insert new task""" + from app.db.models.cleaning import CleaningTask as CleaningTaskModel + + db_task = CleaningTaskModel( + id=task.id, + name=task.name, + description=task.description, + status=task.status, + src_dataset_id=task.src_dataset_id, + src_dataset_name=task.src_dataset_name, + dest_dataset_id=task.dest_dataset_id, + dest_dataset_name=task.dest_dataset_name, + before_size=task.before_size, + after_size=task.after_size, + file_count=task.file_count, + retry_count=task.retry_count + ) + db.add(db_task) + await db.flush() + + async def update_task(self, db: AsyncSession, task: CleaningTaskDto) -> None: + """Update task""" + query = select(CleaningTask).where(CleaningTask.id == task.id) + result = await db.execute(query) + db_task = result.scalar_one_or_none() + + if db_task: + if task.status: + db_task.status = task.status + if task.started_at: + db_task.started_at = task.started_at + if task.finished_at: + db_task.finished_at = task.finished_at + if task.retry_count is not None: + db_task.retry_count = task.retry_count + + await db.flush() + + async def delete_task_by_id(self, db: AsyncSession, task_id: str) -> None: + """Delete task by ID""" + query = delete(self.model).where(self.model.id == task_id) + await db.execute(query) + await db.flush() + + async def is_name_exist(self, db: AsyncSession, name: str) -> bool: + """Check if task name exists""" + query = select(func.count()).select_from(self.model).where(self.model.name == name) + result = await db.execute(query) + return result.scalar_one() > 0 if result else False diff --git a/runtime/datamate-python/app/module/cleaning/repository/cleaning_template_repository.py b/runtime/datamate-python/app/module/cleaning/repository/cleaning_template_repository.py new file mode 100644 index 00000000..aa35ba71 --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/repository/cleaning_template_repository.py @@ -0,0 +1,63 @@ +from typing import List, Optional +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import select, delete, func +from app.db.models.cleaning import CleaningTemplate + + +class CleaningTemplateRepository: + """Repository for cleaning template operations""" + + def __init__(self, model=None): + self.model = model if model else CleaningTemplate + + async def find_all_templates( + self, + db: AsyncSession, + keyword: Optional[str] = None + ) -> List[CleaningTemplate]: + """Query all templates""" + query = select(self.model) + + if keyword: + keyword_pattern = f"%{keyword}%" + query = query.where( + self.model.name.ilike(keyword_pattern) | self.model.description.ilike(keyword_pattern) + ) + + query = query.order_by(self.model.created_at.desc()) + result = await db.execute(query) + return result.scalars().all() + + async def find_template_by_id(self, db: AsyncSession, template_id: str) -> Optional[CleaningTemplate]: + """Query template by ID""" + query = select(self.model).where(self.model.id == template_id) + result = await db.execute(query) + return result.scalar_one_or_none() + + async def insert_template(self, db: AsyncSession, template: CleaningTemplate) -> None: + """Insert new template""" + db.add(template) + await db.flush() + + async def update_template(self, db: AsyncSession, template: CleaningTemplate) -> None: + """Update template""" + query = select(self.model).where(self.model.id == template.id) + result = await db.execute(query) + 
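# Re-load the managed row in this session; mutating it below lets flush() emit the UPDATE + 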
db_template = result.scalar_one_or_none() + + if db_template: + db_template.name = template.name + db_template.description = template.description + await db.flush() + + async def delete_template(self, db: AsyncSession, template_id: str) -> None: + """Delete template""" + query = delete(self.model).where(self.model.id == template_id) + await db.execute(query) + await db.flush() + + async def is_name_exist(self, db: AsyncSession, name: str) -> bool: + """Check if template name exists""" + query = select(func.count()).select_from(self.model).where(self.model.name == name) + result = await db.execute(query) + return result.scalar_one() > 0 if result else False diff --git a/runtime/datamate-python/app/module/cleaning/repository/operator_instance_repository.py b/runtime/datamate-python/app/module/cleaning/repository/operator_instance_repository.py new file mode 100644 index 00000000..b8a20b1d --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/repository/operator_instance_repository.py @@ -0,0 +1,56 @@ +import json +from typing import List +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import select, delete +from app.db.models.cleaning import OperatorInstance + + +class OperatorInstanceRepository: + """Repository for operator instance operations""" + + def __init__(self, model=None): + self.model = model if model else OperatorInstance + + async def find_operator_by_instance_id( + self, + db: AsyncSession, + instance_id: str + ) -> List[OperatorInstance]: + """Query operator instances by instance ID""" + query = select(self.model).where(self.model.instance_id == instance_id) + query = query.order_by(self.model.op_index.asc()) + result = await db.execute(query) + return result.scalars().all() + + async def find_instance_by_instance_id( + self, + db: AsyncSession, + instance_id: str + ) -> List[OperatorInstance]: + """Query instances for template (same as find_operator_by_instance_id)""" + return await self.find_operator_by_instance_id(db, instance_id) + + async def insert_instance( + self, + db: AsyncSession, + instance_id: str, + instances: List + ) -> None: + """Insert operator instances""" + from app.db.models.cleaning import OperatorInstance as OperatorInstanceModel + + for idx, instance in enumerate(instances): + db_instance = OperatorInstanceModel( + instance_id=instance_id, + operator_id=instance.id, + op_index=idx, + settings_override=json.dumps(instance.overrides), + ) + db.add(db_instance) + await db.flush() + + async def delete_by_instance_id(self, db: AsyncSession, instance_id: str) -> None: + """Delete instances by instance ID""" + query = delete(self.model).where(self.model.instance_id == instance_id) + await db.execute(query) + await db.flush() diff --git a/runtime/datamate-python/app/module/cleaning/runtime_client.py b/runtime/datamate-python/app/module/cleaning/runtime_client.py new file mode 100644 index 00000000..0983256f --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/runtime_client.py @@ -0,0 +1,61 @@ +import httpx +from typing import Optional +from app.core.logging import get_logger + +logger = get_logger(__name__) + + +class RuntimeClient: + """HTTP client for communicating with runtime service""" + + def __init__(self, base_url: str = "http://datamate-runtime:8081"): + self.base_url = base_url + self.client = httpx.AsyncClient(timeout=60.0) + + async def submit_task(self, task_id: str) -> bool: + """Submit cleaning task to runtime executor""" + try: + url = f"{self.base_url}/api/task/{task_id}/submit" + response = await 
self.client.post(url) + response.raise_for_status() + logger.info(f"Task {task_id} submitted successfully") + return True + except httpx.HTTPError as e: + logger.error(f"Failed to submit task {task_id}: {e}") + return False + except Exception as e: + logger.error(f"Unexpected error submitting task {task_id}: {e}") + return False + + async def stop_task(self, task_id: str) -> bool: + """Stop running cleaning task""" + try: + url = f"{self.base_url}/api/task/{task_id}/stop" + response = await self.client.post(url) + response.raise_for_status() + logger.info(f"Task {task_id} stopped successfully") + return True + except httpx.HTTPError as e: + logger.error(f"Failed to stop task {task_id}: {e}") + return False + except Exception as e: + logger.error(f"Unexpected error stopping task {task_id}: {e}") + return False + + async def get_task_status(self, task_id: str) -> Optional[dict]: + """Get task status from runtime""" + try: + url = f"{self.base_url}/api/task/{task_id}/status" + response = await self.client.get(url) + response.raise_for_status() + return response.json() + except httpx.HTTPError as e: + logger.error(f"Failed to get task status {task_id}: {e}") + return None + except Exception as e: + logger.error(f"Unexpected error getting task status {task_id}: {e}") + return None + + async def close(self): + """Close HTTP client""" + await self.client.aclose() diff --git a/runtime/datamate-python/app/module/cleaning/schema/__init__.py b/runtime/datamate-python/app/module/cleaning/schema/__init__.py new file mode 100644 index 00000000..6a38375b --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/schema/__init__.py @@ -0,0 +1,25 @@ +from .cleaning import ( + CleaningTaskStatus, + OperatorInstanceDto, + CleaningProcess, + CleaningTaskDto, + CreateCleaningTaskRequest, + CleaningResultDto, + CleaningTaskLog, + CleaningTemplateDto, + CreateCleaningTemplateRequest, + UpdateCleaningTemplateRequest, +) + +__all__ = [ + "CleaningTaskStatus", + "OperatorInstanceDto", + "CleaningProcess", + "CleaningTaskDto", + "CreateCleaningTaskRequest", + "CleaningResultDto", + "CleaningTaskLog", + "CleaningTemplateDto", + "CreateCleaningTemplateRequest", + "UpdateCleaningTemplateRequest", +] diff --git a/runtime/datamate-python/app/module/cleaning/schema/cleaning.py b/runtime/datamate-python/app/module/cleaning/schema/cleaning.py new file mode 100644 index 00000000..0571b29c --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/schema/cleaning.py @@ -0,0 +1,138 @@ +from typing import Optional, List, Dict, Any +from pydantic import BaseModel, Field +from datetime import datetime +from app.module.shared.schema.common import BaseResponseModel + + +class CleaningTaskStatus: + PENDING = "PENDING" + RUNNING = "RUNNING" + COMPLETED = "COMPLETED" + STOPPED = "STOPPED" + FAILED = "FAILED" + + +class OperatorInstanceDto(BaseResponseModel): + """Operator instance DTO for task or template""" + id: str = Field(..., description="Operator ID") + name: Optional[str] = Field(None, description="Operator name") + description: Optional[str] = Field(None, description="Operator description") + inputs: Optional[str] = Field(None, description="Input types: text/image/audio/video/multimodal") + outputs: Optional[str] = Field(None, description="Output types: text/image/audio/video/multimodal") + categories: Optional[List[str]] = Field(None, description="Category IDs") + settings: Optional[str] = Field(None, description="算子设置(JSON)") + overrides: Dict[str, Any] = Field(default_factory=dict, description="Operator parameter 
overrides") + + +class CleaningProcess(BaseResponseModel): + """Task progress information (matches Java version)""" + process: float = Field(..., description="Progress percentage") + successRate: float = Field(..., description="Success rate percentage") + totalFileNum: int = Field(..., description="Total file count") + succeedFileNum: int = Field(..., description="Succeeded file count") + failedFileNum: int = Field(..., description="Failed file count") + finishedFileNum: int = Field(..., description="Finished file count") + + @classmethod + def of(cls, total: int, succeed: int, failed: int) -> 'CleaningProcess': + """Create progress info (matches Java version logic)""" + finished_file_num = succeed + failed + + if total == 0: + process = 0.0 + else: + process = round(finished_file_num * 100.0 / total, 2) + + if finished_file_num == 0: + success_rate = 0.0 + else: + success_rate = round(succeed * 100.0 / finished_file_num, 2) + + return cls( + process=process, + successRate=success_rate, + totalFileNum=total, + succeedFileNum=succeed, + failedFileNum=failed, + finishedFileNum=finished_file_num, + ) + + +class CleaningTaskDto(BaseResponseModel): + """Cleaning task DTO""" + id: Optional[str] = Field(None, description="Task ID") + name: Optional[str] = Field(None, description="Task name") + description: Optional[str] = Field(None, description="Task description") + src_dataset_id: Optional[str] = Field(None, description="Source dataset ID") + src_dataset_name: Optional[str] = Field(None, description="Source dataset name") + dest_dataset_id: Optional[str] = Field(None, description="Destination dataset ID") + dest_dataset_name: Optional[str] = Field(None, description="Destination dataset name") + before_size: Optional[int] = Field(None, description="Data size before cleaning") + after_size: Optional[int] = Field(None, description="Data size after cleaning") + file_count: Optional[int] = Field(None, description="Total file count") + retry_count: Optional[int] = Field(None, description="Retry count") + status: Optional[str] = Field(None, description="Task status") + template_id: Optional[str] = Field(None, description="Template ID if created from template") + instance: Optional[List[OperatorInstanceDto]] = Field(None, description="Operator instances") + progress: Optional[CleaningProcess] = Field(None, description="Task progress") + created_at: Optional[datetime] = Field(None, description="Creation time") + started_at: Optional[datetime] = Field(None, description="Start time") + finished_at: Optional[datetime] = Field(None, description="Finish time") + + +class CreateCleaningTaskRequest(BaseResponseModel): + """Request to create cleaning task""" + name: str = Field(..., description="Cleaning task name") + description: str = Field(..., description="Cleaning task description") + src_dataset_id: str = Field(..., description="Source dataset ID") + src_dataset_name: str = Field(..., description="Source dataset name") + dest_dataset_id: Optional[str] = Field(None, description="Destination dataset ID") + dest_dataset_name: str = Field(..., description="Destination dataset name, creates new dataset if destDatasetId is empty") + dest_dataset_type: str = Field(..., description="Destination dataset type: TEXT/IMAGE/VIDEO/AUDIO/OTHER") + template_id: Optional[str] = Field(None, description="Template ID (alternative to instance)") + instance: List[OperatorInstanceDto] = Field(default_factory=list, description="Operator list (alternative to templateId)") + + +class CleaningResultDto(BaseResponseModel): + 
"""Cleaning result DTO""" + instance_id: Optional[str] = Field(None, description="Instance ID") + src_file_id: Optional[str] = Field(None, description="Source file ID") + dest_file_id: Optional[str] = Field(None, description="Destination file ID") + src_name: Optional[str] = Field(None, description="Source file name") + dest_name: Optional[str] = Field(None, description="Destination file name") + src_type: Optional[str] = Field(None, description="Source file type") + dest_type: Optional[str] = Field(None, description="Destination file type") + src_size: Optional[int] = Field(None, description="Source file size") + dest_size: Optional[int] = Field(None, description="Destination file size") + status: Optional[str] = Field(None, description="Cleaning status") + result: Optional[str] = Field(None, description="Cleaning result message") + + +class CleaningTaskLog(BaseResponseModel): + """Task log entry""" + level: str = Field(..., description="Log level: INFO, WARN, ERROR") + message: str = Field(..., description="Log message") + + +class CleaningTemplateDto(BaseResponseModel): + """Cleaning template DTO""" + id: Optional[str] = Field(None, description="Template ID") + name: Optional[str] = Field(None, description="Template name") + description: Optional[str] = Field(None, description="Template description") + instance: List[OperatorInstanceDto] = Field(default_factory=list, description="Operator instances") + created_at: Optional[datetime] = Field(None, description="Creation time") + updated_at: Optional[datetime] = Field(None, description="Update time") + + +class CreateCleaningTemplateRequest(BaseResponseModel): + """Request to create cleaning template""" + name: str = Field(..., description="Template name") + description: str = Field(..., description="Template description") + instance: List[OperatorInstanceDto] = Field(..., description="Operator instances") + + +class UpdateCleaningTemplateRequest(BaseResponseModel): + """Request to update cleaning template""" + name: str = Field(..., description="Template name") + description: str = Field(..., description="Template description") + instance: List[OperatorInstanceDto] = Field(..., description="Operator instances") diff --git a/runtime/datamate-python/app/module/cleaning/service/__init__.py b/runtime/datamate-python/app/module/cleaning/service/__init__.py new file mode 100644 index 00000000..ed305edf --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/service/__init__.py @@ -0,0 +1,11 @@ +from .cleaning_task_validator import CleaningTaskValidator +from .cleaning_task_scheduler import CleaningTaskScheduler +from .cleaning_template_service import CleaningTemplateService +from .cleaning_task_service import CleaningTaskService + +__all__ = [ + "CleaningTaskValidator", + "CleaningTaskScheduler", + "CleaningTemplateService", + "CleaningTaskService", +] diff --git a/runtime/datamate-python/app/module/cleaning/service/cleaning_task_scheduler.py b/runtime/datamate-python/app/module/cleaning/service/cleaning_task_scheduler.py new file mode 100644 index 00000000..cd1d1321 --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/service/cleaning_task_scheduler.py @@ -0,0 +1,41 @@ +from sqlalchemy.ext.asyncio import AsyncSession +from app.core.logging import get_logger +from app.module.cleaning.repository import CleaningTaskRepository +from app.module.cleaning.runtime_client import RuntimeClient + +logger = get_logger(__name__) + + +class CleaningTaskScheduler: + """Scheduler for executing cleaning tasks""" + + def __init__(self, 
task_repo: CleaningTaskRepository, runtime_client: RuntimeClient): + self.task_repo = task_repo + self.runtime_client = runtime_client + + async def execute_task(self, db: AsyncSession, task_id: str, retry_count: int) -> bool: + """Execute cleaning task""" + from app.module.cleaning.schema import CleaningTaskDto, CleaningTaskStatus + from datetime import datetime + + task = CleaningTaskDto() + task.id = task_id + task.status = CleaningTaskStatus.RUNNING + task.started_at = datetime.now() + task.retry_count = retry_count + + await self.task_repo.update_task(db, task) + return await self.runtime_client.submit_task(task_id) + + async def stop_task(self, db: AsyncSession, task_id: str) -> bool: + """Stop cleaning task""" + from app.module.cleaning.schema import CleaningTaskDto, CleaningTaskStatus + + await self.runtime_client.stop_task(task_id) + + task = CleaningTaskDto() + task.id = task_id + task.status = CleaningTaskStatus.STOPPED + + await self.task_repo.update_task(db, task) + return True diff --git a/runtime/datamate-python/app/module/cleaning/service/cleaning_task_service.py b/runtime/datamate-python/app/module/cleaning/service/cleaning_task_service.py new file mode 100644 index 00000000..9886a18d --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/service/cleaning_task_service.py @@ -0,0 +1,446 @@ +import json +import re +import shutil +import uuid +from pathlib import Path +from typing import List, Dict, Any, Set + +from sqlalchemy import text +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.logging import get_logger +from app.db.models.base_entity import LineageNode, LineageEdge +from app.core.exception import BusinessError, ErrorCodes +from app.module.cleaning.repository import ( + CleaningTaskRepository, + CleaningResultRepository, + OperatorInstanceRepository, +) +from app.module.cleaning.schema import ( + CleaningTaskDto, + CreateCleaningTaskRequest, + CleaningResultDto, + CleaningTaskLog, + OperatorInstanceDto, + CleaningProcess, + CleaningTaskStatus, +) +from app.module.cleaning.service.cleaning_task_validator import CleaningTaskValidator +from app.module.cleaning.service.cleaning_task_scheduler import CleaningTaskScheduler +from app.module.shared.common.lineage import LineageService +from app.module.shared.schema.lineage import NodeType, EdgeType + +logger = get_logger(__name__) + +DATASET_PATH = "/dataset" +FLOW_PATH = "/flow" + + +class CleaningTaskService: + """Service for managing cleaning tasks""" + + def __init__( + self, + task_repo: CleaningTaskRepository, + result_repo: CleaningResultRepository, + operator_instance_repo: OperatorInstanceRepository, + operator_service, + scheduler: CleaningTaskScheduler, + validator: CleaningTaskValidator, + dataset_service, + lineage_service: LineageService, + ): + self.task_repo = task_repo + self.result_repo = result_repo + self.operator_instance_repo = operator_instance_repo + self.operator_service = operator_service + self.scheduler = scheduler + self.validator = validator + self.dataset_service = dataset_service + self.lineage_service = lineage_service + + async def get_tasks( + self, + db: AsyncSession, + status: str | None = None, + keyword: str | None = None, + page: int | None = None, + size: int | None = None, + ) -> List[CleaningTaskDto]: + """Get cleaning tasks""" + tasks = await self.task_repo.find_tasks(db, status, keyword, page, size) + + for task in tasks: + await self._set_process(db, task) + + return tasks + + async def _set_process(self, db: AsyncSession, task: CleaningTaskDto) -> None: 
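+        # Progress is derived from counting t_clean_result rows (see CleaningProcess.of)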
+ """Set task progress""" + completed, failed = await self.result_repo.count_by_instance_id(db, task.id) + task.progress = CleaningProcess.of(task.file_count or 0, completed, failed) + + async def count_tasks( + self, + db: AsyncSession, + status: str | None = None, + keyword: str | None = None, + ) -> int: + """Count cleaning tasks""" + tasks = await self.task_repo.find_tasks(db, status, keyword, None, None) + return len(tasks) + + async def get_task(self, db: AsyncSession, task_id: str) -> CleaningTaskDto: + """Get task by ID""" + task = await self.task_repo.find_task_by_id(db, task_id) + if not task: + raise BusinessError(ErrorCodes.CLEANING_TASK_NOT_FOUND, task_id) + + await self._set_process(db, task) + + instances = await self.operator_instance_repo.find_operator_by_instance_id(db, task_id) + + # Batch query operators + all_operators = await self.operator_service.get_operators(db=db, page=0, size=1000, categories=[], keyword=None, + is_star=None) + operator_map = {op.id: op for op in all_operators} + + task.instance = [] + for inst in instances: + operator = operator_map.get(inst.operator_id) + if operator: + task.instance.append(OperatorInstanceDto( + id=operator.id, + name=operator.name, + description=operator.description, + inputs=operator.inputs, + outputs=operator.outputs, + settings=operator.settings, + categories=operator.categories, + )) + else: + task.instance.append(OperatorInstanceDto(id=inst.operator_id)) + + return task + + async def create_task( + self, + db: AsyncSession, + request: CreateCleaningTaskRequest + ) -> CleaningTaskDto: + """Create new cleaning task""" + if request.instance and request.template_id: + instances = await self.get_instance_by_template_id(db, request.template_id) + request.instance = instances + + await self.validator.check_task_name_duplication(db, request.name) + self.validator.check_input_and_output(request.instance) + executor_type = self.validator.check_and_get_executor_type(request.instance) + + task_id = str(uuid.uuid4()) + + dest_dataset_id = request.dest_dataset_id + dest_dataset_name = request.dest_dataset_name + + if not dest_dataset_id: + logger.info(f"Creating new dataset: {dest_dataset_name}, type: {request.dest_dataset_type}") + dest_dataset_response = await self.dataset_service.create_dataset( + name=dest_dataset_name, + dataset_type=request.dest_dataset_type, + description="", + status="ACTIVE" + ) + dest_dataset_id = dest_dataset_response.id + logger.info(f"Successfully created dataset: {dest_dataset_id}") + else: + logger.info(f"Using existing dataset: {dest_dataset_id}") + dest_dataset_response = await self.dataset_service.get_dataset(dest_dataset_id) + + src_dataset = await self.dataset_service.get_dataset(request.src_dataset_id) + if not src_dataset: + raise BusinessError(ErrorCodes.CLEANING_DATASET_NOT_FOUND, request.src_dataset_id) + + task_dto = CleaningTaskDto( + id=task_id, + name=request.name, + description=request.description, + status=CleaningTaskStatus.PENDING, + src_dataset_id=request.src_dataset_id, + src_dataset_name=request.src_dataset_name, + dest_dataset_id=dest_dataset_id, + dest_dataset_name=dest_dataset_name, + before_size=src_dataset.totalSize, + file_count=src_dataset.fileCount, + retry_count=-1, + ) + + await self.task_repo.insert_task(db, task_dto) + + await self._add_cleaning_to_graph(src_dataset, task_dto, dest_dataset_response) + + await self.operator_instance_repo.insert_instance(db, task_id, request.instance) + + all_operators = await self.operator_service.get_operators(db=db, page=0, size=1000, 
+        operator_map = {op.id: op for op in all_operators}
+
+        await self.prepare_task(dest_dataset_id, task_id, request.instance, operator_map, executor_type)
+
+        return await self.get_task(db, task_id)
+
+    async def _add_cleaning_to_graph(
+        self,
+        src_dataset,
+        task: CleaningTaskDto,
+        dest_dataset,
+    ) -> None:
+        """Add the cleaning task to the lineage graph"""
+        from_node = LineageNode(
+            id=src_dataset.id,
+            node_type=NodeType.DATASET.value,
+            name=src_dataset.name,
+            description=src_dataset.description or "",
+        )
+
+        to_node = LineageNode(
+            id=dest_dataset.id,
+            node_type=NodeType.DATASET.value,
+            name=dest_dataset.name,
+            description=dest_dataset.description or "",
+        )
+
+        edge = LineageEdge(
+            process_id=task.id,
+            name=task.name or "",
+            description=task.description or "",
+            edge_type=EdgeType.DATA_CLEANING.value,
+            from_node_id=from_node.id,
+            to_node_id=to_node.id,
+        )
+
+        await self.lineage_service.generate_graph(from_node, edge, to_node)
+
+    async def prepare_task(
+        self,
+        dataset_id: str,
+        task_id: str,
+        instances: List[OperatorInstanceDto],
+        operator_map: dict,
+        executor_type: str,
+    ) -> None:
+        """Prepare task configuration file"""
+        process_config = {
+            "dataset_id": dataset_id,
+            "instance_id": task_id,
+            "dataset_path": f"{FLOW_PATH}/{task_id}/dataset.jsonl",
+            "export_path": f"{DATASET_PATH}/{dataset_id}",
+            "executor_type": executor_type,
+            "process": [],
+        }
+
+        for instance in instances:
+            operator = operator_map.get(instance.id)
+            if not operator:
+                continue
+
+            operator_config = self._get_default_values(operator)
+            if instance.overrides:  # overrides may be None when no settings_override is stored
+                operator_config.update(instance.overrides)
+
+            runtime_config = self._get_runtime_config(operator)
+            operator_config.update(runtime_config)
+
+            process_config["process"].append({instance.id: operator_config})
+
+        config_file_path = Path(f"{FLOW_PATH}/{task_id}/process.yaml")
+        config_file_path.parent.mkdir(parents=True, exist_ok=True)
+
+        import yaml
+        try:
+            with open(config_file_path, 'w', encoding='utf-8') as f:
+                yaml.dump(process_config, f, default_flow_style=False, allow_unicode=True)
+        except Exception as e:
+            logger.error(f"Failed to write process.yaml: {e}")
+            raise BusinessError(ErrorCodes.CLEANING_FILE_SYSTEM_ERROR, str(e))
+
+    def _get_default_values(self, operator) -> Dict[str, Any]:
+        """Get default values from operator settings"""
+        if not operator.settings:
+            return {}
+
+        try:
+            settings = json.loads(operator.settings)
+            defaults = {}
+
+            for key, value in settings.items():
+                if "defaultVal" in value:
+                    defaults[key] = value["defaultVal"]
+
+            return defaults
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to parse settings: {e}")
+            return {}
+
+    def _get_runtime_config(self, operator) -> Dict[str, Any]:
+        """Get runtime configuration from operator"""
+        if not operator.runtime:
+            return {}
+
+        try:
+            return json.loads(operator.runtime)
+        except json.JSONDecodeError as e:
+            logger.error(f"Failed to parse runtime config: {e}")
+            return {}
+
+    async def scan_dataset(
+        self,
+        db: AsyncSession,
+        task_id: str,
+        src_dataset_id: str,
+        succeed_files: Set[str] | None = None,
+    ) -> None:
+        """Scan source dataset and create dataset.jsonl"""
+        target_file_path = Path(f"{FLOW_PATH}/{task_id}/dataset.jsonl")
+        target_file_path.parent.mkdir(parents=True, exist_ok=True)
+
+        query = text("""
+            SELECT id, file_name, file_path, file_type, file_size
+            FROM t_dm_dataset_files
+            WHERE dataset_id = :dataset_id
+            ORDER BY created_at
+        """)
+
+        result = await db.execute(query,
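
For orientation, `prepare_task` above ends up serializing a YAML document shaped like this (keys come from the code; all values here are hypothetical):

```yaml
dataset_id: <dest-dataset-id>
instance_id: <task-id>
dataset_path: /flow/<task-id>/dataset.jsonl
export_path: /dataset/<dest-dataset-id>
executor_type: datamate        # or "default" when Data-Juicer operators are used
process:
  - <operator-id>:             # defaults from settings, then instance overrides,
      min_length: 10           # then merged with the operator's runtime config
```
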
{"dataset_id": src_dataset_id}) + files = result.fetchall() + + with open(target_file_path, 'w', encoding='utf-8') as f: + for file in files: + if succeed_files and file.id in succeed_files: + continue + + file_info = { + "fileId": file.id, + "fileName": file.file_name, + "filePath": file.file_path, + "fileType": file.file_type, + "fileSize": file.file_size, + } + f.write(json.dumps(file_info, ensure_ascii=False) + "\n") + + async def get_task_results(self, db: AsyncSession, task_id: str) -> List[CleaningResultDto]: + """Get task results""" + return await self.result_repo.find_by_instance_id(db, task_id) + + async def get_task_log(self, db: AsyncSession, task_id: str, retry_count: int) -> List[CleaningTaskLog]: + """Get task log""" + self.validator.check_task_id(task_id) + + log_path = Path(f"{FLOW_PATH}/{task_id}/output.log") + if retry_count > 0: + log_path = Path(f"{FLOW_PATH}/{task_id}/output.log.{retry_count}") + + if not log_path.exists(): + return [] + + logs = [] + last_level = "INFO" + + standard_level_pattern = re.compile( + r"\b(DEBUG|Debug|INFO|Info|WARN|Warn|WARNING|Warning|ERROR|Error|FATAL|Fatal)\b" + ) + exception_suffix_pattern = re.compile(r"\b\w+(Warning|Error|Exception)\b") + + with open(log_path, 'r', encoding='utf-8') as f: + for line in f: + last_level = self._get_log_level(line, last_level, standard_level_pattern, exception_suffix_pattern) + logs.append(CleaningTaskLog(level=last_level, message=line.rstrip())) + + return logs + + def _get_log_level(self, line: str, default_level: str, std_pattern, ex_pattern) -> str: + """Extract log level from log line""" + if not line or not line.strip(): + return default_level + + std_match = std_pattern.search(line) + if std_match: + return std_match.group(1).upper() + + ex_match = ex_pattern.search(line) + if ex_match: + match = ex_match.group(1).upper() + if match == "WARNING": + return "WARN" + if match in ["ERROR", "EXCEPTION"]: + return "ERROR" + + return default_level + + async def delete_task(self, db: AsyncSession, task_id: str) -> None: + """Delete task""" + self.validator.check_task_id(task_id) + + await self.task_repo.delete_task_by_id(db, task_id) + await self.operator_instance_repo.delete_by_instance_id(db, task_id) + await self.result_repo.delete_by_instance_id(db, task_id) + + task_path = Path(f"{FLOW_PATH}/{task_id}") + if task_path.exists(): + try: + shutil.rmtree(task_path) + except Exception as e: + logger.warning(f"Failed to delete task path {task_id}: {e}") + + async def execute_task(self, db: AsyncSession, task_id: str) -> bool: + """Execute task""" + succeeded = await self.result_repo.find_by_instance_id(db, task_id, "COMPLETED") + succeed_set = {res.src_file_id for res in succeeded} + + task = await self.task_repo.find_task_by_id(db, task_id) + if not task: + raise BusinessError(ErrorCodes.CLEANING_TASK_NOT_FOUND, task_id) + + await self.scan_dataset(db, task_id, task.src_dataset_id, succeed_set) + await self.result_repo.delete_by_instance_id(db, task_id, "FAILED") + + return await self.scheduler.execute_task(db, task_id, (task.retry_count or 0) + 1) + + async def stop_task(self, db: AsyncSession, task_id: str) -> bool: + """Stop task""" + return await self.scheduler.stop_task(db, task_id) + + async def get_instance_by_template_id( + self, + db: AsyncSession, + template_id: str + ) -> List[OperatorInstanceDto]: + """Get instances by template ID (delegated to template service)""" + instances = await self.operator_instance_repo.find_operator_by_instance_id(db, template_id) + + # Batch query operators + 
all_operators = await self.operator_service.get_operators(db=db, page=0, size=1000, categories=[], keyword=None, + is_star=None) + operator_map = {op.id: op for op in all_operators} + + result = [] + for inst in instances: + operator = operator_map.get(inst.operator_id) + if operator: + operator_dto = OperatorInstanceDto( + id=operator.id, + name=operator.name, + description=operator.description, + inputs=operator.inputs, + outputs=operator.outputs, + settings=operator.settings, + categories=operator.categories, + ) + if inst.settings_override: + try: + operator_dto.overrides = json.loads(inst.settings_override) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse settings for {inst.operator_id}: {e}") + result.append(operator_dto) + + return result diff --git a/runtime/datamate-python/app/module/cleaning/service/cleaning_task_validator.py b/runtime/datamate-python/app/module/cleaning/service/cleaning_task_validator.py new file mode 100644 index 00000000..32cc47db --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/service/cleaning_task_validator.py @@ -0,0 +1,89 @@ +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.exception import BusinessError, ErrorCodes +from app.module.cleaning.schema import OperatorInstanceDto +from app.module.operator.constants import CATEGORY_DATA_JUICER_ID, CATEGORY_DATAMATE_ID + + +class CleaningTaskValidator: + """Validator for cleaning tasks and templates""" + + def __init__(self, task_repo=None, template_repo=None): + self.task_repo = task_repo + self.template_repo = template_repo + + async def check_task_name_duplication(self, db: AsyncSession, name: str) -> None: + """Check if task name is duplicated""" + if not name: + raise BusinessError(ErrorCodes.CLEANING_NAME_DUPLICATED) + if await self.task_repo.is_name_exist(db, name): + raise BusinessError(ErrorCodes.CLEANING_NAME_DUPLICATED) + + async def check_template_name_duplication(self, db: AsyncSession, name: str) -> None: + """Check if template name is duplicated""" + if not name: + raise BusinessError(ErrorCodes.CLEANING_TEMPLATE_NAME_DUPLICATED) + if await self.template_repo.is_name_exist(db, name): + raise BusinessError(ErrorCodes.CLEANING_TEMPLATE_NAME_DUPLICATED) + + @staticmethod + def check_input_and_output(instances: list[OperatorInstanceDto]) -> None: + """Validate that operator input/output types are compatible""" + if not instances: + return + + for i in range(len(instances) - 1): + current = instances[i] + next_op = instances[i + 1] + + if not current.outputs: + raise BusinessError( + ErrorCodes.CLEANING_INVALID_OPERATOR_INPUT, + f"Operator {current.id} has no outputs defined" + ) + + if not next_op.inputs: + raise BusinessError( + ErrorCodes.CLEANING_INVALID_OPERATOR_INPUT, + f"Operator {next_op.id} has no inputs defined" + ) + + current_outputs = set(current.outputs.split(',')) + next_inputs = set(next_op.inputs.split(',')) + + if not current_outputs.intersection(next_inputs): + raise BusinessError( + ErrorCodes.CLEANING_INVALID_OPERATOR_INPUT, + f"Operator {current.id} outputs {current.outputs} " + f"but operator {next_op.id} requires {next_op.inputs}" + ) + + @staticmethod + def check_and_get_executor_type(instances: list[OperatorInstanceDto]) -> str: + """Check operator categories and determine executor type (datamate/datajuicer)""" + if not instances: + return "datamate" + + executor_types = set() + + for instance in instances: + if instance.categories: + for category in instance.categories: + if CATEGORY_DATA_JUICER_ID in category.lower(): + 
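
To make the input/output rule in `check_input_and_output` above concrete: two adjacent operators are compatible when their comma-separated type sets intersect. A minimal, self-contained illustration (plain sets, no repository types involved):

```python
# Upstream outputs and downstream inputs, as declared in operator metadata.
current_outputs = set("text,image".split(","))  # upstream produces text and image
next_inputs = set("text".split(","))            # downstream consumes text

assert current_outputs & next_inputs            # {"text"} -> the chain is valid

# An empty intersection is what triggers CLEANING_INVALID_OPERATOR_INPUT:
assert not set("audio".split(",")) & next_inputs
```
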
executor_types.add("default") + elif CATEGORY_DATAMATE_ID in category.lower(): + executor_types.add("datamate") + + if len(executor_types) > 1: + raise BusinessError( + ErrorCodes.CLEANING_INVALID_EXECUTOR_TYPE, + "Cannot mix DataMate and DataJuicer operators in same task" + ) + + return executor_types.pop() if executor_types else "datamate" + + @staticmethod + def check_task_id(task_id: str) -> None: + """Validate task ID""" + if not task_id: + raise BusinessError(ErrorCodes.CLEANING_TASK_ID_REQUIRED) diff --git a/runtime/datamate-python/app/module/cleaning/service/cleaning_template_service.py b/runtime/datamate-python/app/module/cleaning/service/cleaning_template_service.py new file mode 100644 index 00000000..2443bf4f --- /dev/null +++ b/runtime/datamate-python/app/module/cleaning/service/cleaning_template_service.py @@ -0,0 +1,226 @@ +import json +import uuid +from typing import List + +from sqlalchemy.ext.asyncio import AsyncSession + +from app.core.exception import BusinessError, ErrorCodes +from app.core.logging import get_logger +from app.module.cleaning import UpdateCleaningTemplateRequest +from app.module.cleaning.repository import ( + CleaningTemplateRepository, + OperatorInstanceRepository, +) +from app.module.cleaning.schema import ( + CleaningTemplateDto, + CreateCleaningTemplateRequest, + OperatorInstanceDto, +) +from app.module.cleaning.service.cleaning_task_validator import CleaningTaskValidator + +logger = get_logger(__name__) + + +class CleaningTemplateService: + """Service for managing cleaning templates""" + + def __init__( + self, + template_repo: CleaningTemplateRepository, + operator_instance_repo: OperatorInstanceRepository, + operator_service, + validator: CleaningTaskValidator, + ): + self.template_repo = template_repo + self.operator_instance_repo = operator_instance_repo + self.operator_service = operator_service + self.validator = validator + + async def get_templates( + self, + db: AsyncSession, + keyword: str | None = None + ) -> List[CleaningTemplateDto]: + """Get all templates""" + templates = await self.template_repo.find_all_templates(db, keyword) + + # Collect all operator IDs + template_instances_map = {} + for template in templates: + instances = await self.operator_instance_repo.find_operator_by_instance_id(db, template.id) + template_instances_map[template.id] = instances + + # Batch query all operators + all_operators = await self.operator_service.get_operators(db=db, page=0, size=1000, categories=[], keyword=None, + is_star=None) + operator_map = {op.id: op for op in all_operators} + + # Build result + result = [] + for template in templates: + template_dto = CleaningTemplateDto( + id=template.id, + name=template.name, + description=template.description, + instance=[], + created_at=template.created_at, + updated_at=template.updated_at, + ) + + instances = template_instances_map.get(template.id, []) + for inst in instances: + operator = operator_map.get(inst.operator_id) + if operator: + operator_dto = OperatorInstanceDto( + id=operator.id, + name=operator.name, + description=operator.description, + inputs=operator.inputs, + outputs=operator.outputs, + settings=operator.settings, + categories=operator.categories, + ) + if inst.settings_override: + try: + operator_dto.overrides = json.loads(inst.settings_override) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse settings for {inst.operator_id}: {e}") + template_dto.instance.append(operator_dto) + + result.append(template_dto) + + return result + + async def get_template( + self, 
+ db: AsyncSession, + template_id: str + ) -> CleaningTemplateDto: + """Get template by ID""" + template = await self.template_repo.find_template_by_id(db, template_id) + if not template: + raise BusinessError(ErrorCodes.CLEANING_TEMPLATE_NOT_FOUND, template_id) + + template_dto = CleaningTemplateDto( + id=template.id, + name=template.name, + description=template.description, + instance=[], + created_at=template.created_at, + updated_at=template.updated_at, + ) + + instances = await self.operator_instance_repo.find_operator_by_instance_id(db, template_id) + + # Batch query operators + all_operators = await self.operator_service.get_operators(db=db, page=0, size=1000, categories=[], keyword=None, + is_star=None) + operator_map = {op.id: op for op in all_operators} + + for inst in instances: + operator = operator_map.get(inst.operator_id) + if operator: + operator_dto = OperatorInstanceDto( + id=operator.id, + name=operator.name, + description=operator.description, + inputs=operator.inputs, + outputs=operator.outputs, + settings=operator.settings, + categories=operator.categories, + ) + if inst.settings_override: + try: + operator_dto.overrides = json.loads(inst.settings_override) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse settings for {inst.operator_id}: {e}") + template_dto.instance.append(operator_dto) + + return template_dto + + async def create_template( + self, + db: AsyncSession, + request: CreateCleaningTemplateRequest + ) -> CleaningTemplateDto: + """Create new template""" + from app.db.models.cleaning import CleaningTemplate + + await self.validator.check_template_name_duplication(db, request.name) + self.validator.check_input_and_output(request.instance) + self.validator.check_and_get_executor_type(request.instance) + + template_id = str(uuid.uuid4()) + template = CleaningTemplate( + id=template_id, + name=request.name, + description=request.description, + ) + + await self.template_repo.insert_template(db, template) + + await self.operator_instance_repo.insert_instance(db, template_id, request.instance) + + return await self.get_template(db, template_id) + + async def update_template( + self, + db: AsyncSession, + template_id: str, + request: UpdateCleaningTemplateRequest + ) -> CleaningTemplateDto: + """Update template""" + + template = await self.template_repo.find_template_by_id(db, template_id) + if not template: + raise BusinessError(ErrorCodes.CLEANING_TEMPLATE_NOT_FOUND, template_id) + + template.name = request.name + template.description = request.description + + await self.template_repo.update_template(db, template) + await self.operator_instance_repo.delete_by_instance_id(db, template_id) + + await self.operator_instance_repo.insert_instance(db, template_id, request.instance) + + return await self.get_template(db, template_id) + + async def delete_template(self, db: AsyncSession, template_id: str) -> None: + """Delete template""" + await self.template_repo.delete_template(db, template_id) + await self.operator_instance_repo.delete_by_instance_id(db, template_id) + + async def get_instance_by_template_id( + self, + db: AsyncSession, + template_id: str + ) -> List[OperatorInstanceDto]: + """Get operator instances by template ID""" + instances = await self.operator_instance_repo.find_operator_by_instance_id(db, template_id) + + # Batch query operators + all_operators = await self.operator_service.get_operators(db=db, page=0, size=1000, categories=[], keyword=None, + is_star=None) + operator_map = {op.id: op for op in all_operators} + + result = [] 
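
Taken together with `create_task` in the task service, the template path lets a caller define an operator chain once and stamp out tasks from it. A hedged sketch (request construction shapes assumed; only field names from the schemas in this diff):

```python
# Hypothetical wiring; db/session handling and service construction are omitted.
template = await template_service.create_template(db, CreateCleaningTemplateRequest(
    name="basic-text-clean",
    description="dedupe then normalize",
    instance=[OperatorInstanceDto(id="op-dedupe"), OperatorInstanceDto(id="op-normalize")],
))

task = await task_service.create_task(db, CreateCleaningTaskRequest(
    name="clean-corpus-v1",
    template_id=template.id,          # instances are resolved from the template
    src_dataset_id="<src-dataset-id>",
    dest_dataset_name="corpus-cleaned",
    dest_dataset_type="TEXT",
))
```
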
+ for inst in instances: + operator = operator_map.get(inst.operator_id) + if operator: + operator_dto = OperatorInstanceDto( + id=operator.id, + name=operator.name, + description=operator.description, + inputs=operator.inputs, + outputs=operator.outputs, + settings=operator.settings, + categories=operator.categories, + ) + if inst.settings_override: + try: + operator_dto.overrides = json.loads(inst.settings_override) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse settings for {inst.operator_id}: {e}") + result.append(operator_dto) + + return result diff --git a/runtime/datamate-python/app/module/dataset/schema/__init__.py b/runtime/datamate-python/app/module/dataset/schema/__init__.py index 221c43f8..6a8b0bd0 100644 --- a/runtime/datamate-python/app/module/dataset/schema/__init__.py +++ b/runtime/datamate-python/app/module/dataset/schema/__init__.py @@ -10,6 +10,7 @@ from .dataset import ( DatasetResponse, DatasetTypeResponse, + CreateDatasetRequest, ) __all__ = [ @@ -21,4 +22,5 @@ "BatchUpdateFileTagsResponse", "FileTagUpdateResult", "FileTagUpdate", + "CreateDatasetRequest", ] \ No newline at end of file diff --git a/runtime/datamate-python/app/module/dataset/schema/dataset.py b/runtime/datamate-python/app/module/dataset/schema/dataset.py index 84334d8c..8095857f 100644 --- a/runtime/datamate-python/app/module/dataset/schema/dataset.py +++ b/runtime/datamate-python/app/module/dataset/schema/dataset.py @@ -9,6 +9,7 @@ class DatasetType(Enum): IMAGE = "IMAGE" AUDIO = "AUDIO" VIDEO = "VIDEO" + OTHER = "OTHER" class DatasetTypeResponse(BaseModel): """数据集类型响应模型""" @@ -18,6 +19,16 @@ class DatasetTypeResponse(BaseModel): supportedFormats: List[str] = Field(default_factory=list, description="支持的文件格式") icon: Optional[str] = Field(None, description="图标") +class CreateDatasetRequest(BaseModel): + """创建数据集请求模型""" + name: str = Field(..., description="数据集名称", min_length=1, max_length=100) + description: Optional[str] = Field(None, description="数据集描述", max_length=500) + datasetType: DatasetType = Field(..., description="数据集类型", alias="datasetType") + tags: Optional[List[str]] = Field(None, description="标签列表") + dataSource: Optional[str] = Field(None, description="数据源") + retentionDays: Optional[int] = Field(None, description="保留天数") + status: Optional[str] = Field(None, description="数据集状态") + class DatasetResponse(BaseModel): """DM服务数据集响应模型""" id: str = Field(..., description="数据集ID") diff --git a/runtime/datamate-python/app/module/dataset/service/service.py b/runtime/datamate-python/app/module/dataset/service/service.py index ff5869d7..5c1ddb4d 100644 --- a/runtime/datamate-python/app/module/dataset/service/service.py +++ b/runtime/datamate-python/app/module/dataset/service/service.py @@ -62,6 +62,84 @@ async def get_dataset(self, dataset_id: str) -> Optional[DatasetResponse]: logger.error(f"Failed to get dataset {dataset_id}: {e}") return None + async def create_dataset( + self, + name: str, + dataset_type: str, + description: str = "", + status: Optional[str] = None, + ) -> DatasetResponse: + """ + 创建数据集(参考Java版本DatasetApplicationService.createDataset) + + Args: + name: 数据集名称 + dataset_type: 数据集类型(TEXT/IMAGE/VIDEO/AUDIO/OTHER) + description: 数据集描述 + status: 数据集状态 + + Returns: + 创建的数据集响应 + """ + try: + logger.info(f"Creating dataset: {name}, type: {dataset_type}") + + # 1. 
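
Before the implementation continues, a usage sketch of this method (session wiring elided; argument values hypothetical):

```python
# Create the destination dataset for a cleaning task; status defaults to DRAFT when omitted.
dataset = await dataset_service.create_dataset(
    name="corpus-cleaned",
    dataset_type="TEXT",        # one of TEXT/IMAGE/VIDEO/AUDIO/OTHER
    description="output of cleaning task",
    status="ACTIVE",
)
print(dataset.id, dataset.fileCount)  # newly created datasets start with 0 files
```
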
检查数据集名称是否已存在 + result = await self.db.execute( + select(Dataset).where(Dataset.name == name) + ) + existing_dataset = result.scalar_one_or_none() + if existing_dataset: + error_msg = f"Dataset with name '{name}' already exists" + logger.error(error_msg) + raise Exception(error_msg) + + # 2. 创建数据集对象 + dataset_id = str(uuid.uuid4()) + dataset_path = f"{os.path.join('/dataset', dataset_id)}" + + # 如果没有提供status,默认为DRAFT + if status is None: + status = "DRAFT" + + new_dataset = Dataset( + id=dataset_id, + name=name, + description=description, + dataset_type=dataset_type, + path=dataset_path, + size_bytes=0, + file_count=0, + status=status, + dataset_metadata="{}", + version=0, + created_by="system", + ) + + self.db.add(new_dataset) + await self.db.flush() + await self.db.commit() + + logger.info(f"Successfully created dataset: {new_dataset.id}") + + return DatasetResponse( + id=new_dataset.id, # type: ignore + name=new_dataset.name, # type: ignore + description=new_dataset.description or "", # type: ignore + datasetType=new_dataset.dataset_type, # type: ignore + status=new_dataset.status, # type: ignore + fileCount=new_dataset.file_count or 0, # type: ignore + totalSize=new_dataset.size_bytes or 0, # type: ignore + createdAt=new_dataset.created_at, # type: ignore + updatedAt=new_dataset.updated_at, # type: ignore + createdBy=new_dataset.created_by # type: ignore + ) + + except Exception as e: + await self.db.rollback() + logger.error(f"Failed to create dataset: {e}") + raise Exception(f"Failed to create dataset: {str(e)}") + async def get_dataset_files( self, dataset_id: str, diff --git a/runtime/datamate-python/app/module/operator/README.md b/runtime/datamate-python/app/module/operator/README.md new file mode 100644 index 00000000..703e8ed3 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/README.md @@ -0,0 +1,138 @@ +# Operator Market Service - Python Implementation + +## 概述 + +这是 `operator-market-service` 的 Python 实现,已集成到 `runtime/datamate-python` 项目中。 + +## 功能 + +- **算子管理**:创建、查询、更新、删除算子 +- **分类管理**:树状分类结构查询 +- **文件上传**:支持算子文件上传和解析(支持 tar/zip 格式) +- **MCP 工具集成**:通过 fastapi-mcp 提供 MCP 工具接口 + +## 目录结构 + +``` +app/module/operator_market/ +├── __init__.py # 模块入口 +├── constants.py # 常量定义 +├── exceptions.py # 异常定义 +├── schema/ # Pydantic Schema 定义 +│ ├── __init__.py +│ ├── operator.py # 算子相关 Schema +│ ├── category.py # 分类相关 Schema +│ └── release.py # 发布版本 Schema +├── parsers/ # 文件解析器 +│ ├── __init__.py +│ ├── abstract_parser.py # 抽象解析器基类 +│ ├── tar_parser.py # TAR 文件解析器 +│ ├── zip_parser.py # ZIP 文件解析器 +│ └── parser_holder.py # 解析器持有者 +├── repository/ # 数据访问层 +│ ├── __init__.py +│ ├── operator_repository.py +│ ├── category_repository.py +│ ├── category_relation_repository.py +│ └── operator_release_repository.py +├── service/ # 服务层 +│ ├── __init__.py +│ ├── operator_service.py +│ └── category_service.py +└── interface/ # API 接口层 + ├── __init__.py + ├── operator_routes.py + └── category_routes.py +``` + +## API 端点 + +### 算子相关 (`/api/operator-market/operators`) + +| 方法 | 路径 | 描述 | +|------|--------|------| +| POST | `/list` | 查询算子列表(支持分页、分类过滤、关键词搜索) | +| GET | `/{operator_id}` | 获取算子详情 | +| PUT | `/{operator_id}` | 更新算子信息 | +| POST | `/create` | 创建新算子 | +| POST | `/upload` | 上传算子文件 | +| POST | `/upload/pre-upload` | 预上传(获取请求 ID) | +| POST | `/upload/chunk` | 分块上传 | +| DELETE | `/{operator_id}` | 删除算子 | +| GET | `/examples/download` | 下载示例算子 | + +### 分类相关 (`/api/operator-market/categories`) + +| 方法 | 路径 | 描述 | +|------|--------|------| +| GET | `/tree` | 获取分类树状结构 | + +## 数据库表 + +- 
`t_operator` - 算子表 +- `t_operator_category` - 分类表 +- `t_operator_category_relation` - 分类关系表 +- `t_operator_release` - 算子发布版本表 +- `v_operator` - 算子视图(包含分类信息) + +## 文件格式支持 + +算子文件需包含 `metadata.yml` 文件,格式如下: + +```yaml +raw_id: "operator-id" +name: "算子名称" +description: "算子描述" +version: "1.0.0" +language: "python" # python, java +modal: "text" # text, image, audio, video +vendor: "datamate" # datamate, data-juicer, or other +inputs: {...} +outputs: {...} +runtime: {...} +settings: {...} +metrics: {...} +release: + - "更新日志1" + - "更新日志2" +``` + +## 待实现功能 + +- [ ] 算子收藏功能完善 +- [ ] 标签过滤功能 + +## 使用示例 + +### 查询算子列表 + +```bash +curl -X POST "http://localhost:18000/api/operator-market/operators/list" \ + -H "Content-Type: application/json" \ + -d '{ + "page": 1, + "size": 10, + "keyword": "test", + "isStar": false + }' +``` + +### 获取分类树 + +```bash +curl -X GET "http://localhost:18000/api/operator-market/categories/tree" +``` + +### 创建算子 + +```bash +curl -X POST "http://localhost:18000/api/operator-market/operators/create" \ + -H "Content-Type: application/json" \ + -d '{ + "id": "new-operator-id", + "name": "新算子", + "description": "这是一个新算子", + "version": "1.0.0", + "fileName": "operator.tar" + }' +``` diff --git a/runtime/datamate-python/app/module/operator/__init__.py b/runtime/datamate-python/app/module/operator/__init__.py new file mode 100644 index 00000000..1ac84e31 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/__init__.py @@ -0,0 +1,4 @@ +""" +Operator Market Service Module +算子市场服务模块 +""" diff --git a/runtime/datamate-python/app/module/operator/constants.py b/runtime/datamate-python/app/module/operator/constants.py new file mode 100644 index 00000000..e6d83ee9 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/constants.py @@ -0,0 +1,50 @@ +""" +Operator Market Constants +算子市场常量定义 +""" + +# Service ID +SERVICE_ID = "operator" + +# YAML metadata path +YAML_PATH = "metadata.yml" + +# Example operator file path +EXAMPLE_OPERATOR_PATH = "/app/test_operator.tar" + +# Category IDs +CATEGORY_PYTHON = "python" +CATEGORY_PYTHON_ID = "9eda9d5d-072b-499b-916c-797a0a8750e1" + +CATEGORY_JAVA = "java" +CATEGORY_JAVA_ID = "b5bfc548-8ef6-417c-b8a6-a4197c078249" + +CATEGORY_CUSTOMIZED_ID = "ec2cdd17-8b93-4a81-88c4-ac9e98d10757" +CATEGORY_TEXT_ID = "d8a5df7a-52a9-42c2-83c4-01062e60f597" +CATEGORY_IMAGE_ID = "de36b61c-9e8a-4422-8c31-d30585c7100f" +CATEGORY_AUDIO_ID = "42dd9392-73e4-458c-81ff-41751ada47b5" +CATEGORY_VIDEO_ID = "a233d584-73c8-4188-ad5d-8f7c8dda9c27" +CATEGORY_ALL_ID = "4d7dbd77-0a92-44f3-9056-2cd62d4a71e4" +CATEGORY_STAR_ID = "51847c24-bba9-11f0-888b-5b143cb738aa" +CATEGORY_PREDEFINED_ID = "96a3b07a-3439-4557-a835-525faad60ca3" +CATEGORY_DATAMATE_ID = "431e7798-5426-4e1a-aae6-b9905a836b34" +CATEGORY_DATA_JUICER_ID = "79b385b4-fde8-4617-bcba-02a176938996" +CATEGORY_OTHER_VENDOR_ID = "f00eaa3e-96c1-4de4-96cd-9848ef5429ec" + +# Category mapping +CATEGORY_MAP = { + CATEGORY_PYTHON: CATEGORY_PYTHON_ID, + CATEGORY_JAVA: CATEGORY_JAVA_ID, + "text": CATEGORY_TEXT_ID, + "image": CATEGORY_IMAGE_ID, + "audio": CATEGORY_AUDIO_ID, + "video": CATEGORY_VIDEO_ID, + "all": CATEGORY_ALL_ID, + "datamate": CATEGORY_DATAMATE_ID, + "data-juicer": CATEGORY_DATA_JUICER_ID, +} + +# File paths +OPERATOR_BASE_PATH = "/operators" +UPLOAD_DIR = "upload" +EXTRACT_DIR = "extract" diff --git a/runtime/datamate-python/app/module/operator/exceptions.py b/runtime/datamate-python/app/module/operator/exceptions.py new file mode 100644 index 00000000..6eca13f5 --- /dev/null +++ 
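
Category assignment from a parsed `metadata.yml` reduces to plain lookups against `CATEGORY_MAP` (the parser later in this diff does exactly this; unknown vendors fall back to the "other vendor" bucket):

```python
from app.module.operator.constants import (
    CATEGORY_MAP,
    CATEGORY_OTHER_VENDOR_ID,
    CATEGORY_CUSTOMIZED_ID,
)

meta = {"language": "Python", "modal": "text", "vendor": "acme"}  # hypothetical metadata fields

categories = [
    CATEGORY_MAP.get(meta["language"].lower(), ""),                      # language bucket
    CATEGORY_MAP.get(meta["modal"].lower(), ""),                         # modality bucket
    CATEGORY_MAP.get(meta["vendor"].lower(), CATEGORY_OTHER_VENDOR_ID),  # vendor, with fallback
    CATEGORY_CUSTOMIZED_ID,                                              # every upload is "customized"
]
```
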
b/runtime/datamate-python/app/module/operator/exceptions.py @@ -0,0 +1,72 @@ +""" +Operator Market Exceptions +算子市场异常定义 +""" +from enum import Enum +from typing import Optional + + +class OperatorErrorCode: + """算子错误码""" + def __init__(self, message: str, error_code: str): + self.message = message + self.error_code = error_code + + +class OperatorException(RuntimeError): + """算子异常基类""" + def __init__(self, operator_error_code: OperatorErrorCode): + self.message = operator_error_code.message + self.error_code = operator_error_code.error_code + super().__init__(self.message) + + +class OperatorErrorCodeEnum(Enum): + """算子错误码枚举""" + FIELD_NOT_FOUND = OperatorErrorCode( + "必填字段缺失", "OPERATOR_FIELD_NOT_FOUND" + ) + SETTINGS_PARSE_FAILED = OperatorErrorCode( + "设置解析失败", "OPERATOR_SETTINGS_PARSE_FAILED" + ) + OPERATOR_IN_INSTANCE = OperatorErrorCode( + "算子正在使用中", "OPERATOR_IN_INSTANCE" + ) + CANT_DELETE_PREDEFINED_OPERATOR = OperatorErrorCode( + "无法删除预定义算子", "CANT_DELETE_PREDEFINED_OPERATOR" + ) + + +class FieldNotFoundError(OperatorException): + """必填字段缺失""" + def __init__(self, field_name: str): + super().__init__( + OperatorErrorCodeEnum.FIELD_NOT_FOUND.value + ) + self.message = f"Required field '{field_name}' is missing" + self.field_name = field_name + + +class SettingsParseError(OperatorException): + """设置解析失败""" + def __init__(self, detail: Optional[str] = None): + super().__init__( + OperatorErrorCodeEnum.SETTINGS_PARSE_FAILED.value + ) + self.detail = detail + + +class OperatorInInstanceError(OperatorException): + """算子正在使用中""" + def __init__(self): + super().__init__( + OperatorErrorCodeEnum.OPERATOR_IN_INSTANCE.value + ) + + +class CannotDeletePredefinedOperatorError(OperatorException): + """无法删除预定义算子""" + def __init__(self): + super().__init__( + OperatorErrorCodeEnum.CANT_DELETE_PREDEFINED_OPERATOR.value + ) diff --git a/runtime/datamate-python/app/module/operator/interface/__init__.py b/runtime/datamate-python/app/module/operator/interface/__init__.py new file mode 100644 index 00000000..f83ad24f --- /dev/null +++ b/runtime/datamate-python/app/module/operator/interface/__init__.py @@ -0,0 +1,9 @@ +""" +Operator Market API Interfaces +算子市场 API 接口层 +""" +from .operator_routes import router as operator_router +from .category_routes import router as category_router + + +__all__ = ["operator_router", "category_router"] diff --git a/runtime/datamate-python/app/module/operator/interface/category_routes.py b/runtime/datamate-python/app/module/operator/interface/category_routes.py new file mode 100644 index 00000000..7483a5f0 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/interface/category_routes.py @@ -0,0 +1,55 @@ +""" +Category API Routes +分类 API 路由 +""" +from fastapi import APIRouter, Depends + +from app.db.models.operator import Category, CategoryRelation, Operator +from app.db.session import get_db +from app.module.operator.repository import ( + CategoryRepository, + CategoryRelationRepository, +) +from app.module.operator.repository.operator_repository import OperatorRepository +from app.module.operator.schema import CategoryTreePagedResponse +from app.module.operator.schema.category import PaginatedCategoryTree +from app.module.operator.service import CategoryService +from app.module.shared.schema import StandardResponse + +router = APIRouter(prefix="/categories", tags=["Category"]) + + +def get_category_service() -> CategoryService: + """获取分类服务实例""" + return CategoryService( + category_repo=CategoryRepository(Category()), + 
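
The exceptions above carry a stable `error_code` next to the human-readable message, so an API layer can translate them uniformly. A sketch of such a handler (envelope shape and status code are assumptions, modeled on the `StandardResponse` used by the routes):

```python
from fastapi import Request
from fastapi.responses import JSONResponse

from app.module.operator.exceptions import OperatorException


async def operator_exception_handler(request: Request, exc: OperatorException) -> JSONResponse:
    # Surface the machine-readable code alongside the message.
    return JSONResponse(status_code=400, content={"code": exc.error_code, "message": exc.message})

# Registration (at app startup): app.add_exception_handler(OperatorException, operator_exception_handler)
```
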
category_relation_repo=CategoryRelationRepository(CategoryRelation()), + operator_repo=OperatorRepository(Operator()), + ) + + +@router.get( + "/tree", + response_model=StandardResponse[PaginatedCategoryTree], + summary="获取分类树", + description="获取算子树状分类结构,包含分组维度(如语言、模态)及资源统计数量", + tags=['mcp'] +) +async def get_category_tree( + service: CategoryService = Depends(get_category_service), + db=Depends(get_db) +): + """获取分类树""" + result = await service.get_all_categories(db) + + return StandardResponse( + code="0", + message="success", + data=PaginatedCategoryTree( + page=0, + size=len(result.categories), + total_elements=len(result.categories), + total_pages=1, + star_count=result.star_count, + content=result.categories, + )) diff --git a/runtime/datamate-python/app/module/operator/interface/operator_routes.py b/runtime/datamate-python/app/module/operator/interface/operator_routes.py new file mode 100644 index 00000000..4ae78f3a --- /dev/null +++ b/runtime/datamate-python/app/module/operator/interface/operator_routes.py @@ -0,0 +1,249 @@ +""" +Operator API Routes +算子 API 路由 +""" +from typing import Optional + +from fastapi import APIRouter, Depends, UploadFile, Form, File, Body +from fastapi.responses import FileResponse + +from app.core.logging import get_logger +from app.db.models.operator import Operator, CategoryRelation, OperatorRelease +from app.db.session import get_db +from app.module.operator.parsers import ParserHolder +from app.module.operator.repository import ( + OperatorRepository, + CategoryRelationRepository, + OperatorReleaseRepository, +) +from app.module.operator.schema import ( + OperatorDto, + OperatorUpdateDto, + OperatorListRequest, +) +from app.module.operator.service import OperatorService +from app.module.shared.chunk_upload_repository import ChunkUploadRepository +from app.module.shared.file_service import FileService +from app.module.shared.schema import StandardResponse, PaginatedData + +logger = get_logger(__name__) + +router = APIRouter(prefix="/operators", tags=["Operator"]) + + +def get_operator_service() -> OperatorService: + """获取算子服务实例""" + return OperatorService( + operator_repo=OperatorRepository(Operator()), + category_relation_repo=CategoryRelationRepository(CategoryRelation()), + operator_release_repo=OperatorReleaseRepository(OperatorRelease()), + parser_holder=ParserHolder(), + file_service=FileService(ChunkUploadRepository()), + ) + + +@router.post( + "/list", + response_model=StandardResponse[PaginatedData[OperatorDto]], + summary="查询算子列表", + description="根据参数查询算子列表(支持分页、分类过滤、关键词搜索)", + tags=['mcp'] +) +async def list_operators( + request: OperatorListRequest, + service: OperatorService = Depends(get_operator_service), + db = Depends(get_db), +): + """查询算子列表""" + operators = await service.get_operators( + page=request.page, + size=request.size, + categories=request.categories, + keyword=request.keyword, + is_star=request.is_star, + db=db + ) + + count = await service.count_operators( + categories=request.categories, + keyword=request.keyword, + is_star=request.is_star, + db=db + ) + + total_pages = (count + request.size - 1) // request.size + + return StandardResponse( + code="0", + message="success", + data=PaginatedData( + page=request.page, + size=request.size, + total_elements=count, + total_pages=total_pages, + content=operators, + ) + ) + + +@router.get( + "/{operator_id}", + response_model=StandardResponse[OperatorDto], + summary="获取算子详情", + description="根据 ID 获取算子详细信息" +) +async def get_operator( + operator_id: str, + service: OperatorService 
= Depends(get_operator_service), + db = Depends(get_db) +): + """获取算子详情""" + operator = await service.get_operator_by_id(operator_id, db) + operator.file_name = None + return StandardResponse(code="0", message="success", data=operator) + + +@router.put( + "/{operator_id}", + response_model=StandardResponse[OperatorDto], + summary="更新算子", + description="更新算子信息" +) +async def update_operator( + operator_id: str, + request: OperatorUpdateDto, + service: OperatorService = Depends(get_operator_service), + db = Depends(get_db) +): + """更新算子""" + operator = await service.update_operator(operator_id, request, db) + await db.commit() + return StandardResponse(code="0", message="success", data=operator) + + +@router.post( + "/create", + response_model=StandardResponse[OperatorDto], + summary="创建算子", + description="创建新算子" +) +async def create_operator( + request: OperatorDto, + service: OperatorService = Depends(get_operator_service), + db = Depends(get_db) +): + """创建算子""" + operator = await service.create_operator(request, db) + await db.commit() + return StandardResponse(code="0", message="success", data=operator) + + +@router.post( + "/upload", + response_model=StandardResponse[OperatorDto], + summary="上传算子", + description="上传算子文件并解析元数据" +) +async def upload_operator( + request: dict = Body(...), + service: OperatorService = Depends(get_operator_service), + db = Depends(get_db), +): + """上传算子""" + file_name = request.get("fileName") + if not file_name: + from fastapi import HTTPException + raise HTTPException(status_code=422, detail="fileName is required") + operator = await service.upload_operator(file_name, db) + return StandardResponse(code="0", message="success", data=operator) + + +@router.post( + "/upload/pre-upload", + response_model=StandardResponse[str], + summary="预上传", + description="获取预上传 ID,用于分块上传" +) +async def pre_upload( + service: OperatorService = Depends(get_operator_service), + db = Depends(get_db), +): + """预上传""" + req_id = await service.pre_upload(db) + await db.commit() + return StandardResponse( + code="0", + message="success", + data=req_id, + ) + + +@router.post( + "/upload/chunk", + response_model=StandardResponse[dict], + summary="分块上传", + description="分块上传算子文件" +) +async def chunk_upload( + req_id: str = Form(..., alias="reqId", description="预上传ID"), + file_no: int = Form(1, alias="fileNo", description="文件编号"), + file_name: str = Form(..., alias="fileName", description="文件名"), + total_chunk_num: int = Form(1, alias="totalChunkNum", description="总分块数"), + chunk_no: int = Form(1, alias="chunkNo", description="当前分块号"), + file: UploadFile = File(...), + check_sum_hex: Optional[str] = Form(None, alias="checkSumHex", description="校验和"), + service: OperatorService = Depends(get_operator_service), + db = Depends(get_db), +): + """分块上传""" + file_content = await file.read() + result = await service.chunk_upload( + req_id=req_id, + file_no=file_no, + file_name=file_name, + total_chunk_num=total_chunk_num, + chunk_no=chunk_no, + check_sum_hex=check_sum_hex, + file_content=file_content, + db=db + ) + await db.commit() + return StandardResponse(code="0", message="success", data=result.dict()) + + +@router.delete( + "/{operator_id}", + response_model=StandardResponse[None], + summary="删除算子", + description="删除算子" +) +async def delete_operator( + operator_id: str, + service: OperatorService = Depends(get_operator_service), + db = Depends(get_db), +): + """删除算子""" + await service.delete_operator(operator_id, db) + await db.commit() + return StandardResponse(code="0", 
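
End to end, the three upload endpoints above compose into the following client round-trip (base URL as in the module README; the `jq` extraction of `data` is an assumption):

```bash
# 1. Reserve an upload id
REQ_ID=$(curl -s -X POST "http://localhost:18000/api/operator-market/operators/upload/pre-upload" | jq -r '.data')

# 2. Send the chunk(s); multi-chunk uploads repeat this with chunkNo = 1..totalChunkNum
curl -X POST "http://localhost:18000/api/operator-market/operators/upload/chunk" \
  -F "reqId=${REQ_ID}" -F "fileNo=1" -F "fileName=operator.tar" \
  -F "totalChunkNum=1" -F "chunkNo=1" -F "file=@operator.tar"

# 3. Parse the assembled archive into an operator record
curl -X POST "http://localhost:18000/api/operator-market/operators/upload" \
  -H "Content-Type: application/json" -d '{"fileName": "operator.tar"}'
```
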
message="success", data=None) + + +@router.get( + "/examples/download", + response_class=FileResponse, + summary="下载示例算子", + description="下载示例算子文件" +) +async def download_example_operator( + service: OperatorService = Depends(get_operator_service), +): + """下载示例算子""" + from app.module.operator.constants import EXAMPLE_OPERATOR_PATH + + example_path = EXAMPLE_OPERATOR_PATH + file_path = service.download_example_operator(example_path) + return FileResponse( + path=str(file_path), + filename=file_path.name, + media_type="application/octet-stream" + ) diff --git a/runtime/datamate-python/app/module/operator/parsers/__init__.py b/runtime/datamate-python/app/module/operator/parsers/__init__.py new file mode 100644 index 00000000..db3c0504 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/parsers/__init__.py @@ -0,0 +1,15 @@ +""" +Operator File Parsers +算子文件解析器 +""" +from .abstract_parser import AbstractParser +from .tar_parser import TarParser +from .zip_parser import ZipParser +from .parser_holder import ParserHolder + +__all__ = [ + "AbstractParser", + "TarParser", + "ZipParser", + "ParserHolder", +] diff --git a/runtime/datamate-python/app/module/operator/parsers/abstract_parser.py b/runtime/datamate-python/app/module/operator/parsers/abstract_parser.py new file mode 100644 index 00000000..50ee98cf --- /dev/null +++ b/runtime/datamate-python/app/module/operator/parsers/abstract_parser.py @@ -0,0 +1,118 @@ +""" +Abstract Parser +抽象解析器基类 +""" +import json +import yaml +from abc import ABC, abstractmethod +from typing import Dict, Any, Optional + +from app.module.operator.schema import OperatorDto, OperatorReleaseDto +from app.module.operator.constants import CATEGORY_MAP, CATEGORY_OTHER_VENDOR_ID, CATEGORY_CUSTOMIZED_ID +from app.module.operator.exceptions import FieldNotFoundError + + +class AbstractParser(ABC): + """算子文件解析器抽象基类""" + + @abstractmethod + def parse_yaml_from_archive( + self, + archive_path: str, + entry_path: str, + file_name: Optional[str] = None, + file_size: Optional[int] = None + ) -> OperatorDto: + """ + 从压缩包内读取指定路径的 yaml 文件并解析为 OperatorDto + + Args: + archive_path: 压缩包路径(zip 或 tar) + entry_path: 压缩包内部的文件路径,例如 "config/app.yaml" + + Returns: + 解析后的 OperatorDto + """ + pass + + @abstractmethod + def extract_to(self, archive_path: str, target_dir: str) -> None: + """ + 将压缩包解压到目标目录(保持相对路径) + + Args: + archive_path: 压缩包路径 + target_dir: 目标目录 + """ + pass + + def parse_yaml( + self, + yaml_content: str, + file_name: Optional[str] = None, + file_size: Optional[int] = None + ) -> OperatorDto: + """解析 YAML 内容为 OperatorDto""" + content: Dict[str, Any] = yaml.safe_load(yaml_content) + + operator = OperatorDto( + id=self._to_string(content.get("raw_id")), + name=self._to_string(content.get("name")), + description=self._to_string(content.get("description")), + version=self._to_string(content.get("version")), + inputs=self._to_json(content.get("inputs")), + outputs=self._to_json(content.get("outputs")), + runtime=self._to_json(content.get("runtime")), + settings=self._to_json(content.get("settings")), + metrics=self._to_json(content.get("metrics")), + file_name=file_name, + file_size=file_size, + ) + + # Handle changelog + changelog = content.get("release") + if isinstance(changelog, list): + operator_release = OperatorReleaseDto( + id=operator.id, + version=operator.version, + changelog=changelog + ) + else: + operator_release = OperatorReleaseDto( + id=operator.id, + version=operator.version, + changelog=[] + ) + operator.releases = [operator_release] + + # Build 
categories + categories = [ + CATEGORY_MAP.get(self._to_lower(content.get("language")), ""), + CATEGORY_MAP.get(self._to_lower(content.get("modal")), ""), + CATEGORY_MAP.get(self._to_lower(content.get("vendor")), CATEGORY_OTHER_VENDOR_ID), + CATEGORY_CUSTOMIZED_ID, + ] + operator.categories = categories + + return operator + + def _to_string(self, obj: Any) -> str: + """转换为字符串""" + if obj is None: + raise FieldNotFoundError("field") + return str(obj) + + def _to_lower(self, obj: Any) -> str: + """转换为小写字符串""" + if obj is None: + raise FieldNotFoundError("field") + return str(obj).lower() + + def _to_json(self, obj: Any) -> Optional[str]: + """转换为 JSON 字符串""" + if obj is None: + return None + try: + return json.dumps(obj).strip('"').strip("'") + except (TypeError, ValueError) as e: + raise ValueError(f"Failed to serialize to JSON: {e}") diff --git a/runtime/datamate-python/app/module/operator/parsers/parser_holder.py b/runtime/datamate-python/app/module/operator/parsers/parser_holder.py new file mode 100644 index 00000000..83522df4 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/parsers/parser_holder.py @@ -0,0 +1,59 @@ +""" +Parser Holder +解析器持有者,根据文件类型选择合适的解析器 +""" +import os +from typing import Dict, Type, Optional + +from app.module.operator.parsers.abstract_parser import AbstractParser +from app.module.operator.parsers.tar_parser import TarParser +from app.module.operator.parsers.zip_parser import ZipParser +from app.module.operator.schema import OperatorDto + + +class ParserHolder: + """解析器持有者,根据文件类型选择解析器""" + + def __init__(self): + self._parsers: Dict[str, AbstractParser] = { + "tar": TarParser(), + "gz": TarParser(), + "tgz": TarParser(), + "zip": ZipParser(), + } + + def get_parser(self, file_path: str) -> AbstractParser: + """根据文件扩展名获取解析器""" + _, ext = os.path.splitext(file_path) + file_type = ext.lstrip('.').lower() + + if file_type not in self._parsers: + raise ValueError(f"Unsupported file type: {file_type}") + + return self._parsers[file_type] + + def parse_yaml_from_archive( + self, + file_type: str, + archive_path: str, + entry_path: str, + file_name: Optional[str] = None, + file_size: Optional[int] = None + ) -> OperatorDto: + """从压缩包解析 YAML""" + if file_type not in self._parsers: + raise ValueError(f"Unsupported file type: {file_type}") + + return self._parsers[file_type].parse_yaml_from_archive( + archive_path, + entry_path, + file_name, + file_size + ) + + def extract_to(self, file_type: str, archive_path: str, target_dir: str) -> None: + """解压文件到目标目录""" + if file_type not in self._parsers: + raise ValueError(f"Unsupported file type: {file_type}") + + self._parsers[file_type].extract_to(archive_path, target_dir) diff --git a/runtime/datamate-python/app/module/operator/parsers/tar_parser.py b/runtime/datamate-python/app/module/operator/parsers/tar_parser.py new file mode 100644 index 00000000..9ce87f88 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/parsers/tar_parser.py @@ -0,0 +1,47 @@ +""" +Tar File Parser +TAR 文件解析器 +""" +import tarfile +import os +from typing import Optional + +from app.module.operator.parsers.abstract_parser import AbstractParser +from app.module.operator.schema import OperatorDto + + +class TarParser(AbstractParser): + """TAR 压缩包解析器""" + + def parse_yaml_from_archive( + self, + archive_path: str, + entry_path: str, + file_name: Optional[str] = None, + file_size: Optional[int] = None + ) -> OperatorDto: + """从 TAR 文件中解析 YAML""" + try: + with tarfile.open(archive_path, 'r:*') as tar: + for member in tar.getmembers(): + 
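
Given `parse_yaml` above, the smallest `metadata.yml` that parses cleanly must provide every field routed through `_to_string`/`_to_lower`; the JSON-ish blocks stay optional:

```yaml
raw_id: demo-op          # becomes OperatorDto.id
name: Demo Operator
description: lowercases text
version: 1.0.0
language: python         # required: mapped via CATEGORY_MAP
modal: text              # required
vendor: datamate         # required; unknown vendors fall back to the other-vendor category
# inputs/outputs/runtime/settings/metrics and release are optional (None is tolerated)
```

A missing required key raises `FieldNotFoundError`, since `content.get(...)` returns `None` for absent keys.
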
if member.name == entry_path or member.name.endswith(f"/{entry_path}"): + file = tar.extractfile(member) + if file: + content = file.read().decode('utf-8') + return self.parse_yaml(content, file_name, file_size) + raise FileNotFoundError(f"File '{entry_path}' not found in archive") + except (tarfile.TarError, EOFError) as e: + raise ValueError(f"Failed to parse TAR file: {e}") + + def extract_to(self, archive_path: str, target_dir: str) -> None: + """解压 TAR 文件到目标目录""" + try: + os.makedirs(target_dir, exist_ok=True) + with tarfile.open(archive_path, 'r:*') as tar: + # Safety check: prevent path traversal + for member in tar.getmembers(): + if os.path.isabs(member.name) or ".." in member.name.split("/"): + raise ValueError(f"Unsafe path in archive: {member.name}") + tar.extractall(target_dir) + except (tarfile.TarError, EOFError) as e: + raise ValueError(f"Failed to extract TAR file: {e}") diff --git a/runtime/datamate-python/app/module/operator/parsers/zip_parser.py b/runtime/datamate-python/app/module/operator/parsers/zip_parser.py new file mode 100644 index 00000000..db4a1b73 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/parsers/zip_parser.py @@ -0,0 +1,46 @@ +""" +Zip File Parser +ZIP 文件解析器 +""" +import zipfile +import os +from typing import Optional + +from app.module.operator.parsers.abstract_parser import AbstractParser +from app.module.operator.schema import OperatorDto + + +class ZipParser(AbstractParser): + """ZIP 压缩包解析器""" + + def parse_yaml_from_archive( + self, + archive_path: str, + entry_path: str, + file_name: Optional[str] = None, + file_size: Optional[int] = None + ) -> OperatorDto: + """从 ZIP 文件中解析 YAML""" + try: + with zipfile.ZipFile(archive_path, 'r') as zf: + for name in zf.namelist(): + if name == entry_path or name.endswith(f"/{entry_path}"): + with zf.open(name) as file: + content = file.read().decode('utf-8') + return self.parse_yaml(content, file_name, file_size) + raise FileNotFoundError(f"File '{entry_path}' not found in archive") + except (zipfile.BadZipFile, zipfile.LargeZipFile) as e: + raise ValueError(f"Failed to parse ZIP file: {e}") + + def extract_to(self, archive_path: str, target_dir: str) -> None: + """解压 ZIP 文件到目标目录""" + try: + os.makedirs(target_dir, exist_ok=True) + with zipfile.ZipFile(archive_path, 'r') as zf: + # Safety check: prevent path traversal + for name in zf.namelist(): + if os.path.isabs(name) or ".." 
in name.split("/"): + raise ValueError(f"Unsafe path in archive: {name}") + zf.extractall(target_dir) + except (zipfile.BadZipFile, zipfile.LargeZipFile) as e: + raise ValueError(f"Failed to extract ZIP file: {e}") diff --git a/runtime/datamate-python/app/module/operator/repository/__init__.py b/runtime/datamate-python/app/module/operator/repository/__init__.py new file mode 100644 index 00000000..67859d72 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/repository/__init__.py @@ -0,0 +1,15 @@ +""" +Operator Market Repositories +算子市场数据访问层 +""" +from .operator_repository import OperatorRepository +from .category_repository import CategoryRepository +from .category_relation_repository import CategoryRelationRepository +from .operator_release_repository import OperatorReleaseRepository + +__all__ = [ + "OperatorRepository", + "CategoryRepository", + "CategoryRelationRepository", + "OperatorReleaseRepository", +] diff --git a/runtime/datamate-python/app/module/operator/repository/category_relation_repository.py b/runtime/datamate-python/app/module/operator/repository/category_relation_repository.py new file mode 100644 index 00000000..b7de1e99 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/repository/category_relation_repository.py @@ -0,0 +1,77 @@ +""" +Category Relation Repository +分类关系数据访问层 +""" +from typing import List + +from sqlalchemy import select, delete, and_ +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db.models.operator import CategoryRelation +from app.module.operator.constants import CATEGORY_PREDEFINED_ID + + +class CategoryRelationRepository: + """分类关系数据访问层""" + + def __init__(self, model: CategoryRelation): + self.model = model + + async def find_all(self, db: AsyncSession) -> List[CategoryRelation]: + """查询所有分类关系""" + result = await db.execute(select(CategoryRelation)) + return result.scalars().all() + + async def batch_insert( + self, + operator_id: str, + category_ids: List[str], + db: AsyncSession + ) -> None: + """批量插入分类关系""" + for category_id in category_ids: + entity = CategoryRelation( + category_id=category_id, + operator_id=operator_id + ) + db.add(entity) + + async def batch_update( + self, + operator_id: str, + category_ids: List[str], + db: AsyncSession + ) -> None: + """批量更新分类关系(先删除后插入)""" + # Delete existing relations + await db.execute( + delete(CategoryRelation) + .where(CategoryRelation.operator_id == operator_id) + ) + # Insert new relations + for category_id in category_ids: + entity = CategoryRelation( + category_id=category_id, + operator_id=operator_id + ) + db.add(entity) + + async def delete_by_operator_id(self, operator_id: str, db: AsyncSession) -> None: + """根据算子ID删除分类关系""" + await db.execute( + delete(CategoryRelation) + .where(CategoryRelation.operator_id == operator_id) + ) + + async def operator_is_predefined(self, operator_id: str, db: AsyncSession) -> bool: + """检查算子是否为预定义算子""" + result = await db.execute( + select(CategoryRelation) + .where( + and_( + CategoryRelation.operator_id == operator_id, + CategoryRelation.category_id == CATEGORY_PREDEFINED_ID + ) + ) + ) + return result.first() is not None diff --git a/runtime/datamate-python/app/module/operator/repository/category_repository.py b/runtime/datamate-python/app/module/operator/repository/category_repository.py new file mode 100644 index 00000000..76e472e6 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/repository/category_repository.py @@ -0,0 +1,23 @@ +""" +Category Repository +分类数据访问层 +""" +from typing import List + +from 
sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db.models.operator import Category +from app.module.operator.schema import CategoryDto + + +class CategoryRepository: + """分类数据访问层""" + + def __init__(self, model: Category): + self.model = model + + async def find_all(self, db: AsyncSession) -> List[Category]: + """查询所有分类""" + result = await db.execute(select(Category)) + return result.scalars().all() diff --git a/runtime/datamate-python/app/module/operator/repository/operator_release_repository.py b/runtime/datamate-python/app/module/operator/repository/operator_release_repository.py new file mode 100644 index 00000000..bcab7be8 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/repository/operator_release_repository.py @@ -0,0 +1,72 @@ +""" +Operator Release Repository +算子发布版本数据访问层 +""" +from typing import List + +from sqlalchemy import select, delete, and_ +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db.models.operator import OperatorRelease +from app.module.operator.schema import OperatorReleaseDto + + +class OperatorReleaseRepository: + """算子发布版本数据访问层""" + + def __init__(self, model: OperatorRelease): + self.model = model + + async def find_all_by_operator_id( + self, + operator_id: str, + db: AsyncSession + ) -> List[OperatorRelease]: + """查询算子的所有发布版本""" + result = await db.execute( + select(OperatorRelease) + .where(OperatorRelease.id == operator_id) + .order_by(OperatorRelease.release_date.desc()) + ) + return result.scalars().all() + + async def insert( + self, + dto: OperatorReleaseDto, + db: AsyncSession + ) -> None: + """插入发布版本""" + entity = OperatorRelease( + id=dto.id, + version=dto.version, + release_date=dto.release_date, + changelog=dto.changelog + ) + db.add(entity) + + async def update( + self, + dto: OperatorReleaseDto, + db: AsyncSession + ) -> None: + """更新发布版本""" + result = await db.execute( + select(OperatorRelease) + .where( + and_( + OperatorRelease.id == dto.id, + OperatorRelease.version == dto.version + ) + ) + ) + entity = result.scalar_one_or_none() + if entity: + entity.changelog = dto.changelog + entity.release_date = dto.release_date + + async def delete(self, operator_id: str, db: AsyncSession) -> None: + """删除算子的所有发布版本""" + await db.execute( + delete(OperatorRelease) + .where(OperatorRelease.id == operator_id) + ) diff --git a/runtime/datamate-python/app/module/operator/repository/operator_repository.py b/runtime/datamate-python/app/module/operator/repository/operator_repository.py new file mode 100644 index 00000000..990f7eb3 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/repository/operator_repository.py @@ -0,0 +1,121 @@ +""" +Operator Repository +算子数据访问层 +""" +import json +from typing import List, Optional +from datetime import datetime, timezone + +from sqlalchemy import select, text, update +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db.models.operator import Operator +from app.module.operator.schema import OperatorDto + + +class OperatorRepository: + """算子数据访问层""" + + def __init__(self, model: Operator): + self.model = model + + async def find_all(self, db: AsyncSession) -> List[Operator]: + """查询所有算子""" + result = await db.execute(select(Operator)) + return result.scalars().all() + + async def insert(self, dto: OperatorDto, db: AsyncSession) -> None: + """插入算子""" + entity = Operator( + id=dto.id, + name=dto.name, + description=dto.description, + version=dto.version, + inputs=dto.inputs, + outputs=dto.outputs, + runtime=dto.runtime, + 
settings=dto.settings, + file_name=dto.file_name, + file_size=dto.file_size, + metrics=dto.metrics, + usage_count=dto.usage_count or 0, + is_star=dto.is_star or False, + ) + db.add(entity) + + async def update(self, dto: OperatorDto, db: AsyncSession) -> None: + """更新算子""" + await db.execute( + update(Operator) + .where(Operator.id == dto.id) + .values( + name=dto.name, + description=dto.description, + version=dto.version, + inputs=dto.inputs, + outputs=dto.outputs, + runtime=dto.runtime, + settings=dto.settings, + file_name=dto.file_name, + file_size=dto.file_size, + metrics=dto.metrics, + is_star=dto.is_star, + updated_at=datetime.utcnow(), + ) + ) + + async def delete(self, operator_id: str, db: AsyncSession) -> None: + """删除算子""" + entity = await db.get(Operator, operator_id) + if entity: + await db.delete(entity) + + async def count_by_star(self, is_star: bool, db: AsyncSession) -> int: + """统计收藏算子数量""" + result = await db.execute( + select(text("COUNT(*)")) + .select_from(Operator) + .where(Operator.is_star == is_star) + ) + return result.scalar() or 0 + + async def operator_in_template(self, operator_id: str, db: AsyncSession) -> bool: + """检查算子是否在模板中""" + result = await db.execute( + text(""" + SELECT COUNT(*) FROM t_operator_instance oi + JOIN t_clean_template t ON oi.instance_id = t.id + WHERE oi.operator_id = :operator_id + """), + {"operator_id": operator_id} + ) + return (result.scalar() or 0) > 0 + + async def operator_in_unstop_task(self, operator_id: str, db: AsyncSession) -> bool: + """检查算子是否在未完成的任务中""" + result = await db.execute( + text(""" + SELECT COUNT(*) FROM t_operator_instance oi + JOIN t_clean_task t ON oi.instance_id = t.id + WHERE oi.operator_id = :operator_id AND t.status != 'COMPLETED' + """), + {"operator_id": operator_id} + ) + return (result.scalar() or 0) > 0 + + async def increment_usage_count( + self, + operator_ids: List[str], + db: AsyncSession + ) -> None: + """增加算子使用次数""" + if not operator_ids: + return + await db.execute( + update(Operator) + .where(Operator.id.in_(operator_ids)) + .values( + usage_count=Operator.usage_count + 1, + updated_at=datetime.now(timezone.utc), + ) + ) diff --git a/runtime/datamate-python/app/module/operator/schema/__init__.py b/runtime/datamate-python/app/module/operator/schema/__init__.py new file mode 100644 index 00000000..a084cbaf --- /dev/null +++ b/runtime/datamate-python/app/module/operator/schema/__init__.py @@ -0,0 +1,29 @@ +""" +Operator Market Schemas +算子市场 Schema 定义 +""" +from .operator import ( + OperatorDto, + OperatorListRequest, + PreUploadResponse, + OperatorUpdateDto, +) +from .category import ( + CategoryDto, + CategoryTreeResponse, + CategoryTreePagedResponse, + CategoryRelationDto, +) +from .release import OperatorReleaseDto + +__all__ = [ + "OperatorDto", + "OperatorListRequest", + "PreUploadResponse", + "CategoryDto", + "CategoryTreeResponse", + "CategoryTreePagedResponse", + "CategoryRelationDto", + "OperatorReleaseDto", + "OperatorUpdateDto", +] diff --git a/runtime/datamate-python/app/module/operator/schema/category.py b/runtime/datamate-python/app/module/operator/schema/category.py new file mode 100644 index 00000000..9de9dc59 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/schema/category.py @@ -0,0 +1,44 @@ +""" +Category Schemas +分类 Schema 定义 +""" +from typing import List, Optional +from datetime import datetime +from pydantic import BaseModel, Field + +from app.module.shared.schema import BaseResponseModel, PaginatedData + + +class CategoryDto(BaseResponseModel): + """分类 
DTO""" + id: str = Field(..., description="分类ID") + name: str = Field(..., description="分类名称") + value: Optional[str] = Field(None, description="分类值") + type: Optional[str] = Field(None, description="分类类型") + parent_id: Optional[str] = Field(None, description="父分类ID") + count: Optional[int] = Field(0, description="算子数量") + created_at: Optional[datetime] = Field(None, description="创建时间") + + +class CategoryTreeResponse(BaseResponseModel): + """分类树响应""" + id: str = Field(..., description="分类ID") + name: str = Field(..., description="分类名称") + count: int = Field(0, description="算子总数") + categories: List[CategoryDto] = Field(default_factory=list, description="子分类列表") + + +class CategoryTreePagedResponse(BaseResponseModel): + """分类树分页响应""" + star_count: int = Field(0, description="收藏的算子数量") + categories: List[CategoryTreeResponse] = Field(default_factory=list, description="分类树列表") + + +class PaginatedCategoryTree(PaginatedData): + star_count: int = Field(0, description="收藏的算子数量") + + +class CategoryRelationDto(BaseResponseModel): + """分类关系 DTO""" + category_id: str = Field(..., description="分类ID") + operator_id: str = Field(..., description="算子ID") diff --git a/runtime/datamate-python/app/module/operator/schema/operator.py b/runtime/datamate-python/app/module/operator/schema/operator.py new file mode 100644 index 00000000..f0868542 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/schema/operator.py @@ -0,0 +1,72 @@ +""" +Operator Schemas +算子 Schema 定义 +""" +from __future__ import annotations + +from typing import List, Optional, Dict, Any +from datetime import datetime +from pydantic import BaseModel, Field + +from app.module.shared.schema import BaseResponseModel +from .release import OperatorReleaseDto + + +class OperatorDto(BaseResponseModel): + """算子 DTO""" + id: str = Field(..., description="算子ID") + name: str = Field(..., description="算子名称") + description: Optional[str] = Field(None, description="算子描述") + version: str = Field(..., description="算子版本") + inputs: Optional[str] = Field(None, description="输入定义(JSON)") + outputs: Optional[str] = Field(None, description="输出定义(JSON)") + runtime: Optional[str] = Field(None, description="运行时配置(JSON)") + settings: Optional[str] = Field(None, description="算子设置(JSON)") + file_name: Optional[str] = Field(None, description="文件名") + file_size: Optional[int] = Field(None, description="文件大小(字节)") + metrics: Optional[str] = Field(None, description="算子指标(JSON)") + usage_count: Optional[int] = Field(None, description="使用次数") + is_star: Optional[bool] = Field(None, description="是否收藏") + categories: Optional[List[str]] = Field(None, description="分类ID列表") + overrides: Optional[Dict[str, Any]] = Field(None, description="设置覆盖值") + requirements: Optional[List[str]] = Field(None, description="Python 依赖列表") + readme: Optional[str] = Field(None, description="README 内容") + releases: Optional[List[OperatorReleaseDto]] = Field(None, description="发布版本列表") + created_at: Optional[datetime] = Field(None, description="创建时间") + updated_at: Optional[datetime] = Field(None, description="更新时间") + + +class OperatorListRequest(BaseResponseModel): + """算子列表查询请求""" + page: int = Field(1, ge=0, description="页码(从0开始)") + size: int = Field(10, ge=1, description="页大小") + categories: List[List[str]] = Field(default_factory=list, description="分类ID列表(每个父分类下的id放到一个列表中)") + keyword: Optional[str] = Field(None, description="搜索关键词") + label_name: Optional[str] = Field(None, description="标签名称(暂不支持)") + is_star: Optional[bool] = Field(None, description="是否收藏") + + +class 
PreUploadResponse(BaseResponseModel): + """预上传响应""" + req_id: str = Field(..., description="请求ID") + + +class OperatorUpdateDto(BaseResponseModel): + """算子更新 DTO(所有字段可选)""" + name: Optional[str] = Field(None, description="算子名称") + description: Optional[str] = Field(None, description="算子描述") + version: Optional[str] = Field(None, description="算子版本") + inputs: Optional[str] = Field(None, description="输入定义(JSON)") + outputs: Optional[str] = Field(None, description="输出定义(JSON)") + runtime: Optional[str] = Field(None, description="运行时配置(JSON)") + settings: Optional[str] = Field(None, description="算子设置(JSON)") + file_name: Optional[str] = Field(None, description="文件名") + file_size: Optional[int] = Field(None, description="文件大小(字节)") + metrics: Optional[str] = Field(None, description="算子指标(JSON)") + usage_count: Optional[int] = Field(None, description="使用次数") + is_star: Optional[bool] = Field(None, description="是否收藏") + categories: Optional[List[str]] = Field(None, description="分类ID列表") + overrides: Optional[Dict[str, Any]] = Field(None, description="设置覆盖值") + requirements: Optional[List[str]] = Field(None, description="Python 依赖列表") + readme: Optional[str] = Field(None, description="README 内容") + releases: Optional[List[OperatorReleaseDto]] = Field(None, description="发布版本列表") diff --git a/runtime/datamate-python/app/module/operator/schema/release.py b/runtime/datamate-python/app/module/operator/schema/release.py new file mode 100644 index 00000000..f91297ee --- /dev/null +++ b/runtime/datamate-python/app/module/operator/schema/release.py @@ -0,0 +1,22 @@ +""" +Operator Release Schemas +算子发布版本 Schema 定义 +""" +from __future__ import annotations + +from typing import List, Optional +from datetime import datetime +from pydantic import BaseModel, Field + +from app.module.shared.schema import BaseResponseModel + + +class OperatorReleaseDto(BaseResponseModel): + """算子发布版本 DTO""" + id: str = Field(..., description="算子ID") + version: str = Field(..., description="版本号") + release_date: Optional[datetime] = Field(None, description="发布时间") + changelog: Optional[List[str]] = Field(None, description="更新日志列表") + + +__all__ = ["OperatorReleaseDto"] diff --git a/runtime/datamate-python/app/module/operator/service/__init__.py b/runtime/datamate-python/app/module/operator/service/__init__.py new file mode 100644 index 00000000..3e1c1d0c --- /dev/null +++ b/runtime/datamate-python/app/module/operator/service/__init__.py @@ -0,0 +1,11 @@ +""" +Operator Market Services +算子市场服务层 +""" +from .operator_service import OperatorService +from .category_service import CategoryService + +__all__ = [ + "OperatorService", + "CategoryService", +] diff --git a/runtime/datamate-python/app/module/operator/service/category_service.py b/runtime/datamate-python/app/module/operator/service/category_service.py new file mode 100644 index 00000000..c84a4906 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/service/category_service.py @@ -0,0 +1,101 @@ +""" +Category Service +分类服务层 +""" +from typing import List + +from sqlalchemy.ext.asyncio import AsyncSession + +from app.module.operator.repository import ( + CategoryRepository, + CategoryRelationRepository, +) +from app.module.operator.schema import ( + CategoryDto, + CategoryTreeResponse, + CategoryTreePagedResponse, +) +from app.db.models.operator import Operator +from app.module.operator.repository.operator_repository import OperatorRepository + + +class CategoryService: + """分类服务""" + + def __init__( + self, + category_repo: CategoryRepository, + category_relation_repo: 
CategoryRelationRepository, + operator_repo: OperatorRepository, + ): + self.category_repo = category_repo + self.category_relation_repo = category_relation_repo + self.operator_repo = operator_repo + + async def get_all_categories( + self, + db: AsyncSession + ) -> CategoryTreePagedResponse: + """获取所有分类(树状结构)""" + # Get all categories + all_categories = await self.category_repo.find_all(db) + category_map = {c.id: c for c in all_categories} + + # Get all relations and count operators per category + all_relations = await self.category_relation_repo.find_all(db) + relation_map = {} + for rel in all_relations: + if rel.category_id not in relation_map: + relation_map[rel.category_id] = 0 + relation_map[rel.category_id] += 1 + + # Group by parent_id + grouped_by_parent = {} + for cat in all_categories: + if cat.parent_id != "0": + if cat.parent_id not in grouped_by_parent: + grouped_by_parent[cat.parent_id] = [] + grouped_by_parent[cat.parent_id].append(cat) + + # Build category trees + parent_ids = sorted( + grouped_by_parent.keys(), + key=lambda pid: pid + ) + + category_trees = [] + for parent_id in parent_ids: + group = grouped_by_parent[parent_id] + parent_category = category_map[parent_id] + + # Build DTOs for children + child_dtos = [] + total_count = 0 + for cat in sorted(group, key=lambda c: c.created_at or 0): + cat_dto = CategoryDto( + id=cat.id, + name=cat.name, + value=cat.value, + type=cat.type, + parent_id=cat.parent_id, + count=relation_map.get(cat.id, 0), + created_at=cat.created_at, + ) + child_dtos.append(cat_dto) + total_count += cat_dto.count + + tree = CategoryTreeResponse( + id=parent_id, + name=parent_category.name, + count=total_count, + categories=child_dtos, + ) + category_trees.append(tree) + + # Get star count + star_count = await self.operator_repo.count_by_star(True, db) + + return CategoryTreePagedResponse( + star_count=star_count, + categories=category_trees, + ) diff --git a/runtime/datamate-python/app/module/operator/service/operator_service.py b/runtime/datamate-python/app/module/operator/service/operator_service.py new file mode 100644 index 00000000..6314f221 --- /dev/null +++ b/runtime/datamate-python/app/module/operator/service/operator_service.py @@ -0,0 +1,624 @@ +""" +Operator Service +算子服务层 +""" +import json +import os +import uuid +import shutil +from datetime import datetime +from pathlib import Path +from typing import List, Optional, Dict, Any, TYPE_CHECKING + +from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy import select, text, func + +from app.core.logging import get_logger +from app.core.exception import BusinessError, ErrorCodes +from app.module.operator.repository import ( + OperatorRepository, + CategoryRelationRepository, + OperatorReleaseRepository, +) +from app.module.operator.schema import ( + OperatorDto, + OperatorUpdateDto, + OperatorReleaseDto, +) +from app.module.operator.parsers import ParserHolder +from app.module.operator.constants import ( + OPERATOR_BASE_PATH, + UPLOAD_DIR, + EXTRACT_DIR, + YAML_PATH, + SERVICE_ID, +) +from app.module.shared.file_service import FileService +from app.module.shared.file_models import ( + ChunkUploadRequestDto, + FileUploadResult, +) + +logger = get_logger(__name__) + + +class OperatorService: + """算子服务""" + + def __init__( + self, + operator_repo: OperatorRepository, + category_relation_repo: CategoryRelationRepository, + operator_release_repo: OperatorReleaseRepository, + parser_holder: ParserHolder, + file_service: FileService, + ): + self.operator_repo = operator_repo + 
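+        # Collaborators are constructor-injected, presumably wired by the app's
+        # dependency setup, mirroring the Java operator-market service this replaces.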
self.category_relation_repo = category_relation_repo + self.operator_release_repo = operator_release_repo + self.parser_holder = parser_holder + self.file_service = file_service + + async def get_operators( + self, + page: int, + size: int, + categories: List[List[str]], + keyword: Optional[str], + is_star: Optional[bool], + db: AsyncSession + ) -> List[OperatorDto]: + """查询算子列表(分页)""" + offset = page * size + + # Build query with categories filter + conditions = [] + params = {"limit": size, "offset": offset} + + if is_star is not None: + conditions.append("ov.is_star = :is_star") + params["is_star"] = is_star + + if keyword: + conditions.append( + "(ov.operator_name ILIKE :keyword OR ov.description ILIKE :keyword)" + ) + params["keyword"] = f"%{keyword}%" + + where_clause = "" + if conditions: + where_clause = "WHERE " + " AND ".join(conditions) + + # Handle categories grouping + group_by = "GROUP BY ov.operator_id, ov.operator_name, ov.description, ov.version, " \ + "ov.inputs, ov.outputs, ov.runtime, ov.settings, ov.is_star, " \ + "ov.file_size, ov.usage_count, ov.created_at, ov.updated_at, ov.created_by, ov.updated_by" + + having_clause = "" + if categories: + # Flatten all category IDs for IN clause + all_category_ids = [cat_id for sublist in categories for cat_id in sublist] + if all_category_ids: + where_clause += " AND category_id = ANY(:category_ids)" if where_clause else "WHERE category_id = ANY(:category_ids)" + params["category_ids"] = all_category_ids + + # Build HAVING clause for category groups + having_clauses = [] + for i, cat_group in enumerate(categories): + cat_list = ", ".join([f"'{cat_id}'" for cat_id in cat_group]) + having_clauses.append( + f"SUM(CASE WHEN category_id IN ({cat_list}) THEN 1 ELSE 0 END) > 0" + ) + having_clause = "HAVING " + " AND ".join(having_clauses) + + query = f""" + SELECT + ov.operator_id AS id, + ov.operator_name AS name, + ov.description, + ov.version, + ov.inputs, + ov.outputs, + ov.runtime, + ov.settings, + ov.is_star, + ov.file_size, + ov.usage_count, + ov.created_at, + ov.updated_at, + string_agg(ov.category_id, ',' ORDER BY ov.created_at DESC) AS categories + FROM v_operator ov + {where_clause} + {group_by} + {having_clause} + ORDER BY ov.created_at DESC + LIMIT :limit OFFSET :offset + """ + + result = await db.execute(text(query), params) + rows = result.fetchall() + + # Convert to DTOs + operators = [] + for row in rows: + categories_list = [] + if row.categories: + categories_list = [cat_id for cat_id in row.categories.split(',') if cat_id] + + operators.append(OperatorDto( + id=row.id, + name=row.name, + description=row.description, + version=row.version, + inputs=row.inputs, + outputs=row.outputs, + runtime=row.runtime, + settings=row.settings, + file_name=None, + file_size=row.file_size, + metrics=None, + usage_count=row.usage_count, + is_star=row.is_star, + categories=categories_list, + created_at=row.created_at, + updated_at=row.updated_at, + )) + + return operators + + async def count_operators( + self, + categories: List[List[str]], + keyword: Optional[str], + is_star: Optional[bool], + db: AsyncSession + ) -> int: + """统计算子数量""" + conditions = [] + params = {} + + if is_star is not None: + conditions.append("is_star = :is_star") + params["is_star"] = is_star + + if keyword: + conditions.append( + "(operator_name ILIKE :keyword OR description ILIKE :keyword)" + ) + params["keyword"] = f"%{keyword}%" + + where_clause = "" + if conditions: + where_clause = "WHERE " + " AND ".join(conditions) + + # Handle categories grouping + 
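+        # Same semantics as get_operators: ids inside one inner list are OR-ed (any
+        # match counts), while the lists are AND-ed via one HAVING predicate per list.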
group_by = "GROUP BY operator_id, operator_name, description, version, inputs, outputs, " \ + "runtime, settings, is_star, file_size, usage_count, created_at, updated_at, " \ + "created_by, updated_by" + + having_clause = "" + if categories: + # Flatten all category IDs for IN clause + all_category_ids = [cat_id for sublist in categories for cat_id in sublist] + if all_category_ids: + where_clause += " AND category_id = ANY(:category_ids)" if where_clause else "WHERE category_id = ANY(:category_ids)" + params["category_ids"] = all_category_ids + + # Build HAVING clause for category groups + having_clauses = [] + for i, cat_group in enumerate(categories): + cat_list = ", ".join([f"'{cat_id}'" for cat_id in cat_group]) + having_clauses.append( + f"SUM(CASE WHEN category_id IN ({cat_list}) THEN 1 ELSE 0 END) > 0" + ) + having_clause = "HAVING " + " AND ".join(having_clauses) + + query = f""" + SELECT COUNT(*) as count + FROM ( + SELECT operator_id + FROM v_operator + {where_clause} + {group_by} + {having_clause} + ) AS t + """ + + result = await db.execute(text(query), params) + return result.scalar() or 0 + + async def get_operator_by_id( + self, + operator_id: str, + db: AsyncSession + ) -> OperatorDto: + """根据 ID 获取算子详情""" + result = await db.execute( + text(""" + SELECT + operator_id, operator_name, description, version, inputs, outputs, runtime, + settings, is_star, file_name, file_size, usage_count, metrics, + created_at, updated_at, created_by, updated_by, + string_agg(category_name, ',' ORDER BY created_at DESC) AS categories + FROM v_operator + WHERE operator_id = :operator_id + GROUP BY operator_id, operator_name, description, version, inputs, outputs, runtime, + settings, is_star, file_name, file_size, usage_count, metrics, + created_at, updated_at, created_by, updated_by + """), + {"operator_id": operator_id} + ) + row = result.fetchone() + + if not row: + raise BusinessError(ErrorCodes.OPERATOR_NOT_FOUND, operator_id) + + # Parse categories from comma-separated string + categories_str = row.categories if hasattr(row, 'categories') and row.categories else "" + categories = [c.strip() for c in categories_str.split(",")] if categories_str else [] + + # Build DTO + operator = OperatorDto( + id=row.operator_id, + name=row.operator_name, + description=row.description, + version=row.version, + inputs=row.inputs, + outputs=row.outputs, + runtime=row.runtime, + settings=row.settings, + file_name=row.file_name, + file_size=row.file_size, + metrics=row.metrics, + usage_count=row.usage_count, + is_star=row.is_star, + created_at=row.created_at, + updated_at=row.updated_at, + categories=categories, + ) + + # Read requirements and readme if file exists + if row.file_name: + extract_path = self._get_extract_path( + self._get_stem(row.file_name) + ) + operator.requirements = self._read_requirements(extract_path) + operator.readme = self._get_readme_content(extract_path) + + # Load releases + releases = await self.operator_release_repo.find_all_by_operator_id( + operator_id, db + ) + operator.releases = [ + OperatorReleaseDto( + id=release.id, + version=release.version, + release_date=release.release_date, + changelog=release.changelog + ) + for release in releases + ] + + return operator + + async def create_operator( + self, + req: OperatorDto, + db: AsyncSession + ) -> OperatorDto: + """创建算子""" + + # Generate ID if not provided + if not req.id: + req.id = str(uuid.uuid4()) + + # Override settings + self._override_settings(req) + + # Insert operator + await self.operator_repo.insert(req, db) + 
await db.flush() + + # Insert category relations + if req.categories: + await self.category_relation_repo.batch_insert( + req.id, req.categories, db + ) + + # Insert release + if req.releases: + release = req.releases[0] + release.id = req.id + release.version = req.version + release.release_date = datetime.now() + await self.operator_release_repo.insert(release, db) + + # Extract files + if req.file_name: + self.parser_holder.extract_to( + self._get_file_type(req.file_name), + self._get_upload_path(req.file_name), + self._get_extract_path(self._get_stem(req.file_name)) + ) + + return req + + async def update_operator( + self, + operator_id: str, + req: OperatorUpdateDto, + db: AsyncSession + ) -> OperatorDto: + """更新算子""" + + # Get existing operator + existing = await self.get_operator_by_id(operator_id, db) + + # Save original version for release comparison + original_version = existing.version + + # Merge update request into existing operator + # Only update fields that are provided (not None) + if req.name is not None: + existing.name = req.name + if req.description is not None: + existing.description = req.description + if req.version is not None: + existing.version = req.version + if req.inputs is not None: + existing.inputs = req.inputs + if req.outputs is not None: + existing.outputs = req.outputs + if req.runtime is not None: + existing.runtime = req.runtime + if req.settings is not None: + existing.settings = req.settings + if req.file_name is not None: + existing.file_name = req.file_name + if req.file_size is not None: + existing.file_size = req.file_size + if req.metrics is not None: + existing.metrics = req.metrics + if req.usage_count is not None: + existing.usage_count = req.usage_count + if req.is_star is not None: + existing.is_star = req.is_star + if req.categories is not None: + existing.categories = req.categories + if req.overrides is not None: + existing.overrides = req.overrides + + # Override settings + self._override_settings(existing) + + # Update operator + await self.operator_repo.update(existing, db) + + # Update category relations + if req.file_name is not None and req.categories is not None: + await self.category_relation_repo.batch_update( + operator_id, req.categories, db + ) + + # Update release + if req.releases is not None and len(req.releases) > 0: + release = req.releases[0] + release.id = operator_id + release.version = req.version + release.release_date = datetime.now() + if original_version == release.version: + await self.operator_release_repo.update(release, db) + else: + await self.operator_release_repo.insert(release, db) + + # Extract files + if req.file_name is not None: + self.parser_holder.extract_to( + self._get_file_type(req.file_name), + self._get_upload_path(req.file_name), + self._get_extract_path(self._get_stem(req.file_name)) + ) + + await db.flush() + return await self.get_operator_by_id(operator_id, db) + + async def delete_operator( + self, + operator_id: str, + db: AsyncSession + ) -> None: + """删除算子""" + # Check if operator is in use + in_template = await self.operator_repo.operator_in_template(operator_id, db) + in_unstop_task = await self.operator_repo.operator_in_unstop_task(operator_id, db) + if in_template or in_unstop_task: + raise BusinessError(ErrorCodes.OPERATOR_IN_INSTANCE) + + # Check if operator is predefined + is_predefined = await self.category_relation_repo.operator_is_predefined( + operator_id, db + ) + if is_predefined: + raise BusinessError(ErrorCodes.OPERATOR_CANNOT_DELETE_PREDEFINED) + + # Get operator for 
file cleanup + operator = await self.get_operator_by_id(operator_id, db) + + # Delete from database + await self.operator_repo.delete(operator_id, db) + await self.category_relation_repo.delete_by_operator_id(operator_id, db) + await self.operator_release_repo.delete(operator_id, db) + + # Delete extracted files + if operator.file_name: + extract_path = self._get_extract_path(self._get_stem(operator.file_name)) + shutil.rmtree(extract_path, ignore_errors=True) + + async def upload_operator( + self, + file_name: str, + db: AsyncSession + ) -> OperatorDto: + """上传算子文件并解析元数据""" + file_path = self._get_upload_path(file_name) + file_size = os.path.getsize(file_path) if os.path.exists(file_path) else None + return self.parser_holder.parse_yaml_from_archive( + self._get_file_type(file_name), + file_path, + YAML_PATH, + file_name, + file_size + ) + + async def pre_upload(self, db: AsyncSession) -> str: + """预上传,返回请求 ID""" + from app.module.operator.constants import OPERATOR_BASE_PATH, UPLOAD_DIR + + upload_path = os.path.join(OPERATOR_BASE_PATH, UPLOAD_DIR) + req_id = await self.file_service.pre_upload( + upload_path=upload_path, + service_id=SERVICE_ID, + db_session=db, + check_info=None + ) + return req_id + + async def chunk_upload( + self, + req_id: str, + file_no: int, + file_name: str, + total_chunk_num: int, + chunk_no: int, + check_sum_hex: Optional[str], + file_content: bytes, + db: AsyncSession + ) -> FileUploadResult: + """分块上传文件""" + from app.module.operator.constants import OPERATOR_BASE_PATH, UPLOAD_DIR + + upload_path = os.path.join(OPERATOR_BASE_PATH, UPLOAD_DIR) + + chunk_request = ChunkUploadRequestDto( + req_id=req_id, + file_no=file_no, + file_name=file_name, + total_chunk_num=total_chunk_num, + chunk_no=chunk_no, + check_sum_hex=check_sum_hex, + ) + + return await self.file_service.chunk_upload( + chunk_request, upload_path, file_content, db + ) + + def download_example_operator(self, file_path: str) -> Path: + """下载示例算子文件""" + path = Path(file_path) + if not path.exists(): + raise FileNotFoundError(f"File not found: {file_path}") + return path + + def _override_settings(self, operator: OperatorDto) -> None: + """用 overrides 值覆盖 settings 的 defaultVal""" + if not operator.settings or not operator.overrides: + return + + try: + settings = json.loads(operator.settings) + for key, value in operator.overrides.items(): + if key not in settings: + continue + + setting = settings[key] + setting_type = setting.get("type") + + match setting_type: + case "slider" | "switch" | "select" | "input" | "radio": + setting["defaultVal"] = value + case "checkbox": + setting["defaultVal"] = self._convert_to_list_string(value) + case "range": + self._update_properties(setting, value) + + settings[key] = setting + + operator.settings = json.dumps(settings) + except json.JSONDecodeError as e: + raise BusinessError(ErrorCodes.OPERATOR_PARSE_FAILED, str(e)) + + def _convert_to_list_string(self, value: Any) -> str: + """转换为逗号分隔的字符串""" + if value is None: + return "" + if isinstance(value, list): + return ",".join(str(v) for v in value) + return str(value) + + def _update_properties(self, setting: Dict[str, Any], value: Any) -> None: + """更新 range 类型的 properties""" + if not isinstance(value, list): + return + + properties = setting.get("properties", []) + if not isinstance(properties, list) or len(properties) != len(value): + return + + for i, prop in enumerate(properties): + if isinstance(prop, dict): + prop["defaultVal"] = value[i] + + setting["properties"] = properties + + def _read_requirements(self, 
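+        # extract_path: directory the uploaded operator archive was unpacked into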
extract_path: str) -> List[str]: + """读取 requirements.txt""" + requirements_path = Path(extract_path) / "requirements.txt" + if not requirements_path.exists(): + return [] + + requirements = [] + try: + with open(requirements_path, 'r', encoding='utf-8') as f: + for line in f: + line = line.strip() + if line and not line.startswith('#'): + requirements.append(line) + except Exception as e: + logger.warning(f"Failed to read requirements: {e}") + return requirements + + def _get_readme_content(self, extract_path: str) -> str: + """读取 README 内容""" + dir_path = Path(extract_path) + if not dir_path.exists() or not dir_path.is_dir(): + logger.info(f"Directory does not exist or is not a directory: {extract_path}") + return "" + + candidates = ["README.md", "readme.md", "Readme.md"] + for filename in candidates: + readme_path = dir_path / filename + if readme_path.exists() and readme_path.is_file(): + try: + content = readme_path.read_text(encoding='utf-8') + logger.info(f"Successfully read README from: {readme_path}") + return content + except Exception as e: + logger.warning(f"Failed to read README from {readme_path}: {e}") + logger.info(f"No README found in: {extract_path}") + return "" + + def _get_file_type(self, file_name: str) -> str: + """获取文件类型(扩展名)""" + return file_name.rsplit('.', 1)[-1].lower() if '.' in file_name else "" + + def _get_stem(self, file_name: str) -> str: + """获取文件名不含扩展名""" + return file_name.rsplit('.', 1)[0] if '.' in file_name else file_name + + def _get_upload_path(self, file_name: str) -> str: + """获取上传文件路径""" + return os.path.join(OPERATOR_BASE_PATH, UPLOAD_DIR, file_name) + + def _get_extract_path(self, file_stem: str) -> str: + """获取解压路径""" + return os.path.join(OPERATOR_BASE_PATH, EXTRACT_DIR, file_stem) diff --git a/runtime/datamate-python/app/module/shared/__init__.py b/runtime/datamate-python/app/module/shared/__init__.py index e69de29b..fd0d7a1a 100644 --- a/runtime/datamate-python/app/module/shared/__init__.py +++ b/runtime/datamate-python/app/module/shared/__init__.py @@ -0,0 +1,21 @@ +""" +Shared Module Init +共享模块初始化 +""" +from .file_service import FileService +from .file_models import ( + ChunkUploadPreRequestDto, + ChunkUploadRequestDto, + FileUploadResult, +) +from .chunks_saver import ChunksSaver +from .chunk_upload_repository import ChunkUploadRepository + +__all__ = [ + "FileService", + "ChunkUploadPreRequestDto", + "ChunkUploadRequestDto", + "FileUploadResult", + "ChunksSaver", + "ChunkUploadRepository", +] diff --git a/runtime/datamate-python/app/module/shared/chunk_upload_repository.py b/runtime/datamate-python/app/module/shared/chunk_upload_repository.py new file mode 100644 index 00000000..8a0c717d --- /dev/null +++ b/runtime/datamate-python/app/module/shared/chunk_upload_repository.py @@ -0,0 +1,95 @@ +""" +Chunk Upload Repository +分片上传数据访问层 +""" +from typing import Optional, List + +from sqlalchemy import select, update, delete +from sqlalchemy.ext.asyncio import AsyncSession + +from app.db.models.chunk_upload import ChunkUploadPreRequest +from app.core.logging import get_logger + +logger = get_logger(__name__) + + +class ChunkUploadRepository: + """分片上传数据访问层""" + + async def find_by_id( + self, + req_id: str, + db: AsyncSession + ) -> Optional[ChunkUploadPreRequest]: + """根据ID查询""" + result = await db.execute( + select(ChunkUploadPreRequest).where(ChunkUploadPreRequest.id == req_id) + ) + return result.scalar_one_or_none() + + async def find_by_service_id( + self, + service_id: str, + db: AsyncSession + ) -> List[ChunkUploadPreRequest]: + 
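+        # service_id scopes requests to the owning module (e.g. the operator market's
+        # SERVICE_ID), so pending uploads can be listed or purged per service.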
"""根据服务ID查询""" + result = await db.execute( + select(ChunkUploadPreRequest).where( + ChunkUploadPreRequest.service_id == service_id + ) + ) + return result.scalars().all() + + async def find_all(self, db: AsyncSession) -> List[ChunkUploadPreRequest]: + """查询所有""" + result = await db.execute(select(ChunkUploadPreRequest)) + return result.scalars().all() + + async def insert( + self, + request: ChunkUploadPreRequest, + db: AsyncSession + ) -> None: + """插入""" + db.add(request) + + async def update( + self, + request: ChunkUploadPreRequest, + db: AsyncSession + ) -> int: + """更新""" + from datetime import datetime, timezone + result = await db.execute( + update(ChunkUploadPreRequest) + .where(ChunkUploadPreRequest.id == request.id) + .values( + uploaded_file_num=request.uploaded_file_num, + timeout=request.timeout, + ) + ) + return result.rowcount + + async def delete_by_id( + self, + req_id: str, + db: AsyncSession + ) -> int: + """根据ID删除""" + result = await db.execute( + delete(ChunkUploadPreRequest).where(ChunkUploadPreRequest.id == req_id) + ) + return result.rowcount + + async def delete_by_service_id( + self, + service_id: str, + db: AsyncSession + ) -> int: + """根据服务ID删除""" + result = await db.execute( + delete(ChunkUploadPreRequest).where( + ChunkUploadPreRequest.service_id == service_id + ) + ) + return result.rowcount diff --git a/runtime/datamate-python/app/module/shared/chunks_saver.py b/runtime/datamate-python/app/module/shared/chunks_saver.py new file mode 100644 index 00000000..554b263b --- /dev/null +++ b/runtime/datamate-python/app/module/shared/chunks_saver.py @@ -0,0 +1,146 @@ +""" +Chunks Saver +分片保存器,用于处理文件分片上传 +""" +import os +from pathlib import Path +from typing import Optional +from datetime import datetime, timezone + +from fastapi import UploadFile + +from app.core.logging import get_logger +from app.module.shared.file_models import ChunkUploadRequestDto + +logger = get_logger(__name__) + + +class ChunksSaver: + """分片保存器""" + + TEMP_DIR_NAME_FORMAT = "req_%s_chunks" + + @staticmethod + def save( + file_upload_request: ChunkUploadRequestDto, + pre_upload_req_id: str, + upload_path: str, + file_content: bytes + ) -> Optional[Path]: + """ + 保存分片 + + Args: + file_upload_request: 上传分片的请求 + pre_upload_req_id: 预上传请求ID + upload_path: 上传基础路径 + file_content: 文件内容(字节) + + Returns: + 保存后的文件路径,如果不是最后一个分片则返回None + """ + start_time = datetime.now(timezone.utc) + + temp_dir = Path(upload_path) / ( + ChunksSaver.TEMP_DIR_NAME_FORMAT % pre_upload_req_id + ) + temp_dir.mkdir(parents=True, exist_ok=True) + + temp_file = temp_dir / str(file_upload_request.file_no) + + ChunksSaver._append_to_target_file(temp_file, file_content) + + if file_upload_request.total_chunk_num != file_upload_request.chunk_no: + elapsed = (datetime.now(timezone.utc) - start_time).total_seconds() + logger.debug(f"save chunk {file_upload_request.chunk_no} cost {elapsed}s") + return None + + final_file = Path(upload_path) / file_upload_request.file_name + + try: + temp_file.rename(final_file) + except OSError as e: + logger.error( + f"failed to mv file: {temp_file.name}, req id: {pre_upload_req_id}, error: {e}" + ) + raise ValueError("failed to move file to target dir") from e + + elapsed = (datetime.now(timezone.utc) - start_time).total_seconds() + logger.debug(f"save chunk {file_upload_request.chunk_no} cost {elapsed}s") + + return final_file + + @staticmethod + def save_file( + file_upload_request: ChunkUploadRequestDto, + upload_path: str, + file_content: bytes + ) -> Path: + """ + 保存文件(不分片) + + Args: + 
file_upload_request: 上传请求 + upload_path: 上传路径 + file_content: 文件内容(字节) + + Returns: + 保存后的文件路径 + """ + target_file = Path(upload_path) / file_upload_request.file_name + + logger.info(f"file path {target_file}, file size {len(file_content)}") + + try: + target_file.parent.mkdir(parents=True, exist_ok=True) + target_file.write_bytes(file_content) + except OSError as e: + logger.error(f"failed to save file: {target_file}, error: {e}") + raise ValueError("failed to save file") from e + + return target_file + + @staticmethod + def delete_folder(folder_path: str) -> None: + """ + 删除指定路径下的所有文件 + + Args: + folder_path: 文件夹路径 + """ + folder = Path(folder_path) + + if not folder.exists(): + logger.info(f"folder {folder_path} does not exist") + return + + try: + for item in folder.glob("*"): + if item.is_file(): + item.unlink() + elif item.is_dir(): + for sub_item in item.glob("*"): + if sub_item.is_file(): + sub_item.unlink() + elif sub_item.is_dir(): + ChunksSaver.delete_folder(str(sub_item)) + item.rmdir() + except OSError as e: + logger.error(f"failed to delete folder: {folder_path}, error: {e}") + raise ValueError("failed to delete folder") from e + + @staticmethod + def _append_to_target_file(target_file: Path, content: bytes) -> None: + """ + 追加内容到目标文件末尾 + + Args: + target_file: 目标文件 + content: 要追加的内容 + """ + try: + with open(target_file, "ab") as f: + f.write(content) + except OSError as e: + logger.error(f"failed to append to file: {target_file}, error: {e}") + raise ValueError("failed to append content to file") from e diff --git a/runtime/datamate-python/app/module/shared/file_models.py b/runtime/datamate-python/app/module/shared/file_models.py new file mode 100644 index 00000000..c4e98775 --- /dev/null +++ b/runtime/datamate-python/app/module/shared/file_models.py @@ -0,0 +1,38 @@ +""" +File Models +文件相关模型定义 +""" +from pathlib import Path +from typing import Optional +from pydantic import BaseModel, Field +from datetime import datetime + + +class ChunkUploadPreRequestDto(BaseModel): + """分片上传预请求DTO""" + id: str = Field(..., description="请求ID") + total_file_num: int = Field(..., description="总文件数", ge=1) + uploaded_file_num: Optional[int] = Field(None, description="已上传文件数", ge=0) + upload_path: str = Field(..., description="文件路径") + timeout: Optional[datetime] = Field(None, description="上传请求超时时间") + service_id: Optional[str] = Field(None, description="上传请求所属服务ID") + check_info: Optional[str] = Field(None, description="业务信息") + + +class ChunkUploadRequestDto(BaseModel): + """分片上传请求DTO""" + req_id: str = Field(..., description="预上传返回的ID") + file_no: int = Field(1, description="文件编号", ge=1) + file_name: str = Field(..., description="文件名称") + total_chunk_num: int = Field(1, description="总分块数量", ge=1) + chunk_no: int = Field(1, description="当前分块编号", ge=1) + file_size: Optional[int] = Field(None, description="文件大小", ge=0) + check_sum_hex: Optional[str] = Field(None, description="文件校验和(十六进制字符串)") + + +class FileUploadResult(BaseModel): + """文件上传结果""" + is_all_files_uploaded: bool = Field(..., description="是否所有文件已上传") + check_info: Optional[str] = Field(None, description="业务上传信息") + saved_file_path: Optional[str] = Field(None, description="保存的文件路径") + file_name: str = Field(..., description="文件名称") diff --git a/runtime/datamate-python/app/module/shared/file_service.py b/runtime/datamate-python/app/module/shared/file_service.py new file mode 100644 index 00000000..1a858587 --- /dev/null +++ b/runtime/datamate-python/app/module/shared/file_service.py @@ -0,0 +1,183 @@ +""" +File Service 
+文件服务,处理文件上传、分片上传等功能 +""" +import os +import uuid +from datetime import datetime, timedelta +from pathlib import Path +from typing import Optional + +from app.core.logging import get_logger +from app.db.models.chunk_upload import ChunkUploadPreRequest +from app.module.shared.chunk_upload_repository import ChunkUploadRepository +from app.module.shared.chunks_saver import ChunksSaver +from app.module.shared.file_models import ( + ChunkUploadRequestDto, + FileUploadResult, +) + +logger = get_logger(__name__) + + +class FileService: + """文件服务""" + + DEFAULT_TIMEOUT_SECONDS = 120 + + def __init__( + self, + chunk_upload_repo: ChunkUploadRepository, + ): + self.chunk_upload_repo = chunk_upload_repo + + async def pre_upload( + self, + upload_path: str, + service_id: str, + db_session, + check_info: Optional[str] = None + ) -> str: + """ + 预上传 + + Args: + upload_path: 上传路径 + service_id: 服务ID + check_info: 业务信息 + + Returns: + 预上传请求ID + """ + req_id = str(uuid.uuid4()) + timeout = datetime.utcnow().replace( + microsecond=0 + ) + timedelta(seconds=self.DEFAULT_TIMEOUT_SECONDS) + + pre_request = ChunkUploadPreRequest( + id=req_id, + total_file_num=1, + uploaded_file_num=0, + upload_path=upload_path, + timeout=timeout, + service_id=service_id, + check_info=check_info, + ) + + await self.chunk_upload_repo.insert(pre_request, db_session) + return req_id + + async def chunk_upload( + self, + upload_request: ChunkUploadRequestDto, + upload_path: str, + file_content: bytes, + db_session, + ) -> FileUploadResult: + """ + 分片上传 + + Args: + upload_request: 上传请求 + upload_path: 上传路径 + file_content: 文件内容 + db_session: 数据库会话 + + Returns: + 上传结果 + """ + upload_request.file_size = len(file_content) + + pre_request = await self.chunk_upload_repo.find_by_id( + upload_request.req_id, db_session + ) + + if pre_request is None: + logger.error(f"pre-upload request not found: {upload_request.req_id}") + raise ValueError("Pre-upload request not found") + + if pre_request.is_upload_complete(): + logger.error(f"upload already complete: {upload_request.req_id}") + raise ValueError("Upload already complete") + + if pre_request.is_request_timeout(): + logger.error(f"upload request timeout: {upload_request.req_id}") + raise ValueError("Upload request timeout") + + saved_file_path = None + + if upload_request.total_chunk_num > 1: + saved_file_path = await self._upload_chunk( + upload_request, pre_request, upload_path, file_content + ) + else: + saved_file_path = await self._upload_file( + upload_request, pre_request, upload_path, file_content + ) + + update_count = await self.chunk_upload_repo.update(pre_request, db_session) + + if update_count == 0: + logger.error(f"failed to update pre-request: {upload_request.req_id}") + raise ValueError("Failed to update pre-upload request") + + is_finish = pre_request.uploaded_file_num == pre_request.total_file_num + + if is_finish: + temp_dir = os.path.join( + upload_path, + ChunksSaver.TEMP_DIR_NAME_FORMAT % pre_request.id + ) + try: + ChunksSaver.delete_folder(temp_dir) + except Exception as e: + logger.warning(f"failed to delete temp dir: {temp_dir}, error: {e}") + + await self.chunk_upload_repo.delete_by_id(pre_request.id, db_session) + + return FileUploadResult( + is_all_files_uploaded=is_finish, + check_info=pre_request.check_info, + saved_file_path=str(saved_file_path) if saved_file_path else None, + file_name=upload_request.file_name, + ) + + async def _upload_file( + self, + upload_request: ChunkUploadRequestDto, + pre_request: ChunkUploadPreRequest, + upload_path: str, + 
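+        # file_content arrives whole here; multi-chunk uploads go through _upload_chunk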
file_content: bytes + ) -> Path: + """上传单文件""" + saved_file = ChunksSaver.save_file( + upload_request, upload_path, file_content + ) + + pre_request.timeout = datetime.utcnow().replace( + microsecond=0 + ) + timedelta(seconds=self.DEFAULT_TIMEOUT_SECONDS) + pre_request.increment_uploaded_file_num() + + return saved_file + + async def _upload_chunk( + self, + upload_request: ChunkUploadRequestDto, + pre_request: ChunkUploadPreRequest, + upload_path: str, + file_content: bytes + ) -> Optional[Path]: + """上传分片""" + saved_file = ChunksSaver.save( + upload_request, pre_request.id, upload_path, file_content + ) + + if saved_file is not None: + pre_request.increment_uploaded_file_num() + return saved_file + + pre_request.timeout = datetime.utcnow().replace( + microsecond=0 + ) + timedelta(seconds=self.DEFAULT_TIMEOUT_SECONDS) + return None diff --git a/runtime/ops/examples/test_operator/metadata.yml b/runtime/ops/examples/test_operator/metadata.yml index 2320c9ed..fb1b59b8 100644 --- a/runtime/ops/examples/test_operator/metadata.yml +++ b/runtime/ops/examples/test_operator/metadata.yml @@ -22,8 +22,8 @@ metrics: runtime: memory: 10485760 cpu: 0.05 - gpu: 0.1 - npu: 0.1 + gpu: 0 + npu: 0 settings: sliderParam: name: '滑窗测试' diff --git a/runtime/ops/examples/test_operator/test_operator.tar b/runtime/ops/examples/test_operator/test_operator.tar index dc986c1d..e14771ea 100644 Binary files a/runtime/ops/examples/test_operator/test_operator.tar and b/runtime/ops/examples/test_operator/test_operator.tar differ diff --git a/runtime/ops/pyproject.toml b/runtime/ops/pyproject.toml index dd8271d1..11d6bb11 100644 --- a/runtime/ops/pyproject.toml +++ b/runtime/ops/pyproject.toml @@ -19,7 +19,7 @@ dependencies = [ "openslide-python>=1.4.3", "paddleocr==3.3.0", "paddlepaddle==3.2.2", - "pandas>=2.2.3", + "pandas>=2.2.3,<3.0.0", "presidio-analyzer==2.2.25", "presidio-anonymizer==2.2.25", "pycryptodome>=3.23.0", diff --git a/runtime/python-executor/datamate/wrappers/data_juicer_executor.py b/runtime/python-executor/datamate/wrappers/data_juicer_executor.py index d1a57125..6d345f4b 100644 --- a/runtime/python-executor/datamate/wrappers/data_juicer_executor.py +++ b/runtime/python-executor/datamate/wrappers/data_juicer_executor.py @@ -14,6 +14,7 @@ from datamate.core.base_op import FileExporter, SUCCESS_STATUS from datamate.core.constant import Fields from datamate.wrappers.executor import RayExecutor +from datamate.sql_manager.persistence_atction import TaskInfoPersistence DJ_OUTPUT = "outputs" @@ -103,6 +104,10 @@ def run(self): logger.info('Read data...') dataset = dataset.map(FileExporter().read_file, num_cpus=0.05) + # 保存原始数据文件ID集合,用于后续过滤数据检测 + original_file_ids = set(dataset.unique("fileId")) + + # 写入数据集文件 with open(self.dataset_path, "w", encoding="utf-8") as f: for batch_df in dataset.iter_batches(batch_format="pandas", batch_size=2048): batch_df.to_json(f, orient="records", lines=True, force_ascii=False) @@ -118,6 +123,26 @@ def run(self): processed_dataset = processed_dataset.map(FileExporter().save_file_and_db, num_cpus=0.05) for _ in processed_dataset.iter_batches(): pass + + # 特殊处理:识别被过滤的数据 + if processed_dataset.count() == 0: + processed_file_ids = set() + else: + processed_file_ids = set(processed_dataset.unique("fileId")) + filtered_file_ids = original_file_ids - processed_file_ids + + if filtered_file_ids: + logger.info(f"Found {len(filtered_file_ids)} filtered files, updating task result only") + for sample_dict in dataset.iter_batches(batch_format="pandas", batch_size=2048): + for _, row in 
sample_dict.iterrows(): + if str(row.get("fileId", "")) in filtered_file_ids: + row["fileSize"] = "0" + row["fileType"] = "" + row["execute_status"] = SUCCESS_STATUS + row[Fields.instance_id] = self.cfg.instance_id + TaskInfoPersistence().update_task_result(row) + + self.scan_files() except Exception as e: logger.error(f"An unexpected error occurred.", e) raise e diff --git a/scripts/db/data-cleaning-init.sql b/scripts/db/data-cleaning-init.sql index 93322f44..2e0501c9 100644 --- a/scripts/db/data-cleaning-init.sql +++ b/scripts/db/data-cleaning-init.sql @@ -7,9 +7,10 @@ CREATE TABLE IF NOT EXISTS t_clean_template id VARCHAR(64) PRIMARY KEY, name VARCHAR(64) UNIQUE, description VARCHAR(256), - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - created_by VARCHAR(256) + created_by VARCHAR(256), + updated_by VARCHAR(256), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); COMMENT ON TABLE t_clean_template IS '清洗模板表'; @@ -19,6 +20,7 @@ COMMENT ON COLUMN t_clean_template.description IS '模板描述'; COMMENT ON COLUMN t_clean_template.created_at IS '创建时间'; COMMENT ON COLUMN t_clean_template.updated_at IS '更新时间'; COMMENT ON COLUMN t_clean_template.created_by IS '创建者'; +COMMENT ON COLUMN t_clean_template.updated_by IS '更新者'; -- 清洗任务表 CREATE TABLE IF NOT EXISTS t_clean_task @@ -180,4 +182,4 @@ VALUES ('4421504e-c6c9-4760-b55a-509d17429597', 'ImgDirectionCorrect', 11, NULL), ('4421504e-c6c9-4760-b55a-509d17429597', 'ImgResize', 12, NULL), ('4421504e-c6c9-4760-b55a-509d17429597', 'ImgTypeUnify', 13, NULL) - ON CONFLICT (instance_id, operator_id, op_index) DO NOTHING; \ No newline at end of file + ON CONFLICT (instance_id, operator_id, op_index) DO NOTHING; diff --git a/scripts/db/data-operator-init.sql b/scripts/db/data-operator-init.sql index 0587b841..e6650e4c 100644 --- a/scripts/db/data-operator-init.sql +++ b/scripts/db/data-operator-init.sql @@ -49,6 +49,10 @@ CREATE TABLE IF NOT EXISTS t_operator_release version VARCHAR(255), release_date TIMESTAMP, changelog JSON, + created_by VARCHAR(255), + updated_by VARCHAR(255), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (id, version) ); @@ -60,7 +64,10 @@ CREATE TABLE IF NOT EXISTS t_operator_category value VARCHAR(64) UNIQUE, type VARCHAR(64), parent_id VARCHAR(64), - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + created_by VARCHAR(255), + updated_by VARCHAR(255), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); COMMENT ON TABLE t_operator_category IS '算子分类表'; @@ -76,6 +83,10 @@ CREATE TABLE IF NOT EXISTS t_operator_category_relation ( category_id VARCHAR(64), operator_id VARCHAR(64), + created_by VARCHAR(255), + updated_by VARCHAR(255), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY (category_id, operator_id) ); @@ -207,9 +218,6 @@ VALUES ('ObjectDetectionRectangle', '图像目标检测与预标注', '基于 YOLOv8 的图像目标检测算子。对输入图像进行目标检测,输出带矩形框与类别标签的标注图像,并生成结构化标注 JSON(包含类别、置信度与边界框坐标)。支持将检测结果导出为 Label Studio 兼容的 predictions 预标注格式(rectanglelabels),可在标注任务中直接加载并进行人工校正,从而显著降低人工标注成本并提升标注效率。', '1.0.0', 'image', 'image,json', null, null, '', 12288, false, 'system', 'system') ON CONFLICT DO NOTHING; -INSERT INTO t_operator_release(id, version, release_date, changelog) -VALUES ('MineruFormatter', '1.0.0', '2026-03-30', '["aaa","bbb"]'); - INSERT INTO t_operator_category_relation(category_id, operator_id) 
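+-- Relation seed rows are derived by joining the seeded category and operator
+-- tables (the SELECT continues in the unchanged lines below).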
SELECT c.id, o.id FROM t_operator_category c diff --git a/scripts/images/backend-python/Dockerfile b/scripts/images/backend-python/Dockerfile index 4d276dd0..cf24083c 100644 --- a/scripts/images/backend-python/Dockerfile +++ b/scripts/images/backend-python/Dockerfile @@ -55,13 +55,15 @@ ENV NLTK_DATA=/usr/local/nltk_data # Copy the rest of the application COPY runtime/datamate-python /app +COPY runtime/ops/examples/test_operator/test_operator.tar /app/test_operator.tar COPY --from=datax-builder /DataX/target/datax/datax /opt/datax RUN cp /opt/datax/plugin/reader/mysqlreader/libs/mysql* /opt/datax/plugin/reader/starrocksreader/libs/ COPY runtime/datamate-python/deploy/docker-entrypoint.sh /docker-entrypoint.sh RUN chmod +x /docker-entrypoint.sh \ - && dos2unix /docker-entrypoint.sh || true + && dos2unix /docker-entrypoint.sh || true \ + && ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime # Expose the application port EXPOSE 18000