Browse Source

add: 分批插入数据

Eureka 8 tháng trước
mục cha
commit
796fddb01c

+ 29 - 58
taais-modules/taais-biz/src/main/java/com/taais/biz/service/impl/DataServiceImpl.java

@@ -1,5 +1,7 @@
 package com.taais.biz.service.impl;
 
+import java.util.Date;
+
 import cn.hutool.core.util.ObjectUtil;
 import cn.hutool.http.HttpRequest;
 import com.fasterxml.jackson.databind.JsonNode;
@@ -10,8 +12,8 @@ import com.taais.biz.constant.BizConstant;
 import com.taais.biz.domain.Data;
 import com.taais.biz.domain.bo.DataAmplificationTaskBo;
 import com.taais.biz.domain.bo.DataBo;
-import com.taais.biz.domain.dto.DataAmplifyDto;
 import com.taais.biz.domain.vo.BatchDataResult;
+import com.taais.biz.domain.dto.DataAmplifyDto;
 import com.taais.biz.domain.vo.DataAmplificationTaskVo;
 import com.taais.biz.domain.vo.DataSelectVo;
 import com.taais.biz.domain.vo.DataVo;
@@ -203,6 +205,7 @@ public class DataServiceImpl extends BaseServiceImpl<DataMapper, Data> implement
             List<Boolean> labeledList = new ArrayList<>();
             List<Data> dataList = new ArrayList<>();
             AtomicInteger countSize = new AtomicInteger();
+            List<Data> finalDataList = dataList;
             extractedImagesFileList.forEach(fileInfo -> {
                 //获取ID
                 Long id = ids.get(countSize.get());
@@ -246,7 +249,7 @@ public class DataServiceImpl extends BaseServiceImpl<DataMapper, Data> implement
                         String labelUrl = FileUploadUtils.getPathFileName(destInfo, id + ".txt");
                         data.setLabelurl(labelUrl);
                     }
-                    dataList.add(data);
+                    finalDataList.add(data);
                 } catch (IOException e) {
                     throw new RuntimeException(e);
                 }
@@ -270,7 +273,7 @@ public class DataServiceImpl extends BaseServiceImpl<DataMapper, Data> implement
 //                    return CommonResult.fail(format);
 //                }
 //            }
-            if (labeled){
+            if (labeled) {
                 // 未标注数量
                 long unmarkedCount = labeledList.stream().filter(Boolean.FALSE::equals).count();
                 // 已标注数量
@@ -282,7 +285,17 @@ public class DataServiceImpl extends BaseServiceImpl<DataMapper, Data> implement
                 }
             }
             // TODO 李兆晏 确认逻辑是否正确 end
-            dataMapper.insertBatch(dataList);
+
+            // Insert the rows in fixed-size batches rather than in one statement.
+            int batchSize = 100; // number of rows per batch
+            int totalSize = dataList.size();
+            for (int i = 0; i < totalSize; i += batchSize) {
+                // Upper bound of the current batch, clamped to the end of the list
+                int end = Math.min(i + batchSize, totalSize);
+                // Use a separate local for the batch view. Reassigning dataList to its own
+                // sublist shrinks it to one batch, so the next iteration's indices fall
+                // outside the list and subList throws IndexOutOfBoundsException.
+                List<Data> batch = dataList.subList(i, end);
+                dataMapper.insertBatch(batch);
+            }
             FileUtils.deleteFile(destZip);
         } catch (Exception e) {
             log.error("[uploadDataInfo]数据集处理出现未知异常.e:", e);
@@ -294,58 +307,16 @@ public class DataServiceImpl extends BaseServiceImpl<DataMapper, Data> implement
     @Override
     @Transactional
     public CommonResult<Boolean> dataAmplify(DataAmplifyDto dataAmplifyDto) {
-        //根据批次号获取该批次的所有文件数据
-        QueryWrapper query = query();
-        query.eq(Data::getBatchNum, dataAmplifyDto.getBatchNum());
-        List<Data> dataList = dataMapper.selectListByQuery(query);
-        if (dataList.isEmpty()) {
-            return CommonResult.fail("该批次下没有文件数据,请重新选择批次!");
-        }
-        //TODO: 此处需要定义任务开始,把相关任务信息添加上(任务名称、任务开始时间、任务类型),然后再处理文件。
-
-        List<Data> dataListInfo = dataList.stream().filter(data -> !StringUtils.isEmpty(data.getUrl())).toList();
-        if (dataListInfo.isEmpty()) {
-            return CommonResult.fail("该批次下没有文件数据,请重新选择批次!");
-        }
-        String filePath = TaaisConfig.getUploadPath();
-        LocalDate currentDate = LocalDate.now();
-        // 定义日期格式器
-        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy/MM/dd");
-        String formattedDate = currentDate.format(formatter);
-        filePath = filePath + File.separator + formattedDate;
-        String finalFilePath = filePath;
-        dataListInfo.forEach(dataInfo -> {
-            try {
-                //循环调用Python扩增接口
-                Map<String, Object> bodyJson = new HashMap<>();
-                bodyJson.put("augmentationType", dataAmplifyDto.getAugmentationType());
-                bodyJson.put("inputImagePath", dataInfo.getUrl());
-                String outputImagePath = finalFilePath + AMPLIFY + System.currentTimeMillis();
-                File desc = new File(outputImagePath);
-                if (!desc.exists()) {
-                    log.info("创建文件目录: {}", desc.mkdirs());
-                }
-                bodyJson.put("outputImagePath", outputImagePath);
-                bodyJson.put("otherParams", dataAmplifyDto.getOtherParams());
-                //实际请求接口,接口未提供,暂且注释
-//                String response = HttpRequest.post(PYTHON_DATA_AMPLIFY_API)
-//                        .body(JsonUtils.toJsonString(bodyJson))
-//                        .execute().body();
-                String response = "{\"status\":200,\"msg\":\"扩增成功\"}";
-                ObjectMapper objectMapper = new ObjectMapper();
-                JsonNode rootNode = objectMapper.readTree(response);
-                String resultCode = rootNode.path(RESULT_CODE).asText();
-                //判断接口是否响应成功
-                if (!RESULT_STATUS.equals(resultCode)) {
-                    throw new RuntimeException("调用Python接口返回扩增失败");
-                }
-                //处理当前目录文件,并进行入库
-                saveDataInfo(outputImagePath, dataInfo);
-            } catch (Exception e) {
-                throw new RuntimeException(e);
-            }
-        });
-        return CommonResult.fail("该批次下没有文件数据,请重新选择批次!");
+        // Record a pending amplification task built from the caller's request, then hand
+        // its id to amplifyForData. The fields are read from the dataAmplifyDto argument:
+        // building them from a freshly constructed DataAmplifyDto would ignore the caller's
+        // task name, batch number, augmentation type and parameters.
+        DataAmplificationTaskBo dataAmplificationTaskBo = new DataAmplificationTaskBo();
+        dataAmplificationTaskBo.setName(dataAmplifyDto.getTaskName());
+        dataAmplificationTaskBo.setStatus(BizConstant.TASK_STATUS_PENDING);
+        dataAmplificationTaskBo.setDataBatchNums(dataAmplifyDto.getBatchNum());
+        dataAmplificationTaskBo.setAugmentationType(dataAmplifyDto.getAugmentationType());
+        dataAmplificationTaskBo.setParameters(JsonUtils.toJsonString(dataAmplifyDto.getOtherParams()));
+        dataAmplificationTaskBo.setDelFlag(0);
+        dataAmplificationTaskService.insert(dataAmplificationTaskBo);
+        // NOTE(review): assumes insert() populates the generated id on the Bo; confirm, otherwise getId() is null here.
+        return this.amplifyForData(dataAmplificationTaskBo.getId().toString());
     }
 
     private void initFileInfo(String dest, List<File> extractedImagesFileList, boolean directory, String fileName) {
@@ -546,12 +517,12 @@ public class DataServiceImpl extends BaseServiceImpl<DataMapper, Data> implement
                     Date endTime = new Date();
                     DataAmplificationTaskBo update = new DataAmplificationTaskBo();
                     if (taskVo.getInputImagePath() != null) {
-                        update.setInputImagePath(taskVo.getInputImagePath()+"|" + dataInfo.getUrl());
+                        update.setInputImagePath(taskVo.getInputImagePath() + "|" + dataInfo.getUrl());
                     } else {
                         update.setInputImagePath(dataInfo.getUrl());
                     }
                     if (taskVo.getOutputImagePath() != null) {
-                        update.setOutputImagePath(taskVo.getOutputImagePath()+"|" + outputImagePath);
+                        update.setOutputImagePath(taskVo.getOutputImagePath() + "|" + outputImagePath);
                     } else {
                         update.setOutputImagePath(outputImagePath);
                     }