Forráskód Böngészése

【功能新增】AI:新增知识库文档的批量添加

YunaiV 5 hónapja
szülő
commit
7918ba7d29

+ 1 - 0
yudao-module-ai/yudao-module-ai-api/src/main/java/cn/iocoder/yudao/module/ai/enums/ErrorCodeConstants.java

@@ -56,6 +56,7 @@ public interface ErrorCodeConstants {
 
     ErrorCode KNOWLEDGE_DOCUMENT_NOT_EXISTS = new ErrorCode(1_022_008_101, "文档不存在!");
     ErrorCode KNOWLEDGE_DOCUMENT_FILE_EMPTY = new ErrorCode(1_022_008_102, "文档内容为空!");
+    // Download failure gets its own code: 1_022_008_102 already identifies KNOWLEDGE_DOCUMENT_FILE_EMPTY.
+    // NOTE(review): KNOWLEDGE_DOCUMENT_FILE_READ_FAIL still shares 102 as well; confirm 103 is unused before merging.
+    ErrorCode KNOWLEDGE_DOCUMENT_FILE_DOWNLOAD_FAIL = new ErrorCode(1_022_008_103, "文件下载失败!");
     ErrorCode KNOWLEDGE_DOCUMENT_FILE_READ_FAIL = new ErrorCode(1_022_008_102, "文档加载失败!");
 
     ErrorCode KNOWLEDGE_SEGMENT_NOT_EXISTS = new ErrorCode(1_022_008_202, "段落不存在!");

+ 24 - 1
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeDocumentController.http

@@ -9,4 +9,27 @@ tenant-id: {{adminTenantId}}
   "name": "测试文档",
   "url": "https://static.iocoder.cn/README.md",
   "segmentMaxTokens": 800
-}
+}
+
+### 批量创建知识文档
+POST {{baseUrl}}/ai/knowledge/document/create-list
+Content-Type: application/json
+Authorization: Bearer {{token}}
+tenant-id: {{adminTenantId}}
+
+{
+  "knowledgeId": 1,
+  "list": [
+    {
+      "name": "测试文档1",
+      "url": "https://static.iocoder.cn/README.md",
+      "segmentMaxTokens": 800
+    },
+    {
+      "name": "测试文档2",
+      "url": "https://static.iocoder.cn/README_yudao.md",
+      "segmentMaxTokens": 400
+    }
+  ]
+}
+

+ 13 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeDocumentController.java

@@ -6,6 +6,7 @@ import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentRespVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO;
+import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateListReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO;
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
 import cn.iocoder.yudao.module.ai.service.knowledge.AiKnowledgeDocumentService;
@@ -16,6 +17,8 @@ import jakarta.validation.Valid;
 import org.springframework.validation.annotation.Validated;
 import org.springframework.web.bind.annotation.*;
 
+import java.util.List;
+
 import static cn.iocoder.yudao.framework.common.pojo.CommonResult.success;
 
 @Tag(name = "管理后台 - AI 知识库文档")
@@ -38,8 +41,16 @@ public class AiKnowledgeDocumentController {
     @PostMapping("/create")
     @Operation(summary = "新建文档")
     public CommonResult<Long> createKnowledgeDocument(@RequestBody @Valid AiKnowledgeDocumentCreateReqVO reqVO) {
-        Long knowledgeDocumentId = documentService.createKnowledgeDocument(reqVO);
-        return success(knowledgeDocumentId);
+        // Delegate to the service and return the id it produces as the response payload.
+        Long id = documentService.createKnowledgeDocument(reqVO);
+        return success(id);
+    }
+
+    /**
+     * Creates several knowledge-base documents in a single request.
+     *
+     * @param reqVO validated request body: the knowledge base id plus the documents to create
+     * @return the document ids returned by the service, wrapped in a success result
+     */
+    @PostMapping("/create-list")
+    @Operation(summary = "批量新建文档")
+    public CommonResult<List<Long>> createKnowledgeDocumentList(
+            @RequestBody @Valid AiKnowledgeDocumentCreateListReqVO reqVO) {
+        return success(documentService.createKnowledgeDocumentList(reqVO));
     }
 
     @PutMapping("/update")

+ 42 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateListReqVO.java

@@ -0,0 +1,42 @@
+package cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge;
+
+import io.swagger.v3.oas.annotations.media.Schema;
+import jakarta.validation.Valid;
+import jakarta.validation.constraints.NotBlank;
+import jakarta.validation.constraints.NotEmpty;
+import jakarta.validation.constraints.NotNull;
+import lombok.Data;
+import org.hibernate.validator.constraints.URL;
+
+import java.util.List;
+
+/**
+ * Request VO for creating several knowledge-base documents in one call.
+ */
+@Schema(description = "管理后台 - AI 知识库文档批量创建 Request VO")
+@Data
+public class AiKnowledgeDocumentCreateListReqVO {
+
+    /** Id of the knowledge base the documents are added to. */
+    @Schema(description = "知识库编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "1204")
+    @NotNull(message = "知识库编号不能为空")
+    private Long knowledgeId;
+
+    /**
+     * Documents to create. {@code @Valid} cascades validation into every element; without it the
+     * constraints declared on {@link Document} are never evaluated.
+     */
+    @Schema(description = "文档列表", requiredMode = Schema.RequiredMode.REQUIRED)
+    @NotEmpty(message = "文档列表不能为空")
+    @Valid
+    private List<Document> list;
+
+    /**
+     * One document of the batch.
+     */
+    @Schema(description = "文档")
+    @Data
+    public static class Document {
+
+        /** Display name of the document. */
+        @Schema(description = "文档名称", requiredMode = Schema.RequiredMode.REQUIRED, example = "三方登陆")
+        @NotBlank(message = "文档名称不能为空")
+        private String name;
+
+        /** Location the document is downloaded from. {@code @URL} alone accepts {@code null}, hence {@code @NotBlank}. */
+        @Schema(description = "文档 URL", requiredMode = Schema.RequiredMode.REQUIRED, example = "https://doc.iocoder.cn")
+        @NotBlank(message = "文档 URL 不能为空")
+        @URL(message = "文档 URL 格式不正确")
+        private String url;
+
+        /** Maximum number of tokens per segment. */
+        @Schema(description = "分段的最大 Token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800")
+        @NotNull(message = "分段的最大 Token 数不能为空")
+        private Integer segmentMaxTokens;
+
+    }
+
+}

+ 11 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentService.java

@@ -4,8 +4,11 @@ import cn.iocoder.yudao.framework.common.pojo.PageResult;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO;
+import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateListReqVO;
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
 
+import java.util.List;
+
 /**
  * AI 知识库-文档 Service 接口
  *
@@ -21,6 +24,14 @@ public interface AiKnowledgeDocumentService {
      */
     Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO);
 
+    /**
+     * Creates multiple documents in one call.
+     *
+     * @param createListReqVO batch-creation request VO
+     * @return list of document ids
+     */
+    List<Long> createKnowledgeDocumentList(AiKnowledgeDocumentCreateListReqVO createListReqVO);
+
     /**
      * 获取文档分页
      *

+ 47 - 13
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java

@@ -8,6 +8,7 @@ import cn.iocoder.yudao.framework.common.pojo.PageResult;
 import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO;
+import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateListReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO;
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
 import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeDocumentMapper;
@@ -21,9 +22,11 @@ import org.springframework.core.io.ByteArrayResource;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
 
+import java.util.ArrayList;
 import java.util.List;
 
 import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
+import static cn.iocoder.yudao.framework.common.util.collection.CollectionUtils.convertList;
 import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.*;
 
 /**
@@ -54,25 +57,45 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
         knowledgeService.validateKnowledgeExists(createReqVO.getKnowledgeId());
 
         // 2. 下载文档
-        TikaDocumentReader loader = new TikaDocumentReader(downloadFile(createReqVO.getUrl()));
-        List<Document> documents = loader.get();
-        Document document = CollUtil.getFirst(documents);
-        if (document == null || StrUtil.isEmpty(document.getText())) {
-            throw exception(KNOWLEDGE_DOCUMENT_FILE_READ_FAIL);
-        }
+        String content = readUrl(createReqVO.getUrl());
 
         // 3. 文档记录入库
-        String content = document.getText();
         AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class)
                 .setContent(content).setContentLength(content.length()).setTokens(tokenCountEstimator.estimate(content))
                 .setStatus(CommonStatusEnum.ENABLE.getStatus());
         knowledgeDocumentMapper.insert(documentDO);
 
-        // 4. 文档切片入库
-        knowledgeSegmentService.createKnowledgeSegmentBySplitContent(documentDO.getId(), document.getText());
+        // 4. 文档切片入库(同步)
+        knowledgeSegmentService.createKnowledgeSegmentBySplitContent(documentDO.getId(), content);
         return documentDO.getId();
     }
 
+    /**
+     * Creates several documents for one knowledge base inside a single transaction.
+     * <p>
+     * Every URL is downloaded and parsed, the resulting records are inserted in one batch, and slicing
+     * into segments is handed to the asynchronous segment API once the transaction has committed.
+     *
+     * @param createListReqVO knowledge base id plus the documents (name, URL, max tokens per segment) to create
+     * @return ids of the inserted documents
+     */
+    @Override
+    @Transactional(rollbackFor = Exception.class)
+    public List<Long> createKnowledgeDocumentList(AiKnowledgeDocumentCreateListReqVO createListReqVO) {
+        // 1. Validate that the target knowledge base exists
+        Long knowledgeId = createListReqVO.getKnowledgeId();
+        knowledgeService.validateKnowledgeExists(knowledgeId);
+
+        // 2. Download each document and build its record in the same pass, so content and request item
+        //    can never get out of step (no parallel lists paired by index)
+        List<AiKnowledgeDocumentDO> documentDOs = new ArrayList<>(createListReqVO.getList().size());
+        for (AiKnowledgeDocumentCreateListReqVO.Document documentVO : createListReqVO.getList()) {
+            String content = readUrl(documentVO.getUrl());
+            documentDOs.add(BeanUtils.toBean(documentVO, AiKnowledgeDocumentDO.class).setKnowledgeId(knowledgeId)
+                    .setContent(content).setContentLength(content.length()).setTokens(tokenCountEstimator.estimate(content))
+                    .setStatus(CommonStatusEnum.ENABLE.getStatus()));
+        }
+
+        // 3. Persist all document records in one batch
+        knowledgeDocumentMapper.insertBatch(documentDOs);
+
+        // 4. Slice asynchronously, but only after commit: a worker started inside the transaction could
+        //    run before the rows above are committed, or after they have been rolled back
+        runAfterCommit(() -> documentDOs.forEach(documentDO ->
+                knowledgeSegmentService.createKnowledgeSegmentBySplitContentAsync(documentDO.getId(), documentDO.getContent())));
+        return convertList(documentDOs, AiKnowledgeDocumentDO::getId);
+    }
+
+    /**
+     * Runs {@code task} after the surrounding transaction commits, or immediately when no transaction
+     * synchronization is active on the current thread.
+     *
+     * @param task work to defer until the commit
+     */
+    private static void runAfterCommit(Runnable task) {
+        if (!org.springframework.transaction.support.TransactionSynchronizationManager.isSynchronizationActive()) {
+            task.run();
+            return;
+        }
+        org.springframework.transaction.support.TransactionSynchronizationManager.registerSynchronization(
+                new org.springframework.transaction.support.TransactionSynchronization() {
+                    @Override
+                    public void afterCommit() {
+                        task.run();
+                    }
+                });
+    }
+
+
     @Override
     public PageResult<AiKnowledgeDocumentDO> getKnowledgeDocumentPage(AiKnowledgeDocumentPageReqVO pageReqVO) {
         return knowledgeDocumentMapper.selectPage(pageReqVO);
@@ -97,17 +120,28 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
         return knowledgeDocument;
     }
 
-    private org.springframework.core.io.Resource downloadFile(String url) {
-        try {
-            byte[] bytes = HttpUtil.downloadBytes(url);
-            if (bytes.length == 0) {
-                throw exception(KNOWLEDGE_DOCUMENT_FILE_EMPTY);
-            }
-            return new ByteArrayResource(bytes);
-        } catch (Exception e) {
-            log.error("[downloadFile][url({}) 下载失败]", url, e);
-            throw new RuntimeException(e);
-        }
+    /**
+     * Downloads the file at {@code url} and extracts its text with the Tika reader.
+     * <p>
+     * Fails with {@code KNOWLEDGE_DOCUMENT_FILE_DOWNLOAD_FAIL} when the download throws, with
+     * {@code KNOWLEDGE_DOCUMENT_FILE_EMPTY} when nothing was downloaded, and with
+     * {@code KNOWLEDGE_DOCUMENT_FILE_READ_FAIL} when no text could be extracted.
+     * <p>
+     * NOTE(review): the URL comes from the request body and is fetched server-side; consider
+     * restricting the allowed schemes/hosts.
+     *
+     * @param url location of the document to fetch
+     * @return the non-empty text of the first document produced by the reader
+     */
+    private static String readUrl(String url) {
+        // Download the file; only a failure of the download itself maps to DOWNLOAD_FAIL
+        byte[] bytes;
+        try {
+            bytes = HttpUtil.downloadBytes(url);
+        } catch (Exception e) {
+            log.error("[readUrl][url({}) 读取失败]", url, e);
+            throw exception(KNOWLEDGE_DOCUMENT_FILE_DOWNLOAD_FAIL);
+        }
+        // Checked outside the try block so the catch above cannot turn "empty" into "download failed"
+        if (bytes == null || bytes.length == 0) {
+            throw exception(KNOWLEDGE_DOCUMENT_FILE_EMPTY);
+        }
+
+        // Parse the downloaded bytes and take the text of the first document
+        TikaDocumentReader loader = new TikaDocumentReader(new ByteArrayResource(bytes));
+        List<Document> documents = loader.get();
+        Document document = CollUtil.getFirst(documents);
+        if (document == null || StrUtil.isEmpty(document.getText())) {
+            throw exception(KNOWLEDGE_DOCUMENT_FILE_READ_FAIL);
+        }
+        return document.getText();
     }
 
 }

+ 12 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentService.java

@@ -6,6 +6,7 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowle
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentUpdateReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentUpdateStatusReqVO;
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
+import org.springframework.scheduling.annotation.Async;
 
 import java.util.List;
 
@@ -32,6 +33,17 @@ public interface AiKnowledgeSegmentService {
      */
     void createKnowledgeSegmentBySplitContent(Long documentId, String content);
 
+    /**
+     * [Async] Splits {@code content} into slices and creates multiple segments from them.
+     * <p>
+     * Simply delegates to {@link #createKnowledgeSegmentBySplitContent(Long, String)}; the
+     * {@code @Async} annotation is what requests execution off the caller's thread.
+     * NOTE(review): presumably this only runs asynchronously when Spring async support is enabled and
+     * the call goes through the bean proxy — confirm.
+     *
+     * @param documentId knowledge-base document id
+     * @param content document content
+     */
+    @Async
+    default void createKnowledgeSegmentBySplitContentAsync(Long documentId, String content) {
+        createKnowledgeSegmentBySplitContent(documentId, content);
+    }
+
     /**
      * 更新段落的内容
      *

+ 4 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java

@@ -110,8 +110,10 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
         // 3.1 更新切片
         AiKnowledgeSegmentDO segmentDO = BeanUtils.toBean(reqVO, AiKnowledgeSegmentDO.class);
         segmentMapper.updateById(segmentDO);
-        // 3.2 重新向量化
-        writeVectorStore(vectorStore, segmentDO, new Document(segmentDO.getContent()));
+        // 3.2 重新向量化,必须开启状态
+        if (CommonStatusEnum.isEnable(segmentDO.getStatus())) {
+            writeVectorStore(vectorStore, segmentDO, new Document(segmentDO.getContent()));
+        }
     }
 
     @Override