Эх сурвалжийг харах

【功能新增】AI:知识库,新增切片接口

YunaiV 5 сар өмнө
parent
commit
d1e207899a

+ 5 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeSegmentController.http

@@ -0,0 +1,5 @@
+### 切片内容
+GET {{baseUrl}}/ai/knowledge/segment/split?url=https://static.iocoder.cn/README_yudao.md&segmentMaxTokens=800
+Content-Type: application/json
+Authorization: Bearer {{token}}
+tenant-id: {{adminTenantId}}

+ 21 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeSegmentController.java

@@ -10,12 +10,16 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowle
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
 import cn.iocoder.yudao.module.ai.service.knowledge.AiKnowledgeSegmentService;
 import io.swagger.v3.oas.annotations.Operation;
+import io.swagger.v3.oas.annotations.Parameter;
+import io.swagger.v3.oas.annotations.Parameters;
 import io.swagger.v3.oas.annotations.tags.Tag;
 import jakarta.annotation.Resource;
 import jakarta.validation.Valid;
 import org.springframework.validation.annotation.Validated;
 import org.springframework.web.bind.annotation.*;
 
+import java.util.List;
+
 import static cn.iocoder.yudao.framework.common.pojo.CommonResult.success;
 
 // TODO @芋艿:增加权限标识
@@ -30,7 +34,8 @@ public class AiKnowledgeSegmentController {
 
     @GetMapping("/page")
     @Operation(summary = "获取段落分页")
-    public CommonResult<PageResult<AiKnowledgeSegmentRespVO>> getKnowledgeSegmentPage(@Valid AiKnowledgeSegmentPageReqVO pageReqVO) {
+    public CommonResult<PageResult<AiKnowledgeSegmentRespVO>> getKnowledgeSegmentPage(
+            @Valid AiKnowledgeSegmentPageReqVO pageReqVO) {
         PageResult<AiKnowledgeSegmentDO> pageResult = segmentService.getKnowledgeSegmentPage(pageReqVO);
         return success(BeanUtils.toBean(pageResult, AiKnowledgeSegmentRespVO.class));
     }
@@ -44,9 +49,23 @@ public class AiKnowledgeSegmentController {
 
     @PutMapping("/update-status")
     @Operation(summary = "启禁用段落内容")
-    public CommonResult<Boolean> updateKnowledgeSegmentStatus(@Valid @RequestBody AiKnowledgeSegmentUpdateStatusReqVO reqVO) {
+    public CommonResult<Boolean> updateKnowledgeSegmentStatus(
+            @Valid @RequestBody AiKnowledgeSegmentUpdateStatusReqVO reqVO) {
         segmentService.updateKnowledgeSegmentStatus(reqVO);
         return success(true);
     }
 
+    @GetMapping("/split") // GET .../split: splits the document at `url` into segments and returns them as response VOs.
+    @Operation(summary = "切片内容")
+    @Parameters({
+            @Parameter(name = "url", description = "文档 URL", required = true),
+            @Parameter(name = "segmentMaxTokens", description = "分段的最大 Token 数", required = true)
+    })
+    public CommonResult<List<AiKnowledgeSegmentRespVO>> splitContent(
+            @RequestParam("url") String url, // NOTE(review): caller-supplied URL is handed to the service, which downloads it server-side - confirm scheme/host restrictions exist (SSRF).
+            @RequestParam(value = "segmentMaxTokens") Integer segmentMaxTokens) { // NOTE(review): no range check visible here; presumably must be positive - confirm.
+        List<AiKnowledgeSegmentDO> segments = segmentService.splitContent(url, segmentMaxTokens); // delegate download + splitting to the segment service
+        return success(BeanUtils.toBean(segments, AiKnowledgeSegmentRespVO.class)); // map each segment DO to its response VO and wrap in a success result
+    }
+
 }

+ 9 - 3
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/segment/AiKnowledgeSegmentRespVO.java

@@ -3,7 +3,7 @@ package cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment;
 import io.swagger.v3.oas.annotations.media.Schema;
 import lombok.Data;
 
-@Schema(description = "管理后台 - AI 知识库-文档 Response VO")
+@Schema(description = "管理后台 - AI 知识库文档分片 Response VO")
 @Data
 public class AiKnowledgeSegmentRespVO {
 
@@ -22,13 +22,19 @@ public class AiKnowledgeSegmentRespVO {
     @Schema(description = "切片内容", requiredMode = Schema.RequiredMode.REQUIRED, example = "Java 开发手册")
     private String content;
 
+    @Schema(description = "切片内容长度", requiredMode = Schema.RequiredMode.REQUIRED, example = "1024")
+    private Integer contentLength;
+
     @Schema(description = "token 数量", requiredMode = Schema.RequiredMode.REQUIRED, example = "1024")
     private Integer tokens;
 
-    @Schema(description = "字符数", requiredMode = Schema.RequiredMode.REQUIRED, example = "1008")
-    private Integer wordCount;
+    @Schema(description = "召回次数", requiredMode = Schema.RequiredMode.REQUIRED, example = "10")
+    private Integer retrievalCount;
 
     @Schema(description = "文档状态", requiredMode = Schema.RequiredMode.REQUIRED, example = "1")
     private Integer status;
 
+    @Schema(description = "创建时间", requiredMode = Schema.RequiredMode.REQUIRED)
+    private Long createTime;
+
 }

+ 8 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentService.java

@@ -63,4 +63,12 @@ public interface AiKnowledgeDocumentService {
      */
     AiKnowledgeDocumentDO validateKnowledgeDocumentExists(Long id);
 
+    /**
+     * Reads the content located at the given URL.
+     *
+     * @param url the URL to read
+     * @return the content read from the URL
+     */
+    String readUrl(String url);
+
 }

+ 3 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java

@@ -127,9 +127,10 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
         return knowledgeDocument;
     }
 
-    private static String readUrl(String url) {
+    @Override
+    public String readUrl(String url) {
         // 下载文件
-        ByteArrayResource resource = null;
+        ByteArrayResource resource;
         try {
             byte[] bytes = HttpUtil.downloadBytes(url);
             if (bytes.length == 0) {

+ 11 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentService.java

@@ -29,7 +29,7 @@ public interface AiKnowledgeSegmentService {
      * 基于 content 内容,切片创建多个段落
      *
      * @param documentId 知识库文档编号
-     * @param content 文档内容
+     * @param content    文档内容
      */
     void createKnowledgeSegmentBySplitContent(Long documentId, String content);
 
@@ -37,7 +37,7 @@ public interface AiKnowledgeSegmentService {
      * 【异步】基于 content 内容,切片创建多个段落
      *
      * @param documentId 知识库文档编号
-     * @param content 文档内容
+     * @param content    文档内容
      */
     @Async
     default void createKnowledgeSegmentBySplitContentAsync(Long documentId, String content) {
@@ -66,4 +66,13 @@ public interface AiKnowledgeSegmentService {
      */
     List<AiKnowledgeSegmentDO> similaritySearch(AiKnowledgeSegmentSearchReqVO reqVO);
 
+    /**
+     * Splits the content found at a URL into multiple segments.
+     *
+     * @param url              URL address of the content to split
+     * @param segmentMaxTokens maximum number of tokens per segment
+     * @return the list of segments produced by the split
+     */
+    List<AiKnowledgeSegmentDO> splitContent(String url, Integer segmentMaxTokens);
+
 }

+ 28 - 4
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java

@@ -75,9 +75,7 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
         VectorStore vectorStore = getVectorStoreById(knowledgeDO);
 
         // 2. 文档切片
-        Document document = new Document(content);
-        TextSplitter textSplitter = buildTokenTextSplitter(documentDO.getSegmentMaxTokens());
-        List<Document> documentSegments = textSplitter.apply(Collections.singletonList(document));
+        List<Document> documentSegments = splitContentByToken(content, documentDO.getSegmentMaxTokens());
 
         // 3.1 存储切片
         List<AiKnowledgeSegmentDO> segmentDOs = convertList(documentSegments, segment -> {
@@ -86,7 +84,8 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
             }
             return new AiKnowledgeSegmentDO().setKnowledgeId(documentDO.getKnowledgeId()).setDocumentId(documentId)
                     .setContent(segment.getText()).setContentLength(segment.getText().length())
-                    .setVectorId(AiKnowledgeSegmentDO.VECTOR_ID_EMPTY).setTokens(tokenCountEstimator.estimate(segment.getText()))
+                    .setVectorId(AiKnowledgeSegmentDO.VECTOR_ID_EMPTY)
+                    .setTokens(tokenCountEstimator.estimate(segment.getText()))
                     .setStatus(CommonStatusEnum.ENABLE.getStatus());
         });
         segmentMapper.insertBatch(segmentDOs);
@@ -180,6 +179,26 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
         return segmentMapper.selectListByVectorIds(convertList(documents, Document::getId));
     }
 
+    @Override // Builds segment objects from the URL's content; nothing in this method inserts them via segmentMapper or writes to a vector store.
+    public List<AiKnowledgeSegmentDO> splitContent(String url, Integer segmentMaxTokens) {
+        // 1. Read the content at the URL
+        String content = knowledgeDocumentService.readUrl(url);
+
+        // 2. Split the document content by token budget
+        List<Document> documentSegments = splitContentByToken(content, segmentMaxTokens);
+
+        // 3. Convert each chunk into a segment object
+        return convertList(documentSegments, segment -> {
+            if (StrUtil.isEmpty(segment.getText())) {
+                return null; // NOTE(review): empty-text chunks map to null - presumably convertList drops nulls; confirm.
+            }
+            return new AiKnowledgeSegmentDO() // only content, contentLength and tokens are populated here
+                    .setContent(segment.getText())
+                    .setContentLength(segment.getText().length()) // String.length(), i.e. UTF-16 code units, not tokens
+                    .setTokens(tokenCountEstimator.estimate(segment.getText()));
+        });
+    }
+
     /**
      * 校验段落是否存在
      *
@@ -202,6 +221,11 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
         return getVectorStoreById(knowledgeService.validateKnowledgeExists(knowledgeId));
     }
 
+    private static List<Document> splitContentByToken(String content, Integer segmentMaxTokens) { // Splits raw text into Document chunks using a splitter built for segmentMaxTokens.
+        TextSplitter textSplitter = buildTokenTextSplitter(segmentMaxTokens);
+        return textSplitter.apply(Collections.singletonList(new Document(content))); // wrap the text in a single Document so the splitter can process it; NOTE(review): confirm callers never pass null content
+    }
+
     private static TextSplitter buildTokenTextSplitter(Integer segmentMaxTokens) {
         return TokenTextSplitter.builder()
                 .withChunkSize(segmentMaxTokens)