Commit 92ff9ab0 authored by 林洋洋

切片相关代码提交

parent 7b441fe3
package com.ask.api.entity;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.databind.ObjectMapper;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import org.springframework.util.StringUtils;
import java.io.Serializable;
import java.util.Map;
/**
* 向量存储实体
......@@ -14,6 +20,7 @@ import java.io.Serializable;
* @author ai
* @date 2024/12/20
*/
@Slf4j
@Data
@TableName("ask_vector_store")
@Schema(description = "向量存储")
......@@ -32,24 +39,101 @@ public class AskVectorStore implements Serializable {
*/
@Schema(description = "文档内容")
private String content;
/**
 * Document metadata, stored as a JSON string.
 * Contains, among others: vectorized, status, knowledgeDocumentId, segmentIndex, createBy, createTime.
 * Excluded from JSON serialization of this entity via {@code @JsonIgnore}.
 */
@JsonIgnore
@Schema(description = "文档元数据")
private String metadata;
// Document ID. Marked as not a table column; parseMetadata() fills it from the "documentId" metadata key.
@TableField(exist = false)
@Schema(description = "文档ID")
private Long documentId;
// File name; parseMetadata() fills it from the "fileName" metadata key.
// NOTE(review): unlike the other metadata-derived fields, fileName and filePath carry no
// @TableField(exist = false) — confirm the table really has matching columns.
@Schema(description = "文件名称")
private String fileName;
// File path; parseMetadata() fills it from the "filePath" metadata key.
@Schema(description = "文件路径")
private String filePath;
// Knowledge base ID. Marked as not a table column; parseMetadata() fills it from "knowledgeBaseId".
@TableField(exist = false)
@Schema(description = "知识库ID")
private Long knowledgeBaseId;
// Title. Marked as not a table column; parseMetadata() fills it from the "title" metadata key.
@TableField(exist = false)
@Schema(description = "标题")
private String title;
// Enabled flag. Marked as not a table column; parseMetadata() fills it from the "isEnabled" metadata key.
@TableField(exist = false)
@Schema(description = "启用状态")
private Integer isEnabled;
// Shared Jackson mapper used by parseMetadata() to read the metadata JSON.
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
/**
 * Parses the {@code metadata} JSON string and copies the known keys onto the matching fields.
 *
 * <p>Keys read: {@code documentId}, {@code knowledgeBaseId}, {@code title}, {@code isEnabled},
 * {@code fileName} and {@code filePath}. A key that is absent, or whose value is JSON
 * {@code null}, leaves the corresponding field untouched. Nothing happens when
 * {@code metadata} is blank.
 *
 * <p>Every key is handled independently: a value that cannot be converted to a number is
 * logged and skipped, and the remaining keys are still applied. If the string cannot be read
 * as a JSON object, the failure is logged and no field is changed. Parse and conversion
 * failures are logged rather than thrown.
 */
public void parseMetadata() {
    if (!StringUtils.hasText(this.metadata)) {
        return;
    }

    Map<?, ?> metadataMap;
    try {
        // Map<?, ?> accepts the raw Map.class result without an unchecked conversion.
        metadataMap = OBJECT_MAPPER.readValue(this.metadata, Map.class);
    } catch (Exception e) {
        log.warn("解析metadata失败,metadata: {}, 错误: {}", this.metadata, e.getMessage());
        return;
    }
    if (metadataMap == null) {
        // A document consisting of the JSON literal null yields no map at all.
        return;
    }

    Long parsedDocumentId = metadataLong(metadataMap, "documentId");
    if (parsedDocumentId != null) {
        this.documentId = parsedDocumentId;
    }

    Long parsedKnowledgeBaseId = metadataLong(metadataMap, "knowledgeBaseId");
    if (parsedKnowledgeBaseId != null) {
        this.knowledgeBaseId = parsedKnowledgeBaseId;
    }

    String parsedTitle = metadataText(metadataMap, "title");
    if (parsedTitle != null) {
        this.title = parsedTitle;
    }

    Integer parsedIsEnabled = metadataInteger(metadataMap, "isEnabled");
    if (parsedIsEnabled != null) {
        this.isEnabled = parsedIsEnabled;
    }

    String parsedFileName = metadataText(metadataMap, "fileName");
    if (parsedFileName != null) {
        this.fileName = parsedFileName;
    }

    String parsedFilePath = metadataText(metadataMap, "filePath");
    if (parsedFilePath != null) {
        this.filePath = parsedFilePath;
    }
}

/** Returns the string form of the value stored under {@code key}, or {@code null} when absent or null. */
private static String metadataText(Map<?, ?> source, String key) {
    Object value = source.get(key);
    return value == null ? null : value.toString();
}

/** Returns the value under {@code key} as a {@code Long}, or {@code null} when absent or not a whole number. */
private static Long metadataLong(Map<?, ?> source, String key) {
    String text = metadataText(source, key);
    if (text == null) {
        return null;
    }
    try {
        return Long.valueOf(text.trim());
    } catch (NumberFormatException e) {
        // One malformed value must not stop the remaining keys from being applied.
        log.warn("解析metadata字段失败,字段: {}, 值: {}", key, text);
        return null;
    }
}

/** Returns the value under {@code key} as an {@code Integer}, or {@code null} when absent or not a whole number. */
private static Integer metadataInteger(Map<?, ?> source, String key) {
    String text = metadataText(source, key);
    if (text == null) {
        return null;
    }
    try {
        return Integer.valueOf(text.trim());
    } catch (NumberFormatException e) {
        // One malformed value must not stop the remaining keys from being applied.
        log.warn("解析metadata字段失败,字段: {}, 值: {}", key, text);
        return null;
    }
}
}
\ No newline at end of file
......@@ -12,8 +12,10 @@ import com.fasterxml.jackson.databind.ObjectMapper;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.tags.Tag;
import jakarta.servlet.http.HttpServletRequest;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.util.StringUtils;
import org.springframework.web.bind.annotation.*;
......@@ -36,7 +38,7 @@ import java.util.Map;
public class AskVectorStoreController {
private final AskVectorStoreService askVectorStoreService;
private final ObjectMapper objectMapper;
private final VectorStore vectorStore;
/**
* 分页查询向量存储
......@@ -56,10 +58,12 @@ public class AskVectorStoreController {
LambdaQueryWrapper<AskVectorStore> wrapper = Wrappers.lambdaQuery(AskVectorStore.class)
.like(org.apache.commons.lang3.StringUtils.isNoneBlank(content), AskVectorStore::getContent, content.trim())
.like(org.apache.commons.lang3.StringUtils.isNoneBlank(title), AskVectorStore::getContent, title.trim())
.eq(AskVectorStore::getDocumentId, documentId)
// 使用metadata jsonB字段进行过滤
.apply("metadata::jsonb ->> 'documentId' = {0}", String.valueOf(documentId))
.apply(org.apache.commons.lang3.StringUtils.isNoneBlank(title), "metadata::jsonb ->> 'title' LIKE {0}", "%" + title + "%")
.orderByDesc(AskVectorStore::getId);
IPage<AskVectorStore> result = askVectorStoreService.page(page, wrapper);
result.getRecords().forEach(askVectorStore -> askVectorStore.parseMetadata());
return R.ok(result);
}
......@@ -75,7 +79,9 @@ public class AskVectorStoreController {
if (!StringUtils.hasText(id)) {
return R.failed("ID不能为空");
}
return R.ok(askVectorStoreService.getById(id));
AskVectorStore askVectorStore = askVectorStoreService.getById(id);
askVectorStore.parseMetadata();
return R.ok(askVectorStore);
}
/**
......@@ -89,27 +95,38 @@ public class AskVectorStoreController {
@PutMapping("/segment/status/{segmentId}")
public R<Boolean> updateSegmentStatus(@Parameter(description = "切片ID") @PathVariable String segmentId,
@Parameter(description = "启用状态(1:启用,0:禁用)") @RequestParam Integer isEnabled) {
return R.ok(askVectorStoreService.update(Wrappers.<AskVectorStore>lambdaUpdate().eq(AskVectorStore::getId, segmentId)
.set(AskVectorStore::getIsEnabled, isEnabled)));
// 参数校验
if (!StringUtils.hasText(segmentId)) {
return R.failed("切片ID不能为空");
}
/**
* 修改向量存储
*
* @param askVectorStore 向量存储数据
* @return 操作结果
*/
@Operation(summary = "修改向量存储", description = "修改向量存储数据")
@PutMapping
public R<Boolean> updateById(@Valid @RequestBody AskVectorStore askVectorStore) {
if (!StringUtils.hasText(askVectorStore.getId())) {
return R.failed("ID不能为空");
if (isEnabled == null || (isEnabled != 0 && isEnabled != 1)) {
return R.failed("启用状态参数无效,只能为0(禁用)或1(启用)");
}
boolean result = askVectorStoreService.updateById(askVectorStore);
return R.ok(result, result ? "修改成功" : "修改失败");
// 直接更新metadata jsonB中的isEnabled字段
boolean result = askVectorStoreService.update(
Wrappers.<AskVectorStore>lambdaUpdate()
.eq(AskVectorStore::getId, segmentId)
.setSql("metadata = jsonb_set(metadata, '{isEnabled}', '" + isEnabled + "')")
);
return R.ok(result);
}
// /**
// * 修改向量存储
// *
// * @param askVectorStore 向量存储数据
// * @return 操作结果
// */
// @Operation(summary = "修改向量存储", description = "修改向量存储数据")
// @PutMapping
// public R<Boolean> updateById(@Valid @RequestBody AskVectorStore askVectorStore) {
// if (!StringUtils.hasText(askVectorStore.getId())) {
// return R.failed("ID不能为空");
// }
// boolean result = askVectorStoreService.updateById(askVectorStore);
// return R.ok(result, result ? "修改成功" : "修改失败");
// }
/**
* 批量删除向量存储
*
......@@ -122,10 +139,9 @@ public class AskVectorStoreController {
if (ids == null || ids.isEmpty()) {
return R.failed("ID列表不能为空");
}
try {
boolean result = askVectorStoreService.removeByIds(ids);
return R.ok(result, result ? "批量删除成功" : "批量删除失败");
vectorStore.delete(ids);
return R.ok(true);
} catch (Exception e) {
log.error("批量删除向量存储失败,IDs: {}, 错误: {}", ids, e.getMessage(), e);
return R.failed("批量删除失败:" + e.getMessage());
......
......@@ -10,7 +10,6 @@ import com.ask.api.entity.KnowledgeDocument;
import com.ask.api.entity.SysFile;
import com.ask.common.core.FileTemplate;
import com.ask.common.core.R;
import com.ask.service.AskVectorStoreService;
import com.ask.service.KnowledgeDocumentService;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
......@@ -22,6 +21,8 @@ import io.swagger.v3.oas.annotations.security.SecurityRequirement;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.ai.vectorstore.filter.Filter;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.transaction.annotation.Transactional;
......@@ -49,7 +50,7 @@ public class KnowledgeDocumentController {
private final KnowledgeDocumentService knowledgeDocumentService;
private final AskVectorStoreService askVectorStoreService;
private final VectorStore vectorStore;
/**
......@@ -76,7 +77,6 @@ public class KnowledgeDocumentController {
}
// 按创建时间倒序排列
wrapper.orderByDesc(KnowledgeDocument::getCreateTime);
return R.ok(knowledgeDocumentService.page(page, wrapper));
}
......@@ -135,16 +135,33 @@ public class KnowledgeDocumentController {
/**
* 通过id删除知识库文档
*
* @param ids
* @param ids 文档ID列表
* @return R
*/
@Operation(summary = "批量知识库文档", description = "通过id删除知识库文档")
@Operation(summary = "批量删除知识库文档", description = "批量删除知识库文档")
@DeleteMapping("/batch")
@Transactional(rollbackFor = Exception.class)
public R<Boolean> removeById(@RequestBody List<Long> ids) {
ids.forEach(id -> {
try {
// 删除知识库文档
knowledgeDocumentService.removeById(id);
askVectorStoreService.remove(Wrappers.lambdaQuery(AskVectorStore.class).eq(AskVectorStore::getDocumentId, id));
// 构建基于documentId的过滤条件
Filter.Expression filterExpression = new Filter.Expression(
Filter.ExpressionType.EQ,
new Filter.Key("documentId"),
new Filter.Value(id)
);
// 删除向量存储中对应的文档切片
vectorStore.delete(filterExpression);
log.info("成功删除文档及其向量数据,文档ID: {}", id);
} catch (Exception e) {
log.error("删除文档失败,文档ID: {}, 错误: {}", id, e.getMessage(), e);
throw new RuntimeException("删除文档失败:" + e.getMessage());
}
});
return R.ok(true);
......
......@@ -2,6 +2,7 @@ package com.ask.service;
import com.ask.api.entity.AskVectorStore;
import com.baomidou.mybatisplus.extension.service.IService;
import org.apache.ibatis.annotations.Param;
import org.springframework.ai.document.Document;
import java.util.List;
......@@ -14,5 +15,13 @@ import java.util.List;
*/
public interface AskVectorStoreService extends IService<AskVectorStore> {

    /**
     * Vectorizes a segment and writes it to the vector store.
     *
     * <p>The text that gets embedded is the segment content, preceded by the title when one is
     * set.
     *
     * @param askVectorStore the vector-store entity to embed; its content must not be blank
     * @return {@code true} when the segment was stored; {@code false} when the content is blank
     *         or storing failed
     */
    // The MyBatis @Param annotation was dropped: it names mapper statement parameters and has
    // no effect on a service-layer method.
    boolean vectorizeAndStore(AskVectorStore askVectorStore);
}
\ No newline at end of file
......@@ -9,8 +9,10 @@ import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import org.springframework.util.StringUtils;
import java.time.LocalDateTime;
import java.util.HashMap;
......@@ -29,5 +31,85 @@ import java.util.stream.Collectors;
@Service
public class AskVectorStoreServiceImpl extends ServiceImpl<AskVectorStoreMapper, AskVectorStore> implements AskVectorStoreService {
@Autowired
private VectorStore vectorStore;
@Autowired
private ObjectMapper objectMapper;
@Override
public boolean vectorizeAndStore(AskVectorStore askVectorStore) {
try {
// 校验必要字段
if (!StringUtils.hasText(askVectorStore.getContent())) {
log.warn("向量化存储失败:文档内容为空");
return false;
}
// 构建文档内容,如果有标题则添加标题
StringBuilder contentBuilder = new StringBuilder();
if (StringUtils.hasText(askVectorStore.getTitle())) {
contentBuilder.append("标题: ").append(askVectorStore.getTitle()).append("\n\n");
}
contentBuilder.append(askVectorStore.getContent());
String documentContent = contentBuilder.toString();
// 构建元数据
Map<String, Object> metadata = new HashMap<>();
if (StringUtils.hasText(askVectorStore.getMetadata())) {
try {
// 解析已有的元数据
Map<String, Object> existingMetadata = objectMapper.readValue(
askVectorStore.getMetadata(), Map.class);
metadata.putAll(existingMetadata);
} catch (Exception e) {
log.warn("解析已有元数据失败:{}", e.getMessage());
}
}
// 添加向量化相关的元数据
metadata.put("id", askVectorStore.getId());
metadata.put("title", askVectorStore.getTitle());
metadata.put("documentId", askVectorStore.getDocumentId());
metadata.put("knowledgeBaseId", askVectorStore.getKnowledgeBaseId());
metadata.put("isEnabled", askVectorStore.getIsEnabled());
metadata.put("vectorized", 1); // 标记为已向量化
metadata.put("vectorizeTime", LocalDateTime.now().toString());
// 创建 Spring AI Document 对象
Document document = new Document(askVectorStore.getId(), documentContent, metadata);
// 向量化存储到 VectorStore
vectorStore.add(List.of(document));
// 更新数据库中的向量化状态
updateVectorizedStatus(askVectorStore.getId(), metadata);
log.info("向量化存储成功:id={}, title={}", askVectorStore.getId(), askVectorStore.getTitle());
return true;
} catch (Exception e) {
log.error("向量化存储失败:id={}, error={}",
askVectorStore.getId(), e.getMessage(), e);
return false;
}
}
/**
* 更新向量化状态
*/
private void updateVectorizedStatus(String id, Map<String, Object> metadata) {
try {
String metadataJson = objectMapper.writeValueAsString(metadata);
LambdaUpdateWrapper<AskVectorStore> updateWrapper = new LambdaUpdateWrapper<>();
updateWrapper.eq(AskVectorStore::getId, id)
.set(AskVectorStore::getMetadata, metadataJson);
this.update(updateWrapper);
} catch (Exception e) {
log.warn("更新向量化状态失败:id={}, error={}", id, e.getMessage());
}
}
}
\ No newline at end of file
......@@ -456,7 +456,6 @@ public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentM
metadata.put("documentId", document.getId());
metadata.put("fileName", document.getFileName());
metadata.put("filePath", document.getFilePath());
metadata.put("vectorized", 0);
metadata.put("isEnabled", 1); //启用
metadata.put("createTime", LocalDateTime.now().toString());
askVectorStore.setContent(vo.getContent());
......@@ -471,6 +470,8 @@ public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentM
}
});
askVectorStoreService.saveBatch(askVectorStores);
//需要异步 执行向量化
}
return true;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment