Commit 52b71166 authored by 林洋洋's avatar 林洋洋

调整项目结构

parent 0b99f865
# Ask Data AI
基于Spring AI的智能问答系统,支持文档处理、向量化存储和智能问答。
## 项目结构
```
ask-data-ai
├── ask-data-ai-common -- 系统公共模块
├── ask-data-ai-api -- 系统接口模块
├── ask-data-ai-biz -- 系统业务模块
└── ask-data-ai-boot -- 系统启动模块
```
## 主要功能
- 文档处理:支持多种格式文档的上传和处理
- 向量化存储:使用PGVector进行文档向量化存储
- 智能问答:基于GPT和RAG的智能问答系统
- 对话记忆:支持多轮对话记忆功能
## 开发环境
- JDK 17
- Maven 3.8+
- PostgreSQL 16+
## 快速开始
1. 安装必要的开发环境
2. 克隆项目到本地
3. 修改配置文件中的数据库连接信息和OpenAI配置
4. 运行数据库初始化脚本
5. 运行项目
```bash
# 打包
mvn clean package
# 运行
java -jar ask-data-ai-boot/target/ask-data-ai-boot.jar
```
## 环境变量
| 变量名 | 说明 | 默认值 |
|--------|------|--------|
| DB_HOST | 数据库地址 | 127.0.0.1 |
| DB_PORT | 数据库端口 | 5432 |
| DB_NAME | 数据库名称 | ask_data |
| DB_USER | 数据库用户名 | postgres |
| DB_PWD | 数据库密码 | 123456 |
| OPENAI_API_KEY | OpenAI API密钥 | your-api-key |
| OPENAI_BASE_URL | OpenAI API地址 | https://api.openai.com |
## Docker支持
```bash
# 构建镜像
mvn clean package docker:build
# 运行容器
docker run -d \
-p 9999:9999 \
-e DB_HOST=postgres \
-e OPENAI_API_KEY=your-api-key \
ask-data-ai
```
## 接口文档
启动服务后访问:http://localhost:9999/admin/swagger-ui.html
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.ask</groupId>
<artifactId>ask-data-ai</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>ask-data-ai-api</artifactId>
<packaging>jar</packaging>
<description>ask-data-ai API模块</description>
<dependencies>
<!-- Common -->
<dependency>
<groupId>com.ask</groupId>
<artifactId>ask-data-ai-common</artifactId>
<version>${project.version}</version>
</dependency>
<!-- Lombok -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>
</project>
\ No newline at end of file
# Runtime image for the packaged Spring Boot application.
FROM openjdk:17-jdk-slim
WORKDIR /build
# Jar to ship, relative to the build context; override with --build-arg JAR_FILE=...
ARG JAR_FILE=target/*.jar
COPY ${JAR_FILE} app.jar
# The application listens on 9999 (server.port), not Spring Boot's default 8080.
EXPOSE 9999
ENTRYPOINT ["java","-jar","/build/app.jar"]
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.ask</groupId>
<artifactId>ask-data-ai</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>ask-data-ai-biz</artifactId>
<packaging>jar</packaging>
<description>ask-data-ai 业务处理模块</description>
<dependencyManagement>
<dependencies>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-bom</artifactId>
<version>${spring-ai.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<!-- Common -->
<dependency>
<groupId>com.ask</groupId>
<artifactId>ask-data-ai-common</artifactId>
<version>${project.version}</version>
</dependency>
<!-- API -->
<dependency>
<groupId>com.ask</groupId>
<artifactId>ask-data-ai-api</artifactId>
<version>${project.version}</version>
</dependency>
<!-- SpringBoot Web -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
<exclusions>
<exclusion>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-tomcat</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- undertow容器 -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-undertow</artifactId>
</dependency>
<!-- SpringBoot Actuator -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-actuator</artifactId>
</dependency>
<!-- PostgreSQL驱动 -->
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
</dependency>
<!-- MyBatis Plus -->
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-spring-boot3-starter</artifactId>
<version>${mybatis-plus.version}</version>
</dependency>
<!-- Spring AI -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-starter-model-openai</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-starter-mcp-client-webflux</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-starter-vector-store-pgvector</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-advisors-vector-store</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-rag</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-tika-document-reader</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-starter-model-chat-memory-repository-jdbc</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>
<repositories>
<repository>
<id>spring-snapshots</id>
<name>Spring Snapshots</name>
<url>https://repo.spring.io/snapshot</url>
<releases>
<enabled>false</enabled>
</releases>
</repository>
<repository>
<name>Central Portal Snapshots</name>
<id>central-portal-snapshots</id>
<url>https://central.sonatype.com/repository/maven-snapshots/</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>
<build>
<plugins>
</plugins>
</build>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.ask</groupId>
<artifactId>ask-data-ai</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>ask-data-ai-boot</artifactId>
<packaging>jar</packaging>
<description>ask-data-ai 启动模块</description>
<dependencies>
<!-- 业务模块 -->
<dependency>
<groupId>com.ask</groupId>
<artifactId>ask-data-ai-biz</artifactId>
<version>${project.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<mainClass>com.ask.AskDataAiApplication</mainClass>
<layout>JAR</layout>
</configuration>
<executions>
<execution>
<goals>
<goal>repackage</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
\ No newline at end of file
package com.ask;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
/**
 * Application entry point.
 *
 * @author YangKai
 */
@SpringBootApplication
public class AskDataAiApplication {

    /**
     * Boots the Spring application context.
     *
     * @param args command-line arguments, passed through to Spring Boot unchanged
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(AskDataAiApplication.class);
        application.run(args);
    }
}
\ No newline at end of file
server:
  port: 9999
  servlet:
    context-path: /admin # Base path under which the application is served
spring:
  application:
    name: ask-data-ai
  datasource:
    # Connection details are read from the environment so that no credentials are kept in
    # version control. The password previously committed here should be treated as leaked
    # and rotated.
    url: jdbc:postgresql://${DB_HOST:127.0.0.1}:${DB_PORT:5432}/${DB_NAME:ask_data_ai_db}
    username: ${DB_USER:postgres}
    password: ${DB_PWD} # Required, deliberately without a default
    driver-class-name: org.postgresql.Driver
  ai:
    vectorstore:
      pgvector:
        index-type: HNSW
        distance-type: COSINE_DISTANCE
        dimensions: 1024
        max-document-batch-size: 10000 # Optional: Maximum number of documents per batch
        schema-name: public
        table-name: ask_vector_store
    chat:
      memory:
        repository:
          jdbc:
            initialize-schema: never # "always" can be used in development for easier debugging
            platform: postgresql
    openai:
      # The API key previously committed here should be treated as leaked and rotated.
      base-url: ${OPENAI_BASE_URL:https://dashscope.aliyuncs.com/compatible-mode}
      api-key: ${OPENAI_API_KEY} # Required, deliberately without a default
      chat:
        options:
          model: qwen-plus
      embedding:
        base-url: ${OPENAI_BASE_URL:https://dashscope.aliyuncs.com/compatible-mode}
        api-key: ${OPENAI_API_KEY}
        options:
          model: text-embedding-v4
mybatis-plus:
  mapper-locations: classpath*:/mapper/*Mapper.xml # Location of the mapper XML files
  global-config:
    banner: false # Whether to print the mybatis-plus banner
    db-config:
      id-type: auto # Primary key type
      where-strategy: not_empty # Strategy for WHERE conditions
      insert-strategy: not_empty # Strategy for INSERT
      update-strategy: not_null # Strategy for UPDATE
  type-handlers-package: com.ask.common.mybatis.handler # Package scanned for type handlers
  configuration:
    jdbc-type-for-null: 'null' # JDBC type used when a parameter is null
    call-setters-on-nulls: true # Call setters even when the retrieved value is null
    shrink-whitespaces-in-sql: true # Strip redundant whitespace from SQL
# springdoc-openapi configuration
springdoc:
  swagger-ui:
    enabled: true # Enable swagger-ui
    path: /swagger-ui.html # Access path
  api-docs:
    enabled: true # Enable api-docs
    path: /v3/api-docs # Access path
  group-configs:
    - group: 'default'
      paths-to-match: '/**'
      packages-to-scan: com.ask
  default-produces-media-type: application/json
  default-consumes-media-type: application/json
# knife4j enhancements; can be omitted if not needed
knife4j:
  enable: true
  setting:
    language: zh_cn
    enable-swagger-models: true
    enable-document-manage: true
    swagger-model-name: 实体类列表
    enable-version: false
    enable-reload-cache-parameter: false
    enable-after-script: false
    enable-filter-multipart-api-method-type: POST
    enable-filter-multipart-apis: false
    enable-request-cache: true
    enable-host: false
    enable-host-text:
# swagger configuration
swagger:
  enabled: true
  title: Ask Data AI接口文档
  gateway: http://localhost:${server.port}/admin
  token-url: ${swagger.gateway}/oauth2/token
  scope: server
# Logging configuration
logging:
  level:
    root: INFO
    com.ask: DEBUG
<?xml version="1.0" encoding="UTF-8"?>
<configuration debug="false">
<!-- Directory where log files are stored -->
<property name="LOG_HOME" value="logs" />
<!--<property name="COLOR_PATTERN" value="%black(%contextName-) %red(%d{yyyy-MM-dd HH:mm:ss}) %green([%thread]) %highlight(%-5level) %boldMagenta( %replace(%caller{1}){'\t|Caller.{1}0|\r\n', ''})- %gray(%msg%xEx%n)" />-->
<!-- Console output -->
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
<!-- Pattern legend: %d = date, %thread = thread name, %-5level = level left-aligned to 5 characters, %msg = log message, %n = line break. Uncoloured variant kept for reference:
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50}:%L - %msg%n</pattern>-->
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %highlight(%-5level) %cyan(%logger{50}:%L) - %msg%n</pattern>
</encoder>
</appender>
<!-- File output, rolled over by date and by size -->
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
<!-- File name pattern of the rolled log files -->
<FileNamePattern>${LOG_HOME}/ask-%d{yyyy-MM-dd}.%i.log</FileNamePattern>
<!-- Number of days of history to keep -->
<MaxHistory>30</MaxHistory>
<maxFileSize>10MB</maxFileSize>
</rollingPolicy>
<encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
<!-- Pattern legend: %d = date, %thread = thread name, %-5level = level left-aligned to 5 characters, %msg = log message, %n = line break -->
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50}:%L - %msg%n</pattern>
</encoder>
</appender>
<!-- End of the file appender section (the original note mentions a daily HTML log, but no HTML appender is configured here) -->
<!-- MyBatis / JDBC logging -->
<!-- NOTE(review): MyBatis classes live under org.apache.ibatis, so the logger name below presumably never matches anything. Confirm the intent before renaming it, because TRACE on that package is very verbose. -->
<logger name="com.apache.ibatis" level="TRACE" />
<logger name="java.sql.Connection" level="DEBUG" />
<logger name="java.sql.Statement" level="DEBUG" />
<logger name="java.sql.PreparedStatement" level="DEBUG" />
<!-- Root log level and the appenders it writes to -->
<root level="INFO">
<appender-ref ref="STDOUT" />
<appender-ref ref="FILE" />
</root>
<logger name="okhttp3" level="ERROR"/>
</configuration>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>com.ask</groupId>
<artifactId>ask-data-ai</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<artifactId>ask-data-ai-common</artifactId>
<packaging>jar</packaging>
<description>ask-data-ai 公共模块</description>
<dependencies>
<!-- Spring Context -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
</dependency>
<!-- Spring MVC -->
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-web</artifactId>
<optional>true</optional>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.springframework.cloud</groupId>-->
<!-- <artifactId>spring-cloud-commons</artifactId>-->
<!-- </dependency>-->
<!-- MyBatis Plus -->
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-spring-boot3-starter</artifactId>
<version>${mybatis-plus.version}</version>
</dependency>
<!-- Validation -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-validation</artifactId>
</dependency>
<!-- Configuration -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
<!-- Lombok -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<scope>provided</scope>
</dependency>
<!-- FastJson -->
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2</artifactId>
<version>${fastjson.version}</version>
</dependency>
<!-- Commons Lang3 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
</dependency>
<!-- Commons Collections4 -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
<version>4.4</version>
</dependency>
<!-- Swagger -->
<dependency>
<groupId>com.github.xiaoymin</groupId>
<artifactId>knife4j-openapi3-jakarta-spring-boot-starter</artifactId>
<version>4.3.0</version>
</dependency>
<dependency>
<groupId>org.springdoc</groupId>
<artifactId>springdoc-openapi-starter-common</artifactId>
<version>${springdoc.version}</version>
</dependency>
</dependencies>
</project>
\ No newline at end of file
package com.ask.common.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.cors.CorsConfiguration;
import org.springframework.web.cors.UrlBasedCorsConfigurationSource;
import org.springframework.web.filter.CorsFilter;
@Configuration
public class CorsConfig {

    /**
     * Registers a filter that applies a single CORS policy to every request path.
     *
     * @return the CORS filter bean
     */
    @Bean
    public CorsFilter corsFilter() {
        UrlBasedCorsConfigurationSource configSource = new UrlBasedCorsConfigurationSource();
        configSource.registerCorsConfiguration("/**", buildConfig());
        return new CorsFilter(configSource);
    }

    /**
     * Builds the CORS policy: any origin pattern, header and method, with credentials allowed.
     */
    private CorsConfiguration buildConfig() {
        CorsConfiguration policy = new CorsConfiguration();
        // Needed for cross-origin requests that carry cookies.
        policy.setAllowCredentials(true);
        // Lifetime of a preflight response in seconds; avoids a preflight before every request.
        policy.setMaxAge(3600L);
        // Any HTTP method (POST, GET, ...).
        policy.addAllowedMethod("*");
        // Any request header.
        policy.addAllowedHeader("*");
        // Any origin; addAllowedOriginPattern is used instead of addAllowedOrigin.
        policy.addAllowedOriginPattern("*");
        return policy;
    }
}
\ No newline at end of file
package com.ask.common.config;
import com.baomidou.mybatisplus.annotation.DbType;
import com.baomidou.mybatisplus.extension.plugins.MybatisPlusInterceptor;
import com.baomidou.mybatisplus.extension.plugins.inner.PaginationInnerInterceptor;
import org.springframework.boot.autoconfigure.AutoConfiguration;
import org.springframework.context.annotation.Bean;
/**
 * MyBatis-Plus configuration.
 */
@AutoConfiguration
public class MybatisPlusConfig {

    /**
     * Creates the MyBatis-Plus interceptor with the pagination plugin for PostgreSQL.
     *
     * @return interceptor containing a single pagination inner interceptor
     */
    @Bean
    public MybatisPlusInterceptor mybatisPlusInterceptor() {
        PaginationInnerInterceptor pagination = new PaginationInnerInterceptor(DbType.POSTGRE_SQL);
        MybatisPlusInterceptor chain = new MybatisPlusInterceptor();
        chain.addInnerInterceptor(pagination);
        return chain;
    }
}
\ No newline at end of file
package com.ask.common.core;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import java.io.Serializable;
/**
 * Generic response envelope carrying a status code, a message and an optional payload.
 *
 * @param <T> payload type
 */
@Getter
@Setter
@NoArgsConstructor
@AllArgsConstructor
public class R<T> implements Serializable {

    private static final long serialVersionUID = 1L;

    /** Status code used by the {@code ok} factories. */
    private static final int CODE_SUCCESS = 0;

    /** Status code used by {@link #failed(String)}. */
    private static final int CODE_FAILED = 1;

    /** Message used by the {@code ok} factories. */
    private static final String MSG_SUCCESS = "success";

    private int code;

    private String msg;

    private T data;

    /**
     * Creates a successful response without a payload.
     */
    public static <T> R<T> ok() {
        return new R<>(CODE_SUCCESS, MSG_SUCCESS, null);
    }

    /**
     * Creates a successful response.
     *
     * @param data payload, may be {@code null}
     */
    public static <T> R<T> ok(T data) {
        return new R<>(CODE_SUCCESS, MSG_SUCCESS, data);
    }

    /**
     * Creates a failed response with status code 1.
     *
     * @param msg failure message
     */
    public static <T> R<T> failed(String msg) {
        return failed(CODE_FAILED, msg);
    }

    /**
     * Creates a failed response with a caller-chosen status code.
     *
     * @param code status code
     * @param msg failure message
     */
    public static <T> R<T> failed(int code, String msg) {
        return new R<>(code, msg, null);
    }
}
\ No newline at end of file
//package com.ask.common.mybatis.handler;
//
//import com.fasterxml.jackson.databind.ObjectMapper;
//import org.apache.ibatis.type.BaseTypeHandler;
//import org.apache.ibatis.type.JdbcType;
//import org.apache.ibatis.type.MappedJdbcTypes;
//import org.apache.ibatis.type.MappedTypes;
//
//import java.sql.CallableStatement;
//import java.sql.PreparedStatement;
//import java.sql.ResultSet;
//import java.sql.SQLException;
//
///**
// * JSON类型处理器
// */
//@MappedTypes(Object.class)
//@MappedJdbcTypes(JdbcType.OTHER)
//public class JsonTypeHandler extends BaseTypeHandler<Object> {
//
// private static final ObjectMapper MAPPER = new ObjectMapper();
// private final Class<?> type;
//
// public JsonTypeHandler(Class<?> type) {
// if (type == null) {
// throw new IllegalArgumentException("Type argument cannot be null");
// }
// this.type = type;
// }
//
// @Override
// public void setNonNullParameter(PreparedStatement ps, int i, Object parameter, JdbcType jdbcType)
// throws SQLException {
// PGobject pgObject = new PGobject();
// pgObject.setType("jsonb");
// try {
// pgObject.setValue(MAPPER.writeValueAsString(parameter));
// } catch (Exception e) {
// throw new SQLException("Error converting value to JSON: " + parameter, e);
// }
// ps.setObject(i, pgObject);
// }
//
// @Override
// public Object getNullableResult(ResultSet rs, String columnName) throws SQLException {
// return parse(rs.getString(columnName));
// }
//
// @Override
// public Object getNullableResult(ResultSet rs, int columnIndex) throws SQLException {
// return parse(rs.getString(columnIndex));
// }
//
// @Override
// public Object getNullableResult(CallableStatement cs, int columnIndex) throws SQLException {
// return parse(cs.getString(columnIndex));
// }
//
// private Object parse(String json) {
// if (json == null) {
// return null;
// }
// try {
// return MAPPER.readValue(json, type);
// } catch (Exception e) {
// throw new RuntimeException("Error parsing JSON: " + json, e);
// }
// }
//}
\ No newline at end of file
package com.ask.common.mybatis.handler;
import com.baomidou.mybatisplus.core.handlers.MetaObjectHandler;
import lombok.extern.slf4j.Slf4j;
import org.apache.ibatis.reflection.MetaObject;
import org.springframework.stereotype.Component;
import java.time.LocalDateTime;
/**
 * MyBatis-Plus handler that fills audit fields automatically on insert and update.
 *
 * @author ai
 * @date 2024/03/21
 */
@Slf4j
@Component
public class MybatisMetaObjectHandler implements MetaObjectHandler {

    /** Operator name written to the createBy / updateBy fields. */
    private static final String DEFAULT_OPERATOR = "admin";

    /**
     * Fills creation and update audit fields when a row is inserted.
     *
     * @param metaObject wrapper around the entity being inserted
     */
    @Override
    public void insertFill(MetaObject metaObject) {
        // Creation time and creator.
        strictInsertFill(metaObject, "createTime", () -> LocalDateTime.now(), LocalDateTime.class);
        strictInsertFill(metaObject, "createBy", () -> DEFAULT_OPERATOR, String.class);
        // Update time and updater are also set on insert.
        strictInsertFill(metaObject, "updateTime", () -> LocalDateTime.now(), LocalDateTime.class);
        strictInsertFill(metaObject, "updateBy", () -> DEFAULT_OPERATOR, String.class);
    }

    /**
     * Fills update audit fields when a row is updated.
     *
     * @param metaObject wrapper around the entity being updated
     */
    @Override
    public void updateFill(MetaObject metaObject) {
        log.debug("自动填充更新字段");
        strictUpdateFill(metaObject, "updateTime", () -> LocalDateTime.now(), LocalDateTime.class);
        strictUpdateFill(metaObject, "updateBy", () -> DEFAULT_OPERATOR, String.class);
    }
}
\ No newline at end of file
This diff is collapsed.
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<title>Logback Log Messages</title>
<style type="text/css">
table { margin-left: 2em; margin-right: 2em; border-left: 2px solid #AAA; }
TR.even { background: #FFFFFF; }
TR.odd { background: #EAEAEA; }
TR.warn TD.Level, TR.error TD.Level, TR.fatal TD.Level {font-weight: bold; color: #FF4040 }
TD { padding-right: 1ex; padding-left: 1ex; border-right: 2px solid #AAA; }
TD.Time, TD.Date { text-align: right; font-family: courier, monospace; font-size: smaller; }
TD.Thread { text-align: left; }
TD.Level { text-align: right; }
TD.Logger { text-align: left; }
TR.header { background: #596ED5; color: #FFF; font-weight: bold; font-size: larger; }
TD.Exception { background: #A2AEE8; font-family: courier, monospace;}
</style>
</head>
<body>
<hr/>
<p>Log session start time Fri Jul 11 18:42:40 CST 2025</p><p></p>
<table cellspacing="0">
<tr class="header">
<td class="Level">Level</td>
<td class="Date">Date</td>
<td class="Message">Message</td>
<td class="MethodOfCaller">MethodOfCaller</td>
<td class="FileOfCaller">FileOfCaller</td>
<td class="LineOfCaller">LineOfCaller</td>
</tr>
</table>
</body></html><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
<head>
<title>Logback Log Messages</title>
<style type="text/css">
table { margin-left: 2em; margin-right: 2em; border-left: 2px solid #AAA; }
TR.even { background: #FFFFFF; }
TR.odd { background: #EAEAEA; }
TR.warn TD.Level, TR.error TD.Level, TR.fatal TD.Level {font-weight: bold; color: #FF4040 }
TD { padding-right: 1ex; padding-left: 1ex; border-right: 2px solid #AAA; }
TD.Time, TD.Date { text-align: right; font-family: courier, monospace; font-size: smaller; }
TD.Thread { text-align: left; }
TD.Level { text-align: right; }
TD.Logger { text-align: left; }
TR.header { background: #596ED5; color: #FFF; font-weight: bold; font-size: larger; }
TD.Exception { background: #A2AEE8; font-family: courier, monospace;}
</style>
</head>
<body>
<hr/>
<p>Log session start time Fri Jul 11 18:45:57 CST 2025</p><p></p>
<table cellspacing="0">
<tr class="header">
<td class="Level">Level</td>
<td class="Date">Date</td>
<td class="Message">Message</td>
<td class="MethodOfCaller">MethodOfCaller</td>
<td class="FileOfCaller">FileOfCaller</td>
<td class="LineOfCaller">LineOfCaller</td>
</tr>
</table>
</body></html>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.ask</groupId>
<artifactId>ask-data-ai</artifactId>
<version>1.0.0-SNAPSHOT</version>
<packaging>pom</packaging>
<description>AI智能问答系统</description>
<properties>
<spring-boot.version>3.2.1</spring-boot.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<mybatis-plus.version>3.5.5</mybatis-plus.version>
<spring-boot.mybatis>3.0.2</spring-boot.mybatis>
<fastjson.version>2.0.42</fastjson.version>
<spring-ai.version>1.1.0-SNAPSHOT</spring-ai.version>
<anyline.version>8.7.2-jdk17-20240808</anyline.version>
<springdoc.version>2.3.0</springdoc.version>
</properties>
<modules>
<module>ask-data-ai-common</module>
<module>ask-data-ai-api</module>
<module>ask-data-ai-biz</module>
<module>ask-data-ai-boot</module>
</modules>
<dependencyManagement>
<dependencies>
<!-- spring boot 依赖 -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-dependencies</artifactId>
<version>${spring-boot.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
<!-- &lt;!&ndash; spring cloud 依赖 &ndash;&gt;-->
<!-- <dependency>-->
<!-- <groupId>org.springframework.cloud</groupId>-->
<!-- <artifactId>spring-cloud-dependencies</artifactId>-->
<!-- <version>2023.0.0</version>-->
<!-- <type>pom</type>-->
<!-- <scope>import</scope>-->
<!-- </dependency>-->
<!-- spring ai 依赖 -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-bom</artifactId>
<version>${spring-ai.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<build>
    <!-- Use artifactId rather than ${project.name}: no POM in this project declares <name>,
         so artifactId is the property that is guaranteed to be defined for every module. -->
    <finalName>${project.artifactId}</finalName>
    <resources>
        <resource>
            <directory>src/main/resources</directory>
            <filtering>true</filtering>
        </resource>
    </resources>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-compiler-plugin</artifactId>
            <version>3.11.0</version>
            <configuration>
                <source>${maven.compiler.source}</source>
                <target>${maven.compiler.target}</target>
                <!-- Keep method parameter names in the compiled classes -->
                <parameters>true</parameters>
            </configuration>
        </plugin>
    </plugins>
    <pluginManagement>
        <plugins>
            <!-- Shared defaults; applied only in modules that declare this plugin -->
            <plugin>
                <groupId>org.springframework.boot</groupId>
                <artifactId>spring-boot-maven-plugin</artifactId>
                <version>${spring-boot.version}</version>
                <configuration>
                    <finalName>${project.build.finalName}</finalName>
                    <layers>
                        <enabled>true</enabled>
                    </layers>
                </configuration>
                <executions>
                    <execution>
                        <goals>
                            <goal>repackage</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </pluginManagement>
</build>
<repositories>
<repository>
<id>spring-snapshots</id>
<name>Spring Snapshots</name>
<url>https://repo.spring.io/snapshot</url>
<releases>
<enabled>false</enabled>
</releases>
</repository>
<repository>
<name>Central Portal Snapshots</name>
<id>central-portal-snapshots</id>
<url>https://central.sonatype.com/repository/maven-snapshots/</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>
</project>
\ No newline at end of file
package com.pig4cloud.pig.ask.api.dto;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import java.util.List;
/**
 * DTO holding the result of splitting a document into segments.
 *
 * @author ai
 * @date 2024/12/19
 */
@Data
@Schema(description = "文档分段结果")
public class DocumentSegmentResult {
/**
 * File name.
 */
@Schema(description = "文件名")
private String fileName;
/**
 * List of document segments.
 */
@Schema(description = "文档分段列表")
private List<DocumentSegment> segments;
/**
 * Total number of segments.
 */
@Schema(description = "总分段数")
private Integer totalSegments;
/**
 * Total number of characters in the document.
 */
@Schema(description = "文档总字符数")
private Integer totalCharacters;
/**
 * Details of a single document segment.
 */
@Data
@Schema(description = "文档分段详情")
public static class DocumentSegment {
/**
 * Sequence number of the segment.
 */
@Schema(description = "分段序号")
private Integer index;
/**
 * Text content of the segment.
 */
@Schema(description = "分段内容")
private String content;
/**
 * Number of characters in the segment.
 */
@Schema(description = "分段字符数")
private Integer charCount;
/**
 * Number of tokens in the segment (estimated).
 */
@Schema(description = "分段token数(预估)")
private Integer tokenCount;
}
}
\ No newline at end of file
package com.pig4cloud.pig.ask.api.entity;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableLogic;
import com.baomidou.mybatisplus.annotation.TableName;
import com.pig4cloud.pig.common.mybatis.base.BaseEntity;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.EqualsAndHashCode;
/**
 * Conversation record entity, mapped to table {@code ask_chat_conversation}.
 *
 * @author ai
 * @date 2024/03/21
 */
@Data
@TableName("ask_chat_conversation")
@EqualsAndHashCode(callSuper = true)
@Schema(description = "对话记录")
public class ChatConversation extends BaseEntity {

    private static final long serialVersionUID = 1L;

    /**
     * Primary key, generated by the database.
     */
    @TableId(type = IdType.AUTO)
    @Schema(description = "主键ID")
    private Long id;

    /**
     * Conversation ID.
     */
    @Schema(description = "会话ID")
    private String conversationId;

    /**
     * Title.
     */
    @Schema(description = "标题")
    private String title;

    /**
     * Agent ID.
     */
    @Schema(description = "智能体ID")
    private Integer agentId;

    /**
     * User ID.
     */
    @Schema(description = "用户ID")
    private Long userId;

    /**
     * Logical-delete flag: 0 = not deleted, 1 = deleted.
     */
    @TableLogic
    @Schema(description = "删除标记")
    private String delFlag;
}
\ No newline at end of file
package com.pig4cloud.pig.ask.api.entity;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableLogic;
import com.baomidou.mybatisplus.annotation.TableName;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import java.time.LocalDateTime;
/**
 * Stored AI conversation memory entry, mapped to table {@code ask_chat_conversation_detail}.
 *
 * @author ai
 * @date 2024/03/21
 */
@Data
@TableName("ask_chat_conversation_detail")
@Schema(description = "AI对话记忆存储")
public class ChatConversationDetail {

    /**
     * Primary key, generated by the database.
     */
    @TableId(type = IdType.AUTO)
    @Schema(description = "主键ID")
    private Long id;

    /**
     * Conversation ID.
     */
    @Schema(description = "会话ID")
    private String conversationId;

    /**
     * Message content.
     */
    @Schema(description = "消息内容")
    private String content;

    /**
     * Message type (user/assistant).
     */
    @Schema(description = "消息类型")
    private String type;

    /**
     * Timestamp of the message.
     */
    @Schema(description = "时间戳")
    private LocalDateTime timestamp;

    /**
     * Logical-delete flag: 0 = not deleted, 1 = deleted.
     */
    @TableLogic
    @Schema(description = "删除标记")
    private String delFlag;
}
\ No newline at end of file
package com.pig4cloud.pig.ask.api.entity;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableLogic;
import com.baomidou.mybatisplus.annotation.TableName;
import com.pig4cloud.pig.common.mybatis.base.BaseEntity;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.EqualsAndHashCode;
/**
 * Knowledge base entity, mapped to table ask_knowledge_base.
 *
 * @author ai
 * @date 2024/12/19
 */
@Data
@TableName("ask_knowledge_base")
@EqualsAndHashCode(callSuper = true)
@Schema(description = "知识库")
public class KnowledgeBase extends BaseEntity {
private static final long serialVersionUID = 1L;
/**
 * Primary key, generated by the database.
 */
@TableId(type = IdType.AUTO)
@Schema(description = "主键ID")
private Long id;
/**
 * Knowledge base name.
 */
@Schema(description = "知识库名称")
private String name;
/**
 * Knowledge base description.
 */
@Schema(description = "知识库描述")
private String description;
/**
 * Logical-delete flag: 0 = not deleted, 1 = deleted.
 */
@TableLogic
@Schema(description = "删除标记")
private String delFlag;
}
\ No newline at end of file
package com.pig4cloud.pig.ask.api.entity;
import com.baomidou.mybatisplus.annotation.IdType;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableLogic;
import com.baomidou.mybatisplus.annotation.TableName;
import com.pig4cloud.pig.common.mybatis.base.BaseEntity;
import io.swagger.v3.oas.annotations.media.Schema;
import lombok.Data;
import lombok.EqualsAndHashCode;
/**
 * Knowledge base document entity, mapped to table ask_knowledge_document.
 *
 * @author ai
 * @date 2024/12/19
 */
@Data
@TableName("ask_knowledge_document")
@EqualsAndHashCode(callSuper = true)
@Schema(description = "知识库文档")
public class KnowledgeDocument extends BaseEntity {
private static final long serialVersionUID = 1L;
/**
 * Primary key, generated by the database.
 */
@TableId(type = IdType.AUTO)
@Schema(description = "主键ID")
private Long id;
/**
 * ID of the knowledge base this document belongs to.
 */
@Schema(description = "知识库ID")
private Long knowledgeBaseId;
/**
 * Document name.
 */
@Schema(description = "文档名称")
private String name;
/**
 * Original file name.
 */
@Schema(description = "原始文件名")
private String fileName;
/**
 * Path where the file is stored.
 */
@Schema(description = "文件存储路径")
private String filePath;
/**
 * File size in bytes.
 */
@Schema(description = "文件大小(字节)")
private Long fileSize;
/**
 * File type.
 */
@Schema(description = "文件类型")
private String fileType;
/**
 * Processing status: 0 = pending, 1 = processing, 2 = completed, 3 = failed.
 */
@Schema(description = "处理状态:0-待处理,1-处理中,2-处理完成,3-处理失败")
private Integer status;
/**
 * Number of segments.
 */
@Schema(description = "分段数量")
private Integer segmentCount;
/**
 * Total number of tokens.
 */
@Schema(description = "总token数量")
private Integer tokenCount;
/**
 * Logical-delete flag: 0 = not deleted, 1 = deleted.
 */
@TableLogic
@Schema(description = "删除标记")
private String delFlag;
}
\ No newline at end of file
package com.pig4cloud.pig.ask.config;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import java.util.concurrent.Executor;
import java.util.concurrent.ThreadPoolExecutor;
/**
 * Asynchronous execution configuration.
 *
 * @author ai
 * @date 2024/12/19
 */
@Slf4j
@Configuration
@EnableAsync
public class AsyncConfig {

    /**
     * Thread pool for vectorization tasks, registered as bean {@code vectorizeExecutor}.
     *
     * @return the initialized executor
     */
    @Bean("vectorizeExecutor")
    public Executor vectorizeExecutor() {
        ThreadPoolTaskExecutor pool = new ThreadPoolTaskExecutor();
        pool.setThreadNamePrefix("vectorize-");

        // Pool sizing: 2 core threads, up to 5, with a queue of 100 pending tasks.
        pool.setCorePoolSize(2);
        pool.setMaxPoolSize(5);
        pool.setQueueCapacity(100);
        pool.setKeepAliveSeconds(60);

        // When the pool is saturated, the submitting thread runs the task itself.
        pool.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());

        // On shutdown, wait up to 60 seconds for submitted tasks to finish.
        pool.setAwaitTerminationSeconds(60);
        pool.setWaitForTasksToCompleteOnShutdown(true);

        pool.initialize();
        log.info("向量化任务执行器初始化完成");
        return pool;
    }
}
\ No newline at end of file
package com.pig4cloud.pig.ask.controller;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.pig4cloud.pig.ask.api.entity.ChatConversation;
import com.pig4cloud.pig.ask.service.ChatConversationService;
import com.pig4cloud.pig.common.core.util.R;
import com.pig4cloud.pig.common.log.annotation.SysLog;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.security.SecurityRequirement;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import org.springframework.http.HttpHeaders;
import org.springframework.web.bind.annotation.*;
/**
 * REST endpoints for managing chat conversation records.
 *
 * <p>All operations delegate directly to {@link ChatConversationService}.
 *
 * @author ai
 * @date 2024/03/21
 */
@RestController
@RequiredArgsConstructor
@RequestMapping("/chat/conversation")
@Tag(description = "conversation", name = "对话记录管理")
@SecurityRequirement(name = HttpHeaders.AUTHORIZATION)
public class ChatConversationController {

    private final ChatConversationService chatConversationService;

    /**
     * Returns one page of conversations.
     *
     * @param page paging parameters bound from the request (typed instead of raw so the
     *             service call is checked by the compiler)
     * @param chatConversation entity whose populated fields are used as query conditions
     * @return the requested page wrapped in {@link R}
     */
    @Operation(summary = "分页查询", description = "分页查询")
    @GetMapping("/page")
    public R<IPage<ChatConversation>> getPage(Page<ChatConversation> page, ChatConversation chatConversation) {
        return R.ok(chatConversationService.page(page, Wrappers.query(chatConversation)));
    }

    /**
     * Looks up a conversation by primary key.
     *
     * @param id conversation id
     * @return the result of {@code getById} wrapped in {@link R}
     */
    @Operation(summary = "通过id查询", description = "通过id查询")
    @GetMapping("/{id}")
    public R<ChatConversation> getById(@PathVariable("id") Long id) {
        return R.ok(chatConversationService.getById(id));
    }

    /**
     * Creates a conversation record.
     *
     * @param chatConversation record to insert
     * @return the boolean returned by the service's {@code save}
     */
    @Operation(summary = "新增对话记录", description = "新增对话记录")
    @SysLog("新增对话记录")
    @PostMapping
    public R<Boolean> save(@RequestBody ChatConversation chatConversation) {
        return R.ok(chatConversationService.save(chatConversation));
    }

    /**
     * Updates a conversation record by its id.
     *
     * @param chatConversation record carrying the id and the new field values
     * @return the boolean returned by the service's {@code updateById}
     */
    @Operation(summary = "修改对话记录", description = "修改对话记录")
    @SysLog("修改对话记录")
    @PutMapping
    public R<Boolean> updateById(@RequestBody ChatConversation chatConversation) {
        return R.ok(chatConversationService.updateById(chatConversation));
    }

    /**
     * Deletes a conversation record by id.
     *
     * @param id conversation id (bound by explicit name, matching {@link #getById(Long)},
     *           so binding does not depend on the {@code -parameters} compiler flag)
     * @return the boolean returned by the service's {@code removeById}
     */
    @Operation(summary = "通过id删除对话记录", description = "通过id删除对话记录")
    @SysLog("通过id删除对话记录")
    @DeleteMapping("/{id}")
    public R<Boolean> removeById(@PathVariable("id") Long id) {
        return R.ok(chatConversationService.removeById(id));
    }
}
\ No newline at end of file
package com.pig4cloud.pig.ask.controller;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.pig4cloud.pig.ask.api.entity.ChatConversationDetail;
import com.pig4cloud.pig.ask.service.ChatConversationDetailService;
import com.pig4cloud.pig.common.core.util.R;
import com.pig4cloud.pig.common.log.annotation.SysLog;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.security.SecurityRequirement;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import org.springframework.http.HttpHeaders;
import org.springframework.web.bind.annotation.*;
import java.util.List;
/**
 * REST endpoints for reading the detail records (individual messages) of a conversation.
 *
 * @author ai
 * @date 2024/03/21
 */
@RestController
@RequiredArgsConstructor
@RequestMapping("/chat/conversation/detail")
@Tag(description = "conversation-detail", name = "对话详细记录管理")
@SecurityRequirement(name = HttpHeaders.AUTHORIZATION)
public class ChatConversationDetailController {

    private final ChatConversationDetailService chatConversationDetailService;

    /**
     * Returns one page of detail records for a conversation, ordered by timestamp ascending.
     *
     * @param page paging parameters bound from the request (typed instead of raw)
     * @param conversationId conversation id taken from the path
     * @return the requested page wrapped in {@link R}
     */
    @Operation(summary = "根据会话ID分页获取对话详情", description = "根据会话ID分页获取对话详情")
    @GetMapping("/page/{conversationId}")
    public R<IPage<ChatConversationDetail>> pageByConversationId(Page<ChatConversationDetail> page,
            @PathVariable("conversationId") String conversationId) {
        IPage<ChatConversationDetail> result = chatConversationDetailService.page(page,
                Wrappers.<ChatConversationDetail>lambdaQuery()
                        .eq(ChatConversationDetail::getConversationId, conversationId)
                        .orderByAsc(ChatConversationDetail::getTimestamp));
        return R.ok(result);
    }

    /**
     * Returns every detail record of a conversation, ordered by timestamp ascending.
     *
     * @param conversationId conversation id taken from the path
     * @return the list of detail records wrapped in {@link R}
     */
    @Operation(summary = "根据会话ID获取对话详情列表", description = "根据会话ID获取对话详情列表")
    @GetMapping("/list/{conversationId}")
    public R<List<ChatConversationDetail>> listByConversationId(
            @PathVariable("conversationId") String conversationId) {
        return R.ok(chatConversationDetailService.list(
                Wrappers.<ChatConversationDetail>lambdaQuery()
                        .eq(ChatConversationDetail::getConversationId, conversationId)
                        .orderByAsc(ChatConversationDetail::getTimestamp)));
    }
}
\ No newline at end of file
package com.pig4cloud.pig.ask.controller;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.pig4cloud.pig.ask.api.entity.KnowledgeBase;
import com.pig4cloud.pig.ask.service.KnowledgeBaseService;
import com.pig4cloud.pig.common.core.util.R;
import com.pig4cloud.pig.common.log.annotation.SysLog;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.security.SecurityRequirement;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import org.springframework.http.HttpHeaders;
import org.springframework.util.StringUtils;
import org.springframework.web.bind.annotation.*;
/**
 * REST endpoints for managing knowledge bases.
 *
 * <p>Create and update validate that the name is non-blank and not already used by
 * another knowledge base before persisting.
 *
 * @author ai
 * @date 2024/12/19
 */
@RestController
@RequiredArgsConstructor
@RequestMapping("/knowledge/base")
@Tag(description = "knowledgeBase", name = "知识库管理")
@SecurityRequirement(name = HttpHeaders.AUTHORIZATION)
public class KnowledgeBaseController {

    private final KnowledgeBaseService knowledgeBaseService;

    /**
     * Returns one page of knowledge bases.
     *
     * @param page paging parameters bound from the request (typed instead of raw)
     * @param knowledgeBase entity whose populated fields are used as query conditions
     * @return the requested page wrapped in {@link R}
     */
    @Operation(summary = "分页查询", description = "分页查询")
    @GetMapping("/page")
    public R<IPage<KnowledgeBase>> getPage(Page<KnowledgeBase> page, KnowledgeBase knowledgeBase) {
        return R.ok(knowledgeBaseService.page(page, Wrappers.query(knowledgeBase)));
    }

    /**
     * Looks up a knowledge base by primary key.
     *
     * @param id knowledge base id
     * @return the result of {@code getById} wrapped in {@link R}
     */
    @Operation(summary = "通过id查询", description = "通过id查询")
    @GetMapping("/{id}")
    public R<KnowledgeBase> getById(@PathVariable("id") Long id) {
        return R.ok(knowledgeBaseService.getById(id));
    }

    /**
     * Creates a knowledge base after validating its name.
     *
     * @param knowledgeBase knowledge base to insert
     * @return a failure result when the name is blank or already taken, otherwise the
     *         boolean returned by the service's {@code save}
     */
    @Operation(summary = "新增知识库", description = "新增知识库")
    @SysLog("新增知识库")
    @PostMapping
    public R<Boolean> save(@RequestBody KnowledgeBase knowledgeBase) {
        // Reject blank names.
        if (!StringUtils.hasText(knowledgeBase.getName())) {
            return R.failed("知识库名称不能为空");
        }
        // The duplicate check compares the trimmed name, so persist the trimmed form too;
        // otherwise "foo " and "foo" could both be stored.
        knowledgeBase.setName(knowledgeBase.getName().trim());
        if (knowledgeBaseService.checkNameExists(knowledgeBase.getName(), null)) {
            return R.failed("知识库名称已存在,请修改后重试");
        }
        return R.ok(knowledgeBaseService.save(knowledgeBase));
    }

    /**
     * Updates a knowledge base after validating its id and name.
     *
     * @param knowledgeBase knowledge base carrying the id and the new field values
     * @return a failure result when the id is missing or the name is blank or taken by a
     *         different record, otherwise the boolean returned by {@code updateById}
     */
    @Operation(summary = "修改知识库", description = "修改知识库")
    @SysLog("修改知识库")
    @PutMapping
    public R<Boolean> updateById(@RequestBody KnowledgeBase knowledgeBase) {
        // Without an id there is no record to update, and the duplicate check below
        // would not exclude the record being edited.
        if (knowledgeBase.getId() == null) {
            return R.failed("知识库ID不能为空");
        }
        // Reject blank names.
        if (!StringUtils.hasText(knowledgeBase.getName())) {
            return R.failed("知识库名称不能为空");
        }
        // Store the same trimmed form that the duplicate check compares against.
        knowledgeBase.setName(knowledgeBase.getName().trim());
        if (knowledgeBaseService.checkNameExists(knowledgeBase.getName(), knowledgeBase.getId())) {
            return R.failed("知识库名称已存在,请修改后重试");
        }
        return R.ok(knowledgeBaseService.updateById(knowledgeBase));
    }

    /**
     * Deletes a knowledge base by id.
     *
     * @param id knowledge base id
     * @return the boolean returned by the service's {@code removeById}
     */
    @Operation(summary = "通过id删除知识库", description = "通过id删除知识库")
    @SysLog("通过id删除知识库")
    @DeleteMapping("/{id}")
    public R<Boolean> removeById(@PathVariable("id") Long id) {
        return R.ok(knowledgeBaseService.removeById(id));
    }

    /**
     * Reports whether a knowledge base name is available.
     *
     * @param name name to check
     * @param id optional id of the record being edited, excluded from the duplicate check
     * @return {@code true} with a message when the name can be used, {@code false} with a
     *         message when it is blank or already taken
     */
    @Operation(summary = "校验知识库名称", description = "校验知识库名称是否重复")
    @GetMapping("/checkName")
    public R<Boolean> checkName(@RequestParam String name, @RequestParam(required = false) Long id) {
        if (!StringUtils.hasText(name)) {
            return R.ok(false, "名称不能为空");
        }
        if (knowledgeBaseService.checkNameExists(name, id)) {
            return R.ok(false, "知识库名称已存在");
        }
        return R.ok(true, "知识库名称可用");
    }
}
\ No newline at end of file
package com.pig4cloud.pig.ask.controller;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.pig4cloud.pig.ask.api.dto.DocumentSegmentResult;
import com.pig4cloud.pig.ask.api.entity.KnowledgeDocument;
import com.pig4cloud.pig.ask.service.KnowledgeDocumentService;
import com.pig4cloud.pig.common.core.util.R;
import com.pig4cloud.pig.common.log.annotation.SysLog;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import io.swagger.v3.oas.annotations.security.SecurityRequirement;
import io.swagger.v3.oas.annotations.tags.Tag;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.util.StringUtils;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
import java.util.List;
/**
* 知识库文档管理
*
* @author ai
* @date 2024/12/19
*/
@Slf4j
@RestController
@RequiredArgsConstructor
@RequestMapping("/knowledge/document")
@Tag(description = "knowledgeDocument", name = "知识库文档管理")
@SecurityRequirement(name = HttpHeaders.AUTHORIZATION)
public class KnowledgeDocumentController {
private final KnowledgeDocumentService knowledgeDocumentService;
/**
* 分页查询
* @param page 分页对象
* @param knowledgeBaseId 知识库ID(必填)
* @param name 文档名称(非必填,模糊搜索)
* @return 分页数据
*/
@Operation(summary = "分页查询", description = "分页查询")
@GetMapping("/page")
public R<IPage<KnowledgeDocument>> getPage(Page page,
@RequestParam Long knowledgeBaseId,
@RequestParam(required = false) String name) {
LambdaQueryWrapper<KnowledgeDocument> wrapper = Wrappers.lambdaQuery();
// 知识库ID必填条件
wrapper.eq(KnowledgeDocument::getKnowledgeBaseId, knowledgeBaseId);
// 文档名称模糊搜索(非必填)
if (StringUtils.hasText(name)) {
wrapper.like(KnowledgeDocument::getName, name.trim());
}
// 按创建时间倒序排列
wrapper.orderByDesc(KnowledgeDocument::getCreateTime);
return R.ok(knowledgeDocumentService.page(page, wrapper));
}
/**
* 通过id查询知识库文档
* @param id id
* @return R
*/
@Operation(summary = "通过id查询", description = "通过id查询")
@GetMapping("/{id}")
public R<KnowledgeDocument> getById(@PathVariable("id") Long id) {
return R.ok(knowledgeDocumentService.getById(id));
}
/**
* 新增知识库文档
* @param knowledgeBaseId 知识库ID
* @param segmentResults 文档分段结果列表
* @return R
*/
@Operation(summary = "新增知识库文档", description = "保存文档分段结果到知识库")
@SysLog("新增知识库文档")
@PostMapping
public R<Boolean> save(@RequestParam Long knowledgeBaseId,
@RequestBody List<DocumentSegmentResult> segmentResults) {
// 校验知识库ID
if (knowledgeBaseId == null || knowledgeBaseId <= 0) {
return R.failed("知识库ID不能为空且必须大于0");
}
// 校验分段结果
if (segmentResults == null || segmentResults.isEmpty()) {
return R.failed("文档分段结果不能为空");
}
try {
boolean result = knowledgeDocumentService.saveSegmentResults(knowledgeBaseId, segmentResults);
return R.ok(result, "知识库文档保存成功");
} catch (Exception e) {
log.error("知识库文档保存失败,知识库ID: {}, 错误: {}", knowledgeBaseId, e.getMessage(), e);
return R.failed("知识库文档保存失败:" + e.getMessage());
}
}
/**
* 修改知识库文档
* @param knowledgeDocument 知识库文档
* @return R
*/
@Operation(summary = "修改知识库文档", description = "修改知识库文档")
@SysLog("修改知识库文档")
@PutMapping
public R<Boolean> updateById(@RequestBody KnowledgeDocument knowledgeDocument) {
return R.ok(knowledgeDocumentService.updateById(knowledgeDocument));
}
/**
* 通过id删除知识库文档
* @param id id
* @return R
*/
@Operation(summary = "通过id删除知识库文档", description = "通过id删除知识库文档")
@SysLog("通过id删除知识库文档")
@DeleteMapping("/{id}")
public R<Boolean> removeById(@PathVariable Long id) {
return R.ok(knowledgeDocumentService.removeById(id));
}
/**
* 文档分段处理
* @param knowledgeBaseId 知识库ID
* @param files 文件数组
* @return 文档分段结果列表
*/
@Operation(summary = "文档分段处理", description = "上传文档并进行分段处理,支持PDF、Word、Excel、TXT、MD等格式")
@SysLog("文档分段处理")
@PostMapping(value = "/segment", consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
public R<List<DocumentSegmentResult>> segmentDocuments(
@Parameter(description = "知识库ID", required = true)
@RequestParam Long knowledgeBaseId,
@Parameter(description = "文件数组", required = true)
@RequestParam("files") MultipartFile[] files) {
// 校验知识库ID
if (knowledgeBaseId == null || knowledgeBaseId <= 0) {
return R.failed("知识库ID不能为空且必须大于0");
}
// 校验文件
if (files == null || files.length == 0) {
return R.failed("请选择要上传的文件");
}
// 校验文件大小和类型
for (MultipartFile file : files) {
if (file.isEmpty()) {
return R.failed("文件不能为空:" + file.getOriginalFilename());
}
// 限制文件大小(50MB)
if (file.getSize() > 50 * 1024 * 1024) {
return R.failed("文件大小不能超过50MB:" + file.getOriginalFilename());
}
// 检查文件类型
String fileName = file.getOriginalFilename();
if (fileName == null || !isValidFileType(fileName)) {
return R.failed("不支持的文件类型:" + fileName + ",支持的格式:PDF、DOC、DOCX、XLS、XLSX、TXT、MD、RTF、ODT");
}
}
try {
List<DocumentSegmentResult> results = knowledgeDocumentService.segmentDocuments(knowledgeBaseId, files);
return R.ok(results, "文档分段处理完成");
} catch (Exception e) {
log.error("文档分段处理失败,知识库ID: {}, 错误: {}", knowledgeBaseId, e.getMessage(), e);
return R.failed("文档分段处理失败:" + e.getMessage());
}
}
/**
* 检查文件类型是否支持
* @param fileName 文件名
* @return 是否支持
*/
private boolean isValidFileType(String fileName) {
if (!StringUtils.hasText(fileName)) {
return false;
}
String extension = fileName.toLowerCase();
return extension.endsWith(".pdf") ||
extension.endsWith(".doc") ||
extension.endsWith(".docx") ||
extension.endsWith(".xls") ||
extension.endsWith(".xlsx") ||
extension.endsWith(".txt") ||
extension.endsWith(".md") ||
extension.endsWith(".rtf") ||
extension.endsWith(".odt");
}
}
\ No newline at end of file
package com.pig4cloud.pig.ask.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.pig4cloud.pig.ask.api.entity.ChatConversationDetail;
import org.apache.ibatis.annotations.Mapper;
/**
* MyBatis mapper for {@link ChatConversationDetail} (conversation detail records).
*
* Declares no methods of its own; it only extends {@link BaseMapper}.
*
* @author ai
* @date 2024/03/21
*/
@Mapper
public interface ChatConversationDetailMapper extends BaseMapper<ChatConversationDetail> {
}
\ No newline at end of file
package com.pig4cloud.pig.ask.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.pig4cloud.pig.ask.api.entity.ChatConversation;
import org.apache.ibatis.annotations.Mapper;
/**
* MyBatis mapper for {@link ChatConversation} (conversation records).
*
* Declares no methods of its own; it only extends {@link BaseMapper}.
*
* @author ai
* @date 2024/03/21
*/
@Mapper
public interface ChatConversationMapper extends BaseMapper<ChatConversation> {
}
\ No newline at end of file
package com.pig4cloud.pig.ask.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.pig4cloud.pig.ask.api.entity.KnowledgeBase;
import org.apache.ibatis.annotations.Mapper;
/**
* MyBatis mapper for {@link KnowledgeBase} (knowledge bases).
*
* Declares no methods of its own; it only extends {@link BaseMapper}.
*
* @author ai
* @date 2024/12/19
*/
@Mapper
public interface KnowledgeBaseMapper extends BaseMapper<KnowledgeBase> {
}
\ No newline at end of file
package com.pig4cloud.pig.ask.mapper;
import com.baomidou.mybatisplus.core.mapper.BaseMapper;
import com.pig4cloud.pig.ask.api.entity.KnowledgeDocument;
import org.apache.ibatis.annotations.Mapper;
/**
* MyBatis mapper for {@link KnowledgeDocument} (knowledge-base documents).
*
* Declares no methods of its own; it only extends {@link BaseMapper}.
*
* @author ai
* @date 2024/12/19
*/
@Mapper
public interface KnowledgeDocumentMapper extends BaseMapper<KnowledgeDocument> {
}
\ No newline at end of file
package com.pig4cloud.pig.ask.service;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.extension.service.IService;
import com.pig4cloud.pig.ask.api.entity.ChatConversationDetail;
import java.util.List;
/**
* Service interface for conversation detail records.
*
* Adds no operations beyond those inherited from {@link IService}.
*
* @author ai
* @date 2024/03/21
*/
public interface ChatConversationDetailService extends IService<ChatConversationDetail> {
}
\ No newline at end of file
package com.pig4cloud.pig.ask.service;
import com.baomidou.mybatisplus.extension.service.IService;
import com.pig4cloud.pig.ask.api.entity.ChatConversation;
/**
* Service interface for conversation records.
*
* Adds no operations beyond those inherited from {@link IService}.
*
* @author ai
* @date 2024/03/21
*/
public interface ChatConversationService extends IService<ChatConversation> {
}
\ No newline at end of file
package com.pig4cloud.pig.ask.service;
import com.baomidou.mybatisplus.extension.service.IService;
import com.pig4cloud.pig.ask.api.entity.KnowledgeBase;
/**
* Service interface for knowledge bases.
*
* @author ai
* @date 2024/12/19
*/
public interface KnowledgeBaseService extends IService<KnowledgeBase> {
/**
* Checks whether a knowledge base name is already in use.
*
* @param name knowledge base name to check
* @param id id of the knowledge base being edited (pass it on update, pass null on create)
* @return true when the name is a duplicate, false otherwise
*/
boolean checkNameExists(String name, Long id);
}
\ No newline at end of file
package com.pig4cloud.pig.ask.service;
import com.baomidou.mybatisplus.extension.service.IService;
import com.pig4cloud.pig.ask.api.dto.DocumentSegmentResult;
import com.pig4cloud.pig.ask.api.entity.KnowledgeDocument;
import org.springframework.web.multipart.MultipartFile;
import java.util.List;
/**
* Service interface for knowledge-base documents.
*
* @author ai
* @date 2024/12/19
*/
public interface KnowledgeDocumentService extends IService<KnowledgeDocument> {
/**
* Splits uploaded files into segments.
*
* @param knowledgeBaseId knowledge base id
* @param files uploaded files
* @return list of segment results
*/
List<DocumentSegmentResult> segmentDocuments(Long knowledgeBaseId, MultipartFile[] files);
/**
* Saves document segment results.
*
* @param knowledgeBaseId knowledge base id
* @param segmentResults segment results to save
* @return whether the save succeeded
*/
boolean saveSegmentResults(Long knowledgeBaseId, List<DocumentSegmentResult> segmentResults);
}
\ No newline at end of file
package com.pig4cloud.pig.ask.service;
import com.pig4cloud.pig.ask.api.dto.DocumentSegmentResult;
import java.util.List;
/**
* Service interface for storing document segments in the vector store.
*
* @author ai
* @date 2024/12/19
*/
public interface VectorStoreService {
/**
* Vectorizes document segments asynchronously.
*
* @param knowledgeBaseId knowledge base id
* @param documentId document id
* @param segmentResults segment results to vectorize
*/
void vectorizeDocumentsAsync(Long knowledgeBaseId, Long documentId, List<DocumentSegmentResult> segmentResults);
/**
* Vectorizes the segments of a single document.
*
* @param knowledgeBaseId knowledge base id
* @param documentId document id
* @param segmentResult segment result of the document
*/
void vectorizeDocument(Long knowledgeBaseId, Long documentId, DocumentSegmentResult segmentResult);
}
\ No newline at end of file
package com.pig4cloud.pig.ask.service.impl;
import com.baomidou.mybatisplus.core.metadata.IPage;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.pig4cloud.pig.ask.api.entity.ChatConversationDetail;
import com.pig4cloud.pig.ask.mapper.ChatConversationDetailMapper;
import com.pig4cloud.pig.ask.service.ChatConversationDetailService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import java.util.List;
/**
* Default implementation of {@link ChatConversationDetailService}.
*
* Declares no members of its own; behaviour comes from {@link ServiceImpl}.
*
* @author ai
* @date 2024/03/21
*/
@Slf4j
@Service
@RequiredArgsConstructor
public class ChatConversationDetailServiceImpl extends ServiceImpl<ChatConversationDetailMapper, ChatConversationDetail> implements ChatConversationDetailService {
}
\ No newline at end of file
package com.pig4cloud.pig.ask.service.impl;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.pig4cloud.pig.ask.api.entity.ChatConversation;
import com.pig4cloud.pig.ask.mapper.ChatConversationMapper;
import com.pig4cloud.pig.ask.service.ChatConversationService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
/**
* Default implementation of {@link ChatConversationService}.
*
* Declares no members of its own; behaviour comes from {@link ServiceImpl}.
*
* @author ai
* @date 2024/03/21
*/
@Slf4j
@Service
@RequiredArgsConstructor
public class ChatConversationServiceImpl extends ServiceImpl<ChatConversationMapper, ChatConversation> implements ChatConversationService {
}
\ No newline at end of file
package com.pig4cloud.pig.ask.service.impl;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.pig4cloud.pig.ask.api.entity.KnowledgeBase;
import com.pig4cloud.pig.ask.mapper.KnowledgeBaseMapper;
import com.pig4cloud.pig.ask.service.KnowledgeBaseService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.util.StringUtils;
/**
 * Default implementation of {@link KnowledgeBaseService}.
 *
 * @author ai
 * @date 2024/12/19
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class KnowledgeBaseServiceImpl extends ServiceImpl<KnowledgeBaseMapper, KnowledgeBase> implements KnowledgeBaseService {

    /**
     * Tells whether another knowledge base already uses the given name.
     *
     * <p>A blank name is never reported as a duplicate. The comparison uses the trimmed
     * name, and when {@code id} is supplied that record is left out of the search so a
     * record does not collide with itself.
     *
     * @param name name to look for
     * @param id id of the record being edited, or {@code null} when creating
     * @return {@code true} when at least one matching record exists
     */
    @Override
    public boolean checkNameExists(String name, Long id) {
        boolean blankName = !StringUtils.hasText(name);
        if (blankName) {
            return false;
        }
        String normalizedName = name.trim();
        LambdaQueryWrapper<KnowledgeBase> criteria = new LambdaQueryWrapper<KnowledgeBase>()
                .eq(KnowledgeBase::getName, normalizedName)
                // The exclusion is only added when an id was provided.
                .ne(id != null, KnowledgeBase::getId, id);
        long matches = this.count(criteria);
        return matches > 0;
    }
}
\ No newline at end of file
package com.pig4cloud.pig.ask.service.impl;
import com.baomidou.mybatisplus.extension.service.impl.ServiceImpl;
import com.pig4cloud.pig.ask.api.dto.DocumentSegmentResult;
import com.pig4cloud.pig.ask.api.entity.KnowledgeDocument;
import com.pig4cloud.pig.ask.mapper.KnowledgeDocumentMapper;
import com.pig4cloud.pig.ask.service.KnowledgeDocumentService;
import com.pig4cloud.pig.ask.service.VectorStoreService;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document;
import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.core.io.InputStreamResource;
import org.springframework.stereotype.Service;
import org.springframework.util.StringUtils;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* 知识库文档服务实现类
*
* @author ai
* @date 2024/12/19
*/
@Slf4j
@Service
@RequiredArgsConstructor
public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentMapper, KnowledgeDocument> implements KnowledgeDocumentService {
private final VectorStoreService vectorStoreService;
@Override
public List<DocumentSegmentResult> segmentDocuments(Long knowledgeBaseId, MultipartFile[] files) {
List<DocumentSegmentResult> results = new ArrayList<>();
if (files == null || files.length == 0) {
log.warn("文件数组为空,知识库ID: {}", knowledgeBaseId);
return results;
}
// 初始化文本分割器,设置分段参数
TokenTextSplitter textSplitter = new TokenTextSplitter(
1000, // 默认分段大小(token数)
200, // 重叠token数
5, // 最小分段大小
10000, // 最大分段大小
true // 保持分隔符
);
for (MultipartFile file : files) {
if (file.isEmpty()) {
log.warn("跳过空文件: {}", file.getOriginalFilename());
continue;
}
try {
// 使用Tika读取文档
TikaDocumentReader documentReader = new TikaDocumentReader(
new InputStreamResource(file.getInputStream())
);
// 读取文档内容
List<Document> documents = documentReader.get();
if (documents.isEmpty()) {
log.warn("文档解析失败或内容为空: {}", file.getOriginalFilename());
continue;
}
// 合并所有文档内容
StringBuilder contentBuilder = new StringBuilder();
for (Document doc : documents) {
if (StringUtils.hasText(doc.getText())) {
contentBuilder.append(doc.getText()).append("\n");
}
}
String fullContent = contentBuilder.toString().trim();
if (!StringUtils.hasText(fullContent)) {
log.warn("文档内容为空: {}", file.getOriginalFilename());
continue;
}
// 对文档进行分段
Document fullDocument = new Document(fullContent);
List<Document> segments = textSplitter.apply(List.of(fullDocument));
// 构建分段结果
DocumentSegmentResult result = new DocumentSegmentResult();
result.setFileName(file.getOriginalFilename());
result.setTotalSegments(segments.size());
result.setTotalCharacters(fullContent.length());
List<DocumentSegmentResult.DocumentSegment> segmentList = new ArrayList<>();
for (int i = 0; i < segments.size(); i++) {
Document segment = segments.get(i);
DocumentSegmentResult.DocumentSegment segmentDto = new DocumentSegmentResult.DocumentSegment();
segmentDto.setIndex(i + 1);
segmentDto.setContent(segment.getText());
segmentDto.setCharCount(segment.getText().length());
// 简单估算token数(大约1个中文字符=1.5token,英文单词=1token)
segmentDto.setTokenCount(estimateTokenCount(segment.getText()));
segmentList.add(segmentDto);
}
result.setSegments(segmentList);
results.add(result);
log.info("文档分段完成: {}, 分段数: {}, 总字符数: {}",
file.getOriginalFilename(), segments.size(), fullContent.length());
} catch (IOException e) {
log.error("文档处理失败: {}, 错误: {}", file.getOriginalFilename(), e.getMessage(), e);
// 创建错误结果
DocumentSegmentResult errorResult = new DocumentSegmentResult();
errorResult.setFileName(file.getOriginalFilename());
errorResult.setSegments(new ArrayList<>());
errorResult.setTotalSegments(0);
errorResult.setTotalCharacters(0);
results.add(errorResult);
}
}
return results;
}
@Override
public boolean saveSegmentResults(Long knowledgeBaseId, List<DocumentSegmentResult> segmentResults) {
if (segmentResults == null || segmentResults.isEmpty()) {
log.warn("分段结果为空,无法保存,知识库ID: {}", knowledgeBaseId);
return false;
}
List<KnowledgeDocument> knowledgeDocuments = new ArrayList<>();
for (DocumentSegmentResult result : segmentResults) {
// 创建文档记录
KnowledgeDocument document = new KnowledgeDocument();
document.setKnowledgeBaseId(knowledgeBaseId);
document.setName(result.getFileName());
document.setFileName(result.getFileName());
document.setStatus(0); // 默认待处理状态
document.setSegmentCount(result.getTotalSegments());
document.setTokenCount(result.getSegments().stream()
.mapToInt(DocumentSegmentResult.DocumentSegment::getTokenCount)
.sum());
knowledgeDocuments.add(document);
}
// 批量保存文档
boolean saveResult = this.saveBatch(knowledgeDocuments);
if (saveResult) {
// 异步进行向量化处理
for (int i = 0; i < knowledgeDocuments.size(); i++) {
KnowledgeDocument document = knowledgeDocuments.get(i);
DocumentSegmentResult segmentResult = segmentResults.get(i);
// 异步向量化
vectorStoreService.vectorizeDocumentsAsync(
knowledgeBaseId,
document.getId(),
List.of(segmentResult)
);
log.info("启动异步向量化处理: 文档ID={}, 文件名={}",
document.getId(), segmentResult.getFileName());
}
}
return saveResult;
}
/**
* 估算token数量
*
* @param content 文本内容
* @return 预估token数
*/
private Integer estimateTokenCount(String content) {
if (!StringUtils.hasText(content)) {
return 0;
}
// 简单的token估算逻辑
// 中文字符按1.5个token计算,英文单词按1个token计算
int chineseCharCount = 0;
int englishWordCount = 0;
for (char c : content.toCharArray()) {
if (c >= 0x4e00 && c <= 0x9fff) {
chineseCharCount++;
}
}
// 估算英文单词数
String[] words = content.replaceAll("[\\u4e00-\\u9fff]", "").split("\\s+");
englishWordCount = words.length;
return Math.round(chineseCharCount * 1.5f + englishWordCount);
}
}
\ No newline at end of file
package com.pig4cloud.pig.ask.service.impl;
import com.pig4cloud.pig.ask.api.dto.DocumentSegmentResult;
import com.pig4cloud.pig.ask.api.entity.KnowledgeDocument;
import com.pig4cloud.pig.ask.service.KnowledgeDocumentService;
import com.pig4cloud.pig.ask.service.VectorStoreService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.ai.document.Document;
import org.springframework.ai.embedding.EmbeddingModel;
import org.springframework.ai.vectorstore.VectorStore;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Lazy;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Default implementation of {@link VectorStoreService}.
 *
 * <p>Converts document segments into Spring AI {@link Document}s carrying descriptive
 * metadata and adds them to the configured {@link VectorStore}. The asynchronous
 * variant also maintains the processing status of the owning {@link KnowledgeDocument}.
 *
 * @author ai
 * @date 2024/12/19
 */
@Slf4j
@Service
public class VectorStoreServiceImpl implements VectorStoreService {

    /** Document status: 1 = processing. */
    private static final int STATUS_PROCESSING = 1;

    /** Document status: 2 = processing finished. */
    private static final int STATUS_COMPLETED = 2;

    /** Document status: 3 = processing failed. */
    private static final int STATUS_FAILED = 3;

    private final VectorStore vectorStore;

    private final EmbeddingModel embeddingModel;

    // Injected lazily through a field; presumably to break the circular dependency with
    // KnowledgeDocumentServiceImpl, which depends on this service — confirm.
    @Autowired
    @Lazy
    private KnowledgeDocumentService knowledgeDocumentService;

    /**
     * Creates the service.
     *
     * @param vectorStore vector store receiving the documents
     * @param embeddingModel embedding model (stored but not referenced in this class)
     */
    public VectorStoreServiceImpl(VectorStore vectorStore, EmbeddingModel embeddingModel) {
        this.vectorStore = vectorStore;
        this.embeddingModel = embeddingModel;
    }

    /**
     * Vectorizes the segments of a document on the {@code vectorizeExecutor} pool.
     *
     * <p>The document status moves to processing first, then to completed, or to failed
     * when any step throws. A document without usable segments is marked completed as
     * well, so it never stays in the processing state.
     *
     * @param knowledgeBaseId knowledge base id
     * @param documentId document id
     * @param segmentResults segment results to vectorize
     */
    @Async("vectorizeExecutor")
    @Override
    public void vectorizeDocumentsAsync(Long knowledgeBaseId, Long documentId, List<DocumentSegmentResult> segmentResults) {
        try {
            log.info("开始异步向量化处理,知识库ID: {}, 文档ID: {}", knowledgeBaseId, documentId);
            updateStatus(documentId, STATUS_PROCESSING);

            List<Document> documents = new ArrayList<>();
            if (segmentResults != null) {
                for (DocumentSegmentResult segmentResult : segmentResults) {
                    documents.addAll(toVectorDocuments(knowledgeBaseId, documentId, segmentResult));
                }
            }
            if (documents.isEmpty()) {
                log.warn("没有可向量化的分段,文档ID: {}", documentId);
            } else {
                vectorStore.add(documents);
                log.info("向量化完成,共处理 {} 个分段", documents.size());
            }
            updateStatus(documentId, STATUS_COMPLETED);
        } catch (Exception e) {
            log.error("向量化处理失败,知识库ID: {}, 文档ID: {}, 错误: {}", knowledgeBaseId, documentId, e.getMessage(), e);
            updateStatus(documentId, STATUS_FAILED);
        }
    }

    /**
     * Vectorizes the segments of a single document synchronously.
     *
     * <p>Failures are logged and not rethrown; the document status is not touched.
     *
     * @param knowledgeBaseId knowledge base id
     * @param documentId document id
     * @param segmentResult segment result of the document
     */
    @Override
    public void vectorizeDocument(Long knowledgeBaseId, Long documentId, DocumentSegmentResult segmentResult) {
        // Resolved up front so that the error log below cannot itself fail on a null result.
        String fileName = segmentResult != null ? segmentResult.getFileName() : null;
        try {
            List<Document> documents = toVectorDocuments(knowledgeBaseId, documentId, segmentResult);
            if (!documents.isEmpty()) {
                vectorStore.add(documents);
                log.info("单个文档向量化完成: {}, 分段数: {}", fileName, documents.size());
            }
        } catch (Exception e) {
            log.error("单个文档向量化失败,知识库ID: {}, 文档ID: {}, 文件名: {}, 错误: {}",
                    knowledgeBaseId, documentId, fileName, e.getMessage(), e);
        }
    }

    /**
     * Converts the segments of one result into vector-store documents.
     *
     * <p>Null segments and segments with blank content are skipped because there is no
     * text to embed.
     *
     * @param knowledgeBaseId knowledge base id stored in the metadata
     * @param documentId document id stored in the metadata
     * @param segmentResult segment result, may be null
     * @return the documents to add; empty when there is nothing to convert
     */
    private List<Document> toVectorDocuments(Long knowledgeBaseId, Long documentId, DocumentSegmentResult segmentResult) {
        List<Document> documents = new ArrayList<>();
        if (segmentResult == null || segmentResult.getSegments() == null) {
            return documents;
        }
        for (DocumentSegmentResult.DocumentSegment segment : segmentResult.getSegments()) {
            if (segment == null || segment.getContent() == null || segment.getContent().isBlank()) {
                continue;
            }
            Map<String, Object> metadata = new HashMap<>();
            putIfPresent(metadata, "knowledge_base_id", knowledgeBaseId);
            putIfPresent(metadata, "document_id", documentId);
            putIfPresent(metadata, "file_name", segmentResult.getFileName());
            putIfPresent(metadata, "segment_index", segment.getIndex());
            putIfPresent(metadata, "char_count", segment.getCharCount());
            putIfPresent(metadata, "token_count", segment.getTokenCount());
            // NOTE(review): metadata is passed through the constructor because recent
            // Spring AI Document versions do not appear to expose a metadata setter —
            // confirm against the project's Spring AI version.
            documents.add(new Document(segment.getContent(), metadata));
        }
        return documents;
    }

    /**
     * Adds a metadata entry only when its value is present.
     *
     * @param metadata target map
     * @param key metadata key
     * @param value metadata value, ignored when null
     */
    private static void putIfPresent(Map<String, Object> metadata, String key, Object value) {
        if (value != null) {
            metadata.put(key, value);
        }
    }

    /**
     * Loads the document and stores a new processing status on it.
     *
     * @param documentId document id
     * @param status status code to store
     */
    private void updateStatus(Long documentId, int status) {
        KnowledgeDocument document = knowledgeDocumentService.getById(documentId);
        if (document != null) {
            document.setStatus(status);
            knowledgeDocumentService.updateById(document);
        }
    }
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment