Project: linyangyang / ask_data_ai_admin

Commit 2d0f6bc2, authored Jul 25, 2025 by 林洋洋
Commit message: dcs卡件 (DCS card)
Parent: a5f5931e

Showing 16 changed files with 372 additions and 45 deletions (+372 -45)
Changed files:

  DocumentSegmentPatternRequest.java   ...n/java/com/ask/api/dto/DocumentSegmentPatternRequest.java   +34  -0
  KeyAndValueVO.java                   ...ta-ai-api/src/main/java/com/ask/api/vo/KeyAndValueVO.java   +16  -0
  AskVectorStoreController.java        ...ain/java/com/ask/controller/AskVectorStoreController.java   +3   -0
  ChatController.java                  ...-biz/src/main/java/com/ask/controller/ChatController.java   +5   -4
  KnowledgeDocumentController.java     .../java/com/ask/controller/KnowledgeDocumentController.java   +18  -1
  SysFileController.java               ...z/src/main/java/com/ask/controller/SysFileController.java   +14  -2
  AskVectorStoreService.java           .../src/main/java/com/ask/service/AskVectorStoreService.java   +3   -0
  KnowledgeDocumentService.java        ...c/main/java/com/ask/service/KnowledgeDocumentService.java   +8   -0
  SysFileService.java                  ...-ai-biz/src/main/java/com/ask/service/SysFileService.java   +1   -0
  AskVectorStoreServiceImpl.java       .../java/com/ask/service/impl/AskVectorStoreServiceImpl.java   +22  -4
  KnowledgeDocumentServiceImpl.java    ...va/com/ask/service/impl/KnowledgeDocumentServiceImpl.java   +72  -9
  SysFileServiceImpl.java              ...rc/main/java/com/ask/service/impl/SysFileServiceImpl.java   +20  -0
  ExcelTools.java                      ...k-data-ai-biz/src/main/java/com/ask/tools/ExcelTools.java   +108 -1
  FluxUtils.java                       ...sk-data-ai-biz/src/main/java/com/ask/utils/FluxUtils.java   +47  -23
  application.yml                      ...ta-ai/ask-data-ai-boot/src/main/resources/application.yml   +1   -1
  dcs卡件日报模板.docx                  ...ata-ai/ask-data-ai-boot/src/main/resources/dcs卡件日报模板.docx   +0   -0
ask-data-ai/ask-data-ai-api/src/main/java/com/ask/api/dto/DocumentSegmentPatternRequest.java (new file, mode 100644)

package com.ask.api.dto;

import com.ask.api.entity.SysFile;
import io.swagger.v3.oas.annotations.media.Schema;
import jakarta.validation.constraints.NotEmpty;
import jakarta.validation.constraints.NotNull;
import lombok.Data;

import java.util.List;

@Data
@Schema(description = "文档分段请求参数")
public class DocumentSegmentPatternRequest {

    @NotNull(message = "知识库ID不能为空")
    @Schema(description = "知识库ID", required = true, example = "1")
    private Long knowledgeBaseId;

    @NotEmpty(message = "文件数组不能为空")
    @Schema(description = "文件数组", required = true)
    private List<SysFile> files;

    @Schema(description = "分割符")
    @NotEmpty(message = "分割符不能为NULL")
    private String key;

    @Schema(description = "正则表达式")
    @NotEmpty(message = "正则表达式不能为NULL")
    private String value;

    @Schema(description = "最大长度")
    @NotNull(message = "最大长度不能为空")
    private Integer maxLength;
}
ask-data-ai/ask-data-ai-api/src/main/java/com/ask/api/vo/KeyAndValueVO.java (new file, mode 100644)

package com.ask.api.vo;

import io.swagger.v3.oas.annotations.media.Schema;
import lombok.AllArgsConstructor;
import lombok.Data;

@AllArgsConstructor
@Data
public class KeyAndValueVO {

    @Schema(description = "分割符")
    private String key;

    @Schema(description = "正则表达式")
    private String value;
}
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/controller/AskVectorStoreController.java

package com.ask.controller;

import com.ask.api.entity.AskVectorStore;
import com.ask.api.vo.KeyAndValueVO;
import com.ask.common.core.R;
import com.ask.service.AskVectorStoreService;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
...
@@ -150,4 +151,6 @@ public class AskVectorStoreController {
            return R.failed("批量删除失败:" + e.getMessage());
        }
    }
}
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/controller/ChatController.java

...
@@ -109,7 +109,7 @@ public class ChatController {
        Message userMessage = new UserMessage(message);
        Prompt prompt = new Prompt(List.of(systemMessage, userMessage));
-        return FluxUtils.wrapDeepSeekStream(deepseekChatClient.prompt(prompt)
+        return FluxUtils.wrapDeepSeekStream(openAiChatClient.prompt(prompt)
                .advisors(messageChatMemoryAdvisor)
                .advisors(a -> a.param(ChatMemory.CONVERSATION_ID, conversationId))
                .stream()
...
@@ -136,7 +136,7 @@ public class ChatController {
        //向量数据召回
        FilterExpressionBuilder builder = new FilterExpressionBuilder();
        Filter.Expression filter = builder.eq("isEnabled", 1).build();
-        List<Document> documents = chatService.retrieveDocuments(message, 0.75, 5, filter);
+        List<Document> documents = chatService.retrieveDocuments(message, 0.50, 5, filter);
        //获取文件引用
        String reference = chatService.getReference(documents);
        //拼装知识库上下文内容
...
@@ -145,7 +145,7 @@ public class ChatController {
        String userPrompt = ragPromptService.createRagPrompt(message, context, historyMemory);
        StringBuilder contentBuilder = new StringBuilder();
-        return FluxUtils.wrapDeepSeekStream(deepseekChatClient.prompt()
+        return FluxUtils.wrapDeepSeekStream(openAiChatClient.prompt()
                .user(userPrompt)
                .system("你是一个智能助手,基于以下上下文和历史对话回答问题,请用简洁的语言回答问题,并确保答案准确,要求" + "1.以 Markdown 格式输出")
...
@@ -169,10 +169,11 @@ public class ChatController {
        Message userMessage = new UserMessage(message);
        Prompt prompt = new Prompt(List.of(systemMessage, userMessage));
-        return FluxUtils.wrapDeepSeekStream(deepseekChatClient.prompt(prompt)
+        return FluxUtils.wrapDeepSeekStream(openAiChatClient.prompt(prompt)
                .advisors(messageChatMemoryAdvisor)
                .advisors(a -> a.param(ChatMemory.CONVERSATION_ID, conversationId))
                .tools(excelTools)
                .advisors()
                .stream()
                .chatResponse());
...
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/controller/KnowledgeDocumentController.java

...
@@ -3,13 +3,16 @@ package com.ask.controller;
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.util.IdUtil;
import cn.hutool.core.util.StrUtil;
import com.ask.api.dto.DocumentSegmentPatternRequest;
import com.ask.api.dto.DocumentSegmentRequest;
import com.ask.api.dto.DocumentSegmentResult;
import com.ask.api.entity.AskVectorStore;
import com.ask.api.entity.KnowledgeDocument;
import com.ask.api.entity.SysFile;
import com.ask.api.vo.KeyAndValueVO;
import com.ask.common.core.FileTemplate;
import com.ask.common.core.R;
import com.ask.service.AskVectorStoreService;
import com.ask.service.KnowledgeDocumentService;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.metadata.IPage;
...
@@ -53,6 +56,8 @@ public class KnowledgeDocumentController {
    private final VectorStore vectorStore;

    private final AskVectorStoreService askVectorStoreService;

    /**
     * 分页查询
...
@@ -174,7 +179,7 @@ public class KnowledgeDocumentController {
     * @param request 请求体
     * @return 文档分段结果列表
     */
-    @Operation(summary = "文档分段处理", description = "上传文档并进行分段处理,支持PDF、Word、Excel、TXT、MD等格式")
+    @Operation(summary = "智能文档分段处理", description = "智能文档上传文档并进行分段处理,支持PDF、Word、Excel、TXT、MD等格式")
    @PostMapping(value = "/segment")
    public R<List<DocumentSegmentResult>> segmentDocuments(@Valid @RequestBody DocumentSegmentRequest request) {
...
@@ -187,6 +192,18 @@ public class KnowledgeDocumentController {
        }
    }

    @Operation(summary = "获取分割符", description = "获取分割符")
    @GetMapping("/select/pattern")
    public R<List<KeyAndValueVO>> selectPattern() {
        return R.ok(askVectorStoreService.splitPattern());
    }

    @Operation(summary = "自定义分割", description = "自定义分割")
    @PostMapping("/split/pattern")
    public R<List<DocumentSegmentResult>> splitPattern(@Valid @RequestBody DocumentSegmentPatternRequest request) {
        return R.ok(knowledgeDocumentService.segmentDocumentsByPattern(request));
    }

    /**
     * 检查文件类型是否支持
     *
...
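Illustrative sketch, not from the commit: the two endpoints above expose the pattern-based segmentation. GET /select/pattern returns the preset separator/regex pairs and POST /split/pattern runs the split. The snippet below shows how a caller might populate the new DocumentSegmentPatternRequest and invoke the service method behind /split/pattern; the single SysFile, the "##" preset and the 2000-character limit are made-up example values.

import com.ask.api.dto.DocumentSegmentPatternRequest;
import com.ask.api.dto.DocumentSegmentResult;
import com.ask.api.entity.SysFile;
import com.ask.service.KnowledgeDocumentService;

import java.util.List;

// Hypothetical caller: builds the request the same way the /split/pattern endpoint expects it.
class SplitPatternCallSketch {
    List<DocumentSegmentResult> splitByLevelTwoHeading(KnowledgeDocumentService knowledgeDocumentService, SysFile file) {
        DocumentSegmentPatternRequest request = new DocumentSegmentPatternRequest();
        request.setKnowledgeBaseId(1L);                                      // target knowledge base
        request.setFiles(List.of(file));                                     // files already stored via SysFile
        request.setKey("##");                                                // separator label from splitPattern()
        request.setValue("(?<=\\n)(?<!#)## (?!#).*|(?<=^)(?<!#)## (?!#).*"); // matching regex from splitPattern()
        request.setMaxLength(2000);                                          // segments longer than this are truncated
        return knowledgeDocumentService.segmentDocumentsByPattern(request);
    }
}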
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/controller/SysFileController.java

...
@@ -103,12 +103,24 @@ public class SysFileController {
    @GetMapping("/{bucket}/{fileName}")
    public void file(@Parameter(description = "存储桶名称", required = true, example = "1")
                     @PathVariable String bucket,
                     @Parameter(description = "文件名称", required = true, example = "example.pdf")
                     @PathVariable String fileName,
                     HttpServletResponse response) {
        sysFileService.getFile(bucket, fileName, response);
    }

    @GetMapping("/{bucket}/{fileName}/{originalName}")
    public void fileByUuid(@Parameter(description = "存储桶名称", required = true, example = "1")
                           @PathVariable String bucket,
                           @Parameter(description = "文件名称", required = true, example = "example.pdf")
                           @PathVariable String fileName,
                           @Parameter(description = "原始文件名称", required = true, example = "example.pdf")
                           @PathVariable String originalName,
                           HttpServletResponse response) {
        sysFileService.getFileByUUid(bucket, fileName, response, originalName);
    }
}
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/service/AskVectorStoreService.java

package com.ask.service;

import com.ask.api.entity.AskVectorStore;
import com.ask.api.vo.KeyAndValueVO;
import com.baomidou.mybatisplus.extension.service.IService;
import org.apache.ibatis.annotations.Param;
import org.springframework.ai.document.Document;

import java.util.ArrayList;
import java.util.List;

/**
...
@@ -24,4 +26,5 @@ public interface AskVectorStoreService extends IService<AskVectorStore> {
     */
    int batchUpdateVectorEmbedding(List<AskVectorStore> askVectorStores);

    public List<KeyAndValueVO> splitPattern();
}
\ No newline at end of file
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/service/KnowledgeDocumentService.java

package com.ask.service;

import com.ask.api.dto.DocumentSegmentPatternRequest;
import com.ask.api.dto.DocumentSegmentRequest;
import com.ask.api.dto.DocumentSegmentResult;
import com.ask.api.entity.KnowledgeDocument;
...
@@ -22,6 +23,13 @@ public interface KnowledgeDocumentService extends IService<KnowledgeDocument> {
     */
    List<DocumentSegmentResult> segmentDocuments(DocumentSegmentRequest request);

    /**
     * 文档分段处理
     * @return 文档分段结果列表
     */
    List<DocumentSegmentResult> segmentDocumentsByPattern(DocumentSegmentPatternRequest request);

    /**
     * 保存文档分段结果
     * @param knowledgeBaseId 知识库ID
...
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/service/SysFileService.java

...
@@ -48,6 +48,7 @@ public interface SysFileService extends IService<SysFile> {
     */
    void getFile(String bucket, String fileName, HttpServletResponse response);

    void getFileByUUid(String bucket, String fileName, HttpServletResponse response, String originalName);

    public InputStream getFileStream(String bruckName, String fileName);
...
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/service/impl/AskVectorStoreServiceImpl.java

package com.ask.service.impl;

import com.ask.api.entity.AskVectorStore;
import com.ask.api.vo.KeyAndValueVO;
import com.ask.mapper.AskVectorStoreMapper;
import com.ask.service.AskVectorStoreService;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
...
@@ -27,10 +28,7 @@ import org.springframework.util.StringUtils;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.time.LocalDateTime;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.UUID;
+import java.util.*;
import java.util.stream.Collectors;

/**
...
@@ -126,4 +124,24 @@ public class AskVectorStoreServiceImpl extends ServiceImpl<AskVectorStoreMapper,
        }
    }

    @Override
    public List<KeyAndValueVO> splitPattern() {
        List<KeyAndValueVO> resultList = new ArrayList<>();
        resultList.add(new KeyAndValueVO("#", "(?<=^)# .*|(?<=\\n)# .*"));
        resultList.add(new KeyAndValueVO("##", "(?<=\\n)(?<!#)## (?!#).*|(?<=^)(?<!#)## (?!#).*"));
        resultList.add(new KeyAndValueVO("##", "(?<=\\n)(?<!#)## (?!#).*|(?<=^)(?<!#)## (?!#).*"));
        resultList.add(new KeyAndValueVO("###", "(?<=\\n)(?<!#)### (?!#).*|(?<=^)(?<!#)### (?!#).*"));
        resultList.add(new KeyAndValueVO("####", "(?<=\\n)(?<!#)#### (?!#).*|(?<=^)(?<!#)#### (?!#).*"));
        resultList.add(new KeyAndValueVO("#####", "(?<=\\n)(?<!#)##### (?!#).*|(?<=^)(?<!#)##### (?!#).*"));
        resultList.add(new KeyAndValueVO("######", "(?<=\\n)(?<!#)###### (?!#).*|(?<=^)(?<!#)###### (?!#).*"));
        resultList.add(new KeyAndValueVO("-", "(?<! )- .*"));
        resultList.add(new KeyAndValueVO("space", "(?<! ) (?! )"));
        resultList.add(new KeyAndValueVO("semicolon", "(?<!;);(?!;)"));
        resultList.add(new KeyAndValueVO("comma", "(?<!,),(?!,)"));
        resultList.add(new KeyAndValueVO("period", "(?<!。)。(?!。)"));
        resultList.add(new KeyAndValueVO("enter", "(?<!\\n)\\n(?!\\n)"));
        resultList.add(new KeyAndValueVO("blank line", "(?<!\\n)\\n\\n(?!\\n)"));
        return resultList;
    }
}
\ No newline at end of file
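Illustrative sketch, not from the commit: each splitPattern() preset pairs a display label (key) with a regex (value) whose lookarounds are meant to match exactly one heading level, so the "##" pattern matches level-2 Markdown headings but not "###" ones (note the "##" entry appears twice in the committed list). A small standalone check of how that committed pattern behaves; the sample text is invented.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

// Standalone check of the "##" preset returned by splitPattern().
public class SplitPatternDemo {
    public static void main(String[] args) {
        String regex = "(?<=\\n)(?<!#)## (?!#).*|(?<=^)(?<!#)## (?!#).*";
        String text = "# title\n## section one\ncontent\n### subsection\n## section two\n";
        Matcher m = Pattern.compile(regex).matcher(text);
        while (m.find()) {
            System.out.println(m.group());   // prints "## section one" and "## section two"
        }
    }
}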
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/service/impl/KnowledgeDocumentServiceImpl.java

package com.ask.service.impl;

import com.ask.api.dto.DocumentSegmentPatternRequest;
import com.ask.api.dto.DocumentSegmentRequest;
import com.ask.api.dto.DocumentSegmentResult;
import com.ask.api.entity.AskVectorStore;
...
@@ -20,6 +21,7 @@ import org.springframework.ai.reader.ExtractedTextFormatter;
import org.springframework.ai.reader.pdf.ParagraphPdfDocumentReader;
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
import org.springframework.ai.reader.tika.TikaDocumentReader;
import org.springframework.ai.transformer.splitter.TextSplitter;
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
import org.springframework.core.io.InputStreamResource;
import org.springframework.stereotype.Service;
...
@@ -30,6 +32,8 @@ import java.io.InputStream;
import java.time.LocalDateTime;
import java.util.*;
import java.util.concurrent.CompletableFuture;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

/**
 * 知识库文档服务实现类
...
@@ -73,9 +77,8 @@ public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentM
    /**
     * PDF文档切片函数 - 支持多种切片策略
     *
     * @param inputStream PDF文件的输入流
     * @param sliceStrategy 切片策略 (PAGE, PARAGRAPH, CUSTOM)
     * @param maxTokensPerSlice 每片最大token数(仅对CUSTOM策略有效)
     * @return 文档片段列表,每个Document就是一片
     */
    public List<Document> slicePdfDocument(String bucketName, String fileName, SliceStrategy sliceStrategy) {
...
@@ -387,9 +390,9 @@ public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentM
//            String docText = documentParseService.extractText(sysFileService.getFileStream(file.getBucketName(), file.getFileName()));
//            List<Document> segments = sliceByTokens(docText);
            SliceStrategy sliceStrategy = SliceStrategy.CUSTOM;
-            if ("pdf".equals(file.getType())) {
-                sliceStrategy = SliceStrategy.PARAGRAPH;
-            }
+//            if ("pdf".equals(file.getType())) {
+//                sliceStrategy = SliceStrategy.PARAGRAPH;
+//            }
            // 读取文档内容 - 使用新的PDF切片函数
            List<Document> segments = slicePdfDocument(file.getBucketName(), file.getFileName(),
...
@@ -412,9 +415,69 @@ public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentM
                DocumentSegmentResult.DocumentSegment segmentDto = new DocumentSegmentResult.DocumentSegment();
                segmentDto.setIndex(i + 1);
                segmentDto.setContent(segment.getText());
-                segmentDto.setCharCount((Integer) segment.getMetadata().get("size"));
-                // 简单估算token数(大约1个中文字符=1.5token,英文单词=1token)
-                segmentDto.setTitle(segment.getMetadata().get("title").toString());
+                segmentDto.setCharCount(Objects.requireNonNull(segment.getText()).length());
                segmentList.add(segmentDto);
            }
            result.setSegments(segmentList);
            results.add(result);
            log.info("文档分段完成: {}, 分段数: {}", file.getOriginal(), segments.size());
        }
        return results;
    }

    @Override
    public List<DocumentSegmentResult> segmentDocumentsByPattern(DocumentSegmentPatternRequest request) {
        List<DocumentSegmentResult> results = new ArrayList<>();
        for (SysFile file : request.getFiles()) {
            TikaDocumentReader tikaDocumentReader = new TikaDocumentReader(
                    new InputStreamResource(sysFileService.getFileStream(file.getBucketName(), file.getFileName())));
            List<Document> pageDocuments = tikaDocumentReader.read();
            // 合并所有页面内容
            StringBuilder text = new StringBuilder();
            for (Document pageDoc : pageDocuments) {
                if (StringUtils.hasText(pageDoc.getText())) {
                    text.append(pageDoc.getText()).append("\n");
                }
            }
            if (text.isEmpty()) {
                log.warn("文档解析失败或内容为空: {}", file.getOriginal());
                continue;
            }
            String[] texts = text.toString().split(request.getKey());
            List<Document> segments = new ArrayList<>();
            for (String s : texts) {
                if (org.apache.commons.lang3.StringUtils.isBlank(s)) {
                    continue;
                }
                if (s.length() > request.getMaxLength()) {
                    s = s.substring(0, request.getMaxLength());
                }
                Document document = new Document(s.trim());
                segments.add(document);
            }
            // 构建分段结果
            DocumentSegmentResult result = new DocumentSegmentResult();
            result.setFileName(file.getOriginal());
            result.setFilePath(file.getUrl());
            result.setTotalSegments(segments.size());
            result.setFileSize(file.getFileSize());
            List<DocumentSegmentResult.DocumentSegment> segmentList = new ArrayList<>();
            for (int i = 0; i < segments.size(); i++) {
                Document segment = segments.get(i);
                DocumentSegmentResult.DocumentSegment segmentDto = new DocumentSegmentResult.DocumentSegment();
                segmentDto.setIndex(i + 1);
                segmentDto.setContent(segment.getText());
                segmentDto.setCharCount(Objects.requireNonNull(segment.getText()).length());
//                segmentDto.setCharCount((Integer) segment.getMetadata().get("size"));
//                // 简单估算token数(大约1个中文字符=1.5token,英文单词=1token)
//                segmentDto.setTitle(segment.getMetadata().get("title").toString());
                segmentList.add(segmentDto);
            }
            result.setSegments(segmentList);
...
@@ -453,7 +516,7 @@ public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentM
            // 构建metadata
            Map<String, Object> metadata = new HashMap<>();
            metadata.put("knowledgeBaseId", document.getKnowledgeBaseId());
-            metadata.put("title", vo.getTitle());
+            metadata.put("title", org.apache.commons.lang3.StringUtils.isBlank(vo.getTitle()) ? "" : vo.getTitle());
            metadata.put("documentId", document.getId());
            metadata.put("fileName", document.getFileName());
            metadata.put("filePath", document.getFilePath());
...
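Illustrative sketch, not from the commit: segmentDocumentsByPattern merges the Tika-extracted pages, splits the text with String.split, which treats its argument as a regular expression, skips blank pieces, and truncates anything longer than maxLength rather than re-splitting it. The standalone snippet below isolates that split-and-truncate step; the sample text and the limit are invented.

// Standalone sketch of the split-and-truncate step used by segmentDocumentsByPattern.
public class PatternSplitDemo {
    public static void main(String[] args) {
        String text = "## Intro\nbody A\n## Details\nbody B\n";
        int maxLength = 12;                       // hypothetical maxLength from the request
        for (String s : text.split("## ")) {      // String.split interprets "## " as a regex (all literal chars here)
            if (s.isBlank()) {                    // stands in for the commons-lang3 isBlank check in the service
                continue;
            }
            if (s.length() > maxLength) {
                s = s.substring(0, maxLength);    // overly long segments are cut, not re-split
            }
            System.out.println("[" + s.trim() + "]");
        }
    }
}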
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/service/impl/SysFileServiceImpl.java

...
@@ -38,6 +38,8 @@ import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile;

import java.io.InputStream;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
...
@@ -105,6 +107,24 @@ public class SysFileServiceImpl extends ServiceImpl<SysFileMapper, SysFile> impl
        }
    }

    @Override
    public void getFileByUUid(String bucket, String fileName, HttpServletResponse response, String originalName) {
        try (S3Object s3Object = fileTemplate.getObject(bucket, fileName)) {
            response.setContentType("application/octet-stream; charset=UTF-8");
            // 2️⃣ 文件名:先 UTF-8 再 ISO-8859-1 转码(兼容所有浏览器)
            String encodedName = new String(originalName.getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1);
            response.setHeader("Content-Disposition", "attachment; filename=\"" + encodedName + "\"");
            //// 3️⃣(可选)额外设置火狐/Edge 专用头
            // response.setHeader("Content-Disposition",
            //         "attachment; filename*=UTF-8''" + URLEncoder.encode(fileName, StandardCharsets.UTF_8));
            IoUtil.copy(s3Object.getObjectContent(), response.getOutputStream());
        } catch (Exception e) {
            log.error("文件读取异常: {}", e.getLocalizedMessage());
        }
    }

    /**
     * 获取文件流
     * @param fileName
...
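Illustrative sketch, not from the commit: the Content-Disposition handling above re-encodes the UTF-8 bytes of originalName as ISO-8859-1 so the header, which is written essentially byte-for-byte, still carries the original UTF-8 bytes; a client that reads those header bytes back as UTF-8 recovers the Chinese filename. A standalone round-trip check of that encoding trick; the example filename is invented.

import java.nio.charset.StandardCharsets;

// Standalone round-trip check of the header-encoding trick used in getFileByUUid.
public class FilenameHeaderDemo {
    public static void main(String[] args) {
        String originalName = "dcs卡件日报(25.7.25).docx";
        // What the service puts into the header: UTF-8 bytes viewed as ISO-8859-1 characters.
        String encodedName = new String(originalName.getBytes(StandardCharsets.UTF_8), StandardCharsets.ISO_8859_1);
        // What a client recovers after reading the raw header bytes as UTF-8 again.
        String decoded = new String(encodedName.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
        System.out.println(decoded.equals(originalName));   // true: the bytes survive the round trip
    }
}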
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/tools/ExcelTools.java

...
@@ -45,6 +45,34 @@ public class ExcelTools {
    @Autowired
    private FileTemplate fileTemplate;

    private final List<String> placeholders = List.of(
            "@天津智深数据.iICTS_MoQty",
            "@天津智深数据.iICTS_CompletionQty",
            "@天津智深数据.iICTS_OKRate",
            "@天津智深数据.日iAS计划数量",
            "@天津智深数据.iAS_CompletionQty",
            "@天津智深数据.iAS_OKRate",
            "@天津智深数据.iSMTS_MoQty",
            "@天津智深数据.iSMTS_CompletionQty",
            "@天津智深数据.iSMTS_OKRate",
            "@天津智深数据.iDIPS_MoQty",
            "@天津智深数据.iDIPS_CompletionQty",
            "@天津智深数据.iDIPS_OKRate",
            "@天津智深数据.iFCTS_MoQty",
            "@天津智深数据.iFCTS_CompletionQty",
            "@天津智深数据.iFCTS_OKRate",
            "@天津智深数据.iCCS_MoQty",
            "@天津智深数据.iCCS_CompletionQty",
            "@天津智深数据.iDIPS_OKRate",
            "@天津智深数据.iPS_MoQty",
            "@天津智深数据.iPS_CompletionQty",
            "@天津智深数据.iBS_MoQty",
            "@天津智深数据.iBS_CompletionQty",
            "@天津智深数据.iBS_OKRate",
            "@天津智深数据.今日_PlanQty",
            "@天津智深数据.今日生产成品");

    private String formatLocalDate(LocalDate date) {
        // 创建自定义格式器:2位年份.单/双位月.单/双位日
        DateTimeFormatter formatter = new DateTimeFormatterBuilder()
...
@@ -61,7 +89,7 @@ public class ExcelTools {
        return date.format(formatter);
    }

-    @Tool(description = "Get the production records word")
+    @Tool(description = "获取科环集团电力运营日报,入参时间(yyyy-M-d)")
    public String getProductionFile(String dateStr) {
        log.info("getProductionFile{}", dateStr);
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-M-d");
...
@@ -95,6 +123,57 @@ public class ExcelTools {
        return jsonObject.toString();
    }

    @Tool(description = "获取天津智深DCS卡件日报", returnDirect = true)
    public String getDcsFile(String dateStr) {
        var ref = new Object() {
            String markDown = "| 产线 | 计划产量 | 实际产量 | 成品率 |\n"
                    + "| --- | --- | --- | --- |\n"
                    + "| iICTS | @天津智深数据.iICTS_MoQty | @天津智深数据.iICTS_CompletionQty | @天津智深数据.iICTS_OKRate |\n"
                    + "| iAS | @天津智深数据.日iAS计划数量 | @天津智深数据.iAS_CompletionQty | @天津智深数据.iAS_OKRate |\n"
                    + "| iSMTS | @天津智深数据.iSMTS_MoQty | @天津智深数据.iSMTS_CompletionQty | @天津智深数据.iSMTS_OKRate |\n"
                    + "| iDIPS | @天津智深数据.iDIPS_MoQty | @天津智深数据.iDIPS_CompletionQty | @天津智深数据.iDIPS_OKRate |\n"
                    + "| iFCTS | @天津智深数据.iFCTS_MoQty | @天津智深数据.iFCTS_CompletionQty | @天津智深数据.iFCTS_OKRate |\n"
                    + "| iCCS | @天津智深数据.iCCS_MoQty | @天津智深数据.iCCS_CompletionQty | @天津智深数据.iDIPS_OKRate |\n"
                    + "| iPS | @天津智深数据.iPS_MoQty | @天津智深数据.iPS_CompletionQty | \\- |\n"
                    + "| iBS | @天津智深数据.iBS_MoQty | @天津智深数据.iBS_CompletionQty | @天津智深数据.iBS_OKRate |\n"
                    + "| **总计** | @天津智深数据.今日_PlanQty | @天津智深数据.今日生产成品 | \\- |\n"
                    + "\n"
                    + "<p align=\"center\">\n"
                    + "  <a href=\"fileUrl\" download>\uD83D\uDCCE 点击下载文件</a>\n"
                    + "</p>";
        };
        log.info("getDcsFile{}", dateStr);
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-M-d");
        LocalDate date = LocalDate.parse(dateStr, formatter);
        String dateFileStr = formatLocalDate(date);
        LocalDateTime startTime = date.atTime(0, 0, 0);
        LocalDateTime endTime = date.atTime(23, 59, 59);
        String bucketName = "dcs";
        // 动态生成文件名
        String fileName = UUID.randomUUID().toString().replace("-", "") + ".docx";
        String originalName = String.format("dcs卡件日报(%s).docx", dateFileStr);
        String url = baseUrl + "/admin/sys-file/" + bucketName + "/" + fileName + "/" + originalName;
        ref.markDown = ref.markDown.replace("fileUrl", url);
        Map<String, Object> paramMap = generateParamData(startTime, endTime, placeholders);
        log.info("入参{}", paramMap);
        paramMap.forEach((key, value) -> {
            ref.markDown = ref.markDown.replace(key, String.valueOf(value));
        });
        boolean result = convertUtils.fillWordLoop(fileName, "dcs卡件日报模板.docx", paramMap, bucketName);
        log.info("{} {}", result, ref.markDown);
        if (!result) {
            return "dcs卡件日报生成失败";
        }
        return ref.markDown;
    }

    public Map<String, Object> generateReport(LocalDateTime startTime, LocalDateTime endTime) {
        Map<String, Object> paramMap = new HashMap<>();
        paramMap.put("year", startTime.getYear());
...
@@ -158,6 +237,34 @@ public class ExcelTools {
        return paramMap;
    }

    public Map<String, Object> generateParamData(LocalDateTime startTime, LocalDateTime endTime, List<String> params) {
        Map<String, Object> paramMap = new HashMap<>();
        paramMap.put("year", startTime.getYear());
        paramMap.put("month", startTime.getMonthValue());
        paramMap.put("day", startTime.getDayOfMonth());
        if (CollectionUtils.isEmpty(params)) {
            return paramMap;
        }
        List<AskHistoryCollectData> askHistoryCollectDataList =
                askHistoryCollectDataMapper.selectLastRecordForEachPath(params, startTime, endTime);
        Map<String, Double> result = askHistoryCollectDataList.stream()
                .collect(Collectors.toMap(
                        AskHistoryCollectData::getPath,              // 获取 path 作为键
                        AskHistoryCollectData::getValue,             // 获取 value 作为值
                        (existingValue, newValue) -> existingValue   // 如果有重复的键,这里决定如何处理。这里选择保留现有的值。
                ));
        params.forEach(param -> {
            paramMap.put(param, "");
            if (result.containsKey(param)) {
                paramMap.put(param, result.get(param));
            }
        });
        return paramMap;
    }

    private void handleType1(String key, List<AskReportDict.Params> params, Map<String, Object> paramMap, Map<String, Double> result) {
        AskReportDict.Params param = params.get(0);
...
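Illustrative sketch, not from the commit: generateParamData keys the collected values by placeholder path and, per the inline comment, keeps the first value when a path repeats; every requested placeholder also defaults to an empty string so unmatched table cells render blank instead of failing. The standalone snippet below isolates that Collectors.toMap merge rule; the Sample record is a stand-in for AskHistoryCollectData.

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

// Standalone illustration of the duplicate-key handling in generateParamData.
public class MergeRuleDemo {
    record Sample(String path, Double value) {}   // stand-in for AskHistoryCollectData

    public static void main(String[] args) {
        List<Sample> rows = List.of(
                new Sample("@天津智深数据.iBS_MoQty", 120.0),
                new Sample("@天津智深数据.iBS_MoQty", 130.0));   // duplicate path
        Map<String, Double> byPath = rows.stream()
                .collect(Collectors.toMap(Sample::path, Sample::value,
                        (existing, incoming) -> existing));      // keep the value seen first
        System.out.println(byPath);   // {@天津智深数据.iBS_MoQty=120.0}
    }
}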
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/utils/FluxUtils.java

package com.ask.utils;

import com.baomidou.mybatisplus.core.toolkit.StringUtils;
import org.springframework.ai.chat.messages.AssistantMessage;
import org.springframework.ai.chat.model.ChatResponse;
import org.springframework.ai.deepseek.DeepSeekAssistantMessage;
import reactor.core.publisher.Flux;

import java.lang.reflect.Field;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
...
@@ -21,62 +23,84 @@ public class FluxUtils {
        return upstream
                .flatMapIterable(resp -> {
-                    DeepSeekAssistantMessage msg = (DeepSeekAssistantMessage) resp.getResult().getOutput();
+                    AssistantMessage msg = resp.getResult().getOutput();
+                    String reasoningContent = "";
+                    String textContent = msg.getText();    // 普通回答
+                    try {
+                        // 反射读取 DeepSeekAssistantMessage.reasoningContent
+                        Field f = msg.getClass().getDeclaredField("reasoningContent");
+                        f.setAccessible(true);
+                        reasoningContent = (String) f.get(msg);
+                    } catch (Exception ignored) { /* 不是 DeepSeekAssistantMessage 时留空 */ }

                    StringBuilder sb = new StringBuilder();
-                    if (reasoningStarted.compareAndSet(false, true)) {
+                    // 推理阶段
+                    if (!reasoningStarted.get()) {
+                        reasoningStarted.set(true);
                        sb.append("<think>");
                    }
-                    // 推理阶段:第一次出现推理内容时输出 <think>
-                    if (StringUtils.isNotBlank(msg.getReasoningContent())) {
-                        sb.append(msg.getReasoningContent());
+                    if (StringUtils.isNotBlank(reasoningContent)) {
+                        sb.append(reasoningContent);
                    }
-                    // 回答阶段:第一次出现答案时输出 </think><answer>
-                    if (StringUtils.isNotBlank(msg.getText())) {
-                        if (answerStarted.compareAndSet(false, true)) {
+                    // 回答阶段
+                    if (StringUtils.isNotBlank(textContent)) {
+                        if (!answerStarted.get()) {
+                            answerStarted.set(true);
                            sb.append("</think><answer>");
                        }
-                        sb.append(msg.getText());
+                        sb.append(textContent);
                    }
                    return List.of(sb.toString());
                })
-                .concatWith(Flux.just("</answer>"));
+                // 末尾补一次关闭标签
+                .concatWith(Flux.just("</answer>"));
    }

    public static Flux<String> wrapDeepSeekStream(Flux<ChatResponse> upstream, StringBuilder stringBuilder) {
        AtomicBoolean reasoningStarted = new AtomicBoolean(false);
        AtomicBoolean answerStarted = new AtomicBoolean(false);
        return upstream
                .flatMapIterable(resp -> {
-                    DeepSeekAssistantMessage msg = (DeepSeekAssistantMessage) resp.getResult().getOutput();
+                    AssistantMessage msg = resp.getResult().getOutput();
+                    String reasoningContent = "";
+                    String textContent = msg.getText();    // 普通回答
+                    try {
+                        // 反射读取 DeepSeekAssistantMessage.reasoningContent
+                        Field f = msg.getClass().getDeclaredField("reasoningContent");
+                        f.setAccessible(true);
+                        reasoningContent = (String) f.get(msg);
+                    } catch (Exception ignored) { /* 不是 DeepSeekAssistantMessage 时留空 */ }
                    StringBuilder sb = new StringBuilder();
-                    if (reasoningStarted.compareAndSet(false, true)) {
+                    // 推理阶段
+                    if (!reasoningStarted.get()) {
+                        reasoningStarted.set(true);
                        sb.append("<think>");
                    }
-                    // 推理阶段:第一次出现推理内容时输出 <think>
-                    if (StringUtils.isNotBlank(msg.getReasoningContent())) {
-                        sb.append(msg.getReasoningContent());
+                    if (StringUtils.isNotBlank(reasoningContent)) {
+                        sb.append(reasoningContent);
                    }
-                    // 回答阶段:第一次出现答案时输出 </think><answer>
-                    if (StringUtils.isNotBlank(msg.getText())) {
-                        stringBuilder.append(msg.getText());
+                    if (StringUtils.isNotBlank(textContent)) {
+                        stringBuilder.append(textContent);
                        if (answerStarted.compareAndSet(false, true)) {
                            sb.append("</think><answer>");
                        }
-                        sb.append(msg.getText());
+                        sb.append(textContent);
                    }
                    return List.of(sb.toString());
                })
-                .concatWith(Flux.just("</answer>"));
+                // 末尾补一次关闭标签
+                .concatWith(Flux.just("</answer>"));
    }
}
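Illustrative sketch, not from the commit: the reflective block added here lets one stream wrapper serve both chat clients. When the concrete message class declares a reasoningContent field (as DeepSeekAssistantMessage does) the value is read; any other AssistantMessage falls into the catch and leaves the reasoning empty. A stripped-down standalone sketch of that fallback; the message classes below are stand-ins, not the Spring AI types.

import java.lang.reflect.Field;

// Standalone sketch of the reflective fallback in wrapDeepSeekStream.
// PlainMessage / ReasoningMessage stand in for AssistantMessage / DeepSeekAssistantMessage.
public class ReasoningFieldDemo {
    static class PlainMessage {}
    static class ReasoningMessage {
        private final String reasoningContent = "sample reasoning";
    }

    static String readReasoning(Object msg) {
        try {
            Field f = msg.getClass().getDeclaredField("reasoningContent");
            f.setAccessible(true);
            return (String) f.get(msg);
        } catch (Exception ignored) {
            return "";   // type without the field: fall back to empty reasoning
        }
    }

    public static void main(String[] args) {
        System.out.println(readReasoning(new ReasoningMessage()));   // "sample reasoning"
        System.out.println(readReasoning(new PlainMessage()));       // ""
    }
}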
ask-data-ai/ask-data-ai-boot/src/main/resources/application.yml

...
@@ -37,7 +37,7 @@ spring:
      api-key: sk-ae96ff281ff644c992843c64a711a950
      chat:
        options:
-          model: qwen-plus
+          model: deepseek-r1-0528
    embedding:
      base-url: https://dashscope.aliyuncs.com/compatible-mode
      api-key: sk-ae96ff281ff644c992843c64a711a950
...
ask-data-ai/ask-data-ai-boot/src/main/resources/dcs卡件日报模板.docx (new file, mode 100644)

File added