Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
A
ask_data_ai_admin
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
linyangyang
ask_data_ai_admin
Commits
5bb40bed
Commit
5bb40bed
authored
Jul 24, 2025
by
林洋洋
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
添加图片库备用
parent
e6672292
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
254 additions
and
29 deletions
+254
-29
AskImagesRecord.java
...api/src/main/java/com/ask/api/entity/AskImagesRecord.java
+20
-0
pom.xml
ask-data-ai/ask-data-ai-biz/pom.xml
+10
-0
AskHistoryCollectDataMapper.java
...main/java/com/ask/mapper/AskHistoryCollectDataMapper.java
+8
-0
AskImagesRecordMapper.java
...z/src/main/java/com/ask/mapper/AskImagesRecordMapper.java
+11
-0
DocumentParseService.java
.../main/java/com/ask/service/impl/DocumentParseService.java
+109
-0
KnowledgeDocumentServiceImpl.java
...va/com/ask/service/impl/KnowledgeDocumentServiceImpl.java
+33
-8
ExcelTools.java
...k-data-ai-biz/src/main/java/com/ask/tools/ExcelTools.java
+40
-20
AskHistoryCollectDataMapper.xml
...src/main/resources/mapper/AskHistoryCollectDataMapper.xml
+22
-0
application.yml
...ta-ai/ask-data-ai-boot/src/main/resources/application.yml
+1
-1
No files found.
ask-data-ai/ask-data-ai-api/src/main/java/com/ask/api/entity/AskImagesRecord.java
0 → 100644
View file @
5bb40bed
package
com
.
ask
.
api
.
entity
;
import
com.baomidou.mybatisplus.annotation.TableName
;
import
io.swagger.v3.oas.annotations.media.Schema
;
import
lombok.Data
;
import
java.time.LocalDateTime
;
/**
 * Entity for one stored image, mapped to the {@code ask_images_record} table.
 */
@Data
@TableName
(
value
=
"ask_images_record"
)
@Schema
(
description
=
"图片表"
)
public
class
AskImagesRecord
{
// Identifier of the image record.
private
Long
id
;
// Name of the image.
private
String
imageName
;
// Raw bytes of the image.
private
byte
[]
imageData
;
}
ask-data-ai/ask-data-ai-biz/pom.xml
View file @
5bb40bed
...
...
@@ -31,6 +31,16 @@
<version>
${project.version}
</version>
</dependency>
<dependency>
<groupId>
commons-io
</groupId>
<artifactId>
commons-io
</artifactId>
<version>
2.17.0
</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.apache.tika</groupId>-->
<!-- <artifactId>tika-parsers-ocr</artifactId>-->
<!-- <version>3.0.0</version>-->
<!-- </dependency>-->
<!-- API -->
<dependency>
<groupId>
com.ask
</groupId>
...
...
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/mapper/AskHistoryCollectDataMapper.java
View file @
5bb40bed
...
...
@@ -3,9 +3,17 @@ package com.ask.mapper;
import
com.ask.api.entity.AskHistoryCollectData
;
import
com.baomidou.mybatisplus.core.mapper.BaseMapper
;
import
org.apache.ibatis.annotations.Mapper
;
import
org.apache.ibatis.annotations.Param
;
import
org.apache.ibatis.annotations.Select
;
import
java.time.LocalDateTime
;
import
java.util.List
;
/**
 * MyBatis-Plus mapper for {@link AskHistoryCollectData}.
 */
@Mapper
public
interface
AskHistoryCollectDataMapper
extends
BaseMapper
<
AskHistoryCollectData
>
{
/**
 * Fetches, for each requested path, the most recent record inside a time window.
 * The SQL is the {@code selectLastRecordForEachPath} statement of the mapper XML:
 * rows are ranked per path by {@code datetime} descending and only the first is returned.
 *
 * @param paths     paths to look up (bound to the SQL {@code IN} list)
 * @param startTime lower bound of the {@code datetime BETWEEN} filter
 * @param endTime   upper bound of the {@code datetime BETWEEN} filter
 * @return one record per path that has data in the window
 */
List
<
AskHistoryCollectData
>
selectLastRecordForEachPath
(
@Param
(
"paths"
)
List
<
String
>
paths
,
@Param
(
"startTime"
)
LocalDateTime
startTime
,
@Param
(
"endTime"
)
LocalDateTime
endTime
);
}
\ No newline at end of file
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/mapper/AskImagesRecordMapper.java
0 → 100644
View file @
5bb40bed
package
com
.
ask
.
mapper
;
import
com.ask.api.entity.AskHistoryCollectData
;
import
com.ask.api.entity.AskImagesRecord
;
import
com.baomidou.mybatisplus.core.mapper.BaseMapper
;
import
org.apache.ibatis.annotations.Mapper
;
/**
 * MyBatis-Plus mapper for {@link AskImagesRecord}.
 * Declares no methods of its own; everything comes from {@link BaseMapper}.
 */
@Mapper
public
interface
AskImagesRecordMapper
extends
BaseMapper
<
AskImagesRecord
>
{
}
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/service/impl/DocumentParseService.java
0 → 100644
View file @
5bb40bed
package
com
.
ask
.
service
.
impl
;
import
com.ask.api.entity.AskImagesRecord
;
import
com.ask.mapper.AskImagesRecordMapper
;
import
lombok.AllArgsConstructor
;
import
org.apache.tika.exception.TikaException
;
import
org.apache.tika.extractor.EmbeddedDocumentExtractor
;
import
org.apache.tika.metadata.Metadata
;
import
org.apache.tika.parser.AutoDetectParser
;
import
org.apache.tika.parser.ParseContext
;
import
org.apache.tika.parser.Parser
;
import
org.apache.tika.parser.microsoft.OfficeParserConfig
;
import
org.apache.tika.sax.ContentHandlerDecorator
;
import
org.springframework.stereotype.Component
;
import
org.xml.sax.Attributes
;
import
org.xml.sax.ContentHandler
;
import
org.xml.sax.SAXException
;
import
java.io.IOException
;
import
java.io.InputStream
;
import
java.util.LinkedHashMap
;
import
java.util.Map
;
@Component
@AllArgsConstructor
public
class
DocumentParseService
{
private
final
AskImagesRecordMapper
askImagesRecordMapper
;
public
String
extractText
(
InputStream
inputStream
)
{
// 初始化解析器、元数据和上下文
Parser
parser
=
new
AutoDetectParser
();
Metadata
metadata
=
new
Metadata
();
ParseContext
parseContext
=
new
ParseContext
();
OfficeParserConfig
officeParserConfig
=
new
OfficeParserConfig
();
//忽略页眉页脚
officeParserConfig
.
setIncludeHeadersAndFooters
(
false
);
parseContext
.
set
(
OfficeParserConfig
.
class
,
officeParserConfig
);
Map
<
String
,
Long
>
imageMap
=
new
LinkedHashMap
<>();
// 自定义ContentHandler用于插入占位符
class
MarkdownImageHandler
extends
ContentHandlerDecorator
{
private
final
StringBuilder
markdown
=
new
StringBuilder
();
private
String
localName
=
null
;
@Override
public
void
characters
(
char
[]
ch
,
int
start
,
int
length
)
{
String
text
=
new
String
(
ch
,
start
,
length
);
if
(
this
.
localName
.
equals
(
"h1"
))
{
markdown
.
append
(
"# "
).
append
(
text
);
}
else
if
(
this
.
localName
.
equals
(
"p"
))
{
markdown
.
append
(
"\n"
).
append
(
text
);
}
else
{
markdown
.
append
(
text
);
}
}
@Override
public
void
startElement
(
String
uri
,
String
localName
,
String
qName
,
Attributes
attrs
)
{
this
.
localName
=
localName
;
// System.out.println("localName="+localName+" qName="+qName+" text="+text);
if
(
"img"
.
equals
(
localName
))
{
// 捕获图片节点
String
src
=
attrs
.
getValue
(
"src"
);
if
(
src
!=
null
&&
src
.
startsWith
(
"embedded:"
))
{
String
imageName
=
src
.
split
(
":"
)[
1
];
//TODO 存储图片
AskImagesRecord
askImagesRecord
=
new
AskImagesRecord
();
askImagesRecord
.
setImageName
(
imageName
);
askImagesRecordMapper
.
insert
(
askImagesRecord
);
imageMap
.
put
(
imageName
,
askImagesRecord
.
getId
());
markdown
.
append
(
".
append
(
askImagesRecord
.
getId
()).
append
(
")\n"
);
}
}
}
public
String
getMarkdown
()
{
return
markdown
.
toString
();
}
}
MarkdownImageHandler
contentHandler
=
new
MarkdownImageHandler
();
EmbeddedDocumentExtractor
extractor
=
new
EmbeddedDocumentExtractor
()
{
@Override
public
boolean
shouldParseEmbedded
(
Metadata
metadata
)
{
// 只处理图片类型
return
metadata
.
get
(
Metadata
.
CONTENT_TYPE
)
!=
null
&&
metadata
.
get
(
Metadata
.
CONTENT_TYPE
).
startsWith
(
"image/"
);
}
@Override
public
void
parseEmbedded
(
InputStream
inputStream
,
ContentHandler
embeddedHandler
,
Metadata
metadata
,
boolean
b
)
throws
IOException
,
SAXException
{
String
fileName
=
metadata
.
get
(
"resourceName"
);
Long
imageId
=
imageMap
.
get
(
fileName
);
AskImagesRecord
askImagesRecord
=
askImagesRecordMapper
.
selectById
(
imageId
);
askImagesRecord
.
setImageData
(
inputStream
.
readAllBytes
());
askImagesRecordMapper
.
updateById
(
askImagesRecord
);
}
};
parseContext
.
set
(
EmbeddedDocumentExtractor
.
class
,
extractor
);
// 开始解析文档
try
{
parser
.
parse
(
inputStream
,
contentHandler
,
metadata
,
parseContext
);
}
catch
(
IOException
|
SAXException
|
TikaException
e
)
{
throw
new
RuntimeException
(
e
);
}
//System.out.println("文件内容:" + contentHandler.getMarkdown());
return
contentHandler
.
getMarkdown
();
}
}
\ No newline at end of file
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/service/impl/KnowledgeDocumentServiceImpl.java
View file @
5bb40bed
...
...
@@ -45,7 +45,7 @@ public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentM
private
final
SysFileService
sysFileService
;
private
final
AskVectorStoreService
askVectorStoreService
;
private
final
AsyncVectorizationService
asyncVectorizationService
;
private
final
DocumentParseService
documentParseService
;
/**
...
...
@@ -78,8 +78,8 @@ public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentM
* @param maxTokensPerSlice 每片最大token数(仅对CUSTOM策略有效)
* @return 文档片段列表,每个Document就是一片
*/
public
List
<
Document
>
slicePdfDocument
(
String
bucketName
,
String
fileName
,
SliceStrategy
sliceStrategy
)
{
InputStreamResource
resource
=
new
InputStreamResource
(
sysFileService
.
getFileStream
(
bucketName
,
fileName
));
public
List
<
Document
>
slicePdfDocument
(
String
bucketName
,
String
fileName
,
SliceStrategy
sliceStrategy
)
{
InputStreamResource
resource
=
new
InputStreamResource
(
sysFileService
.
getFileStream
(
bucketName
,
fileName
));
List
<
Document
>
documents
=
new
ArrayList
<>();
try
{
...
...
@@ -99,9 +99,9 @@ public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentM
}
log
.
info
(
"PDF切片完成,策略: {}, 切片数量: {}"
,
sliceStrategy
,
documents
.
size
());
}
catch
(
Exception
e
)
{
}
catch
(
Exception
e
)
{
log
.
error
(
"PDF切片失败,策略: {}, 错误: {}"
,
sliceStrategy
,
e
.
getMessage
(),
e
);
documents
=
sliceByTokens
(
new
InputStreamResource
(
sysFileService
.
getFileStream
(
bucketName
,
fileName
)));
documents
=
sliceByTokens
(
new
InputStreamResource
(
sysFileService
.
getFileStream
(
bucketName
,
fileName
)));
}
return
documents
;
...
...
@@ -146,7 +146,7 @@ public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentM
.
build
())
.
withPagesPerDocument
(
0
)
.
build
());
List
<
Document
>
pageDocuments
=
pdfReader
.
read
();
List
<
Document
>
pageDocuments
=
pdfReader
.
read
();
List
<
Document
>
paragraphDocuments
=
new
ArrayList
<>();
...
...
@@ -348,6 +348,28 @@ public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentM
return
tokenDocuments
;
}
/**
 * Splits plain text into token-based chunks and tags each chunk with metadata.
 *
 * <p>Every returned chunk carries a {@code "size"} entry (character length of its text)
 * and a {@code "title"} entry set to the empty string.
 *
 * @param text the full text to split
 * @return the chunks produced by the token splitter
 */
private List<Document> sliceByTokens(String text) {
    final int chunkSize = 4096;            // chunk size
    final int minChunkSizeChars = 50;      // minimum chunk size in characters
    final int minChunkLengthToEmbed = 50;  // minimum length worth embedding
    // NOTE(review): the original comment calls this the "maximum segment size"; in Spring AI's
    // TokenTextSplitter the fourth constructor argument appears to be maxNumChunks. Confirm.
    final int fourthSplitterArg = 1000;
    final boolean keepSeparator = true;    // keep separators

    TokenTextSplitter splitter = new TokenTextSplitter(
            chunkSize, minChunkSizeChars, minChunkLengthToEmbed, fourthSplitterArg, keepSeparator);

    List<Document> chunks = splitter.apply(List.of(new Document(text)));
    chunks.forEach(chunk -> {
        chunk.getMetadata().put("size", Objects.requireNonNull(chunk.getText()).length());
        chunk.getMetadata().put("title", "");
    });
    return chunks;
}
/**
* PDF切片策略枚举
*/
...
...
@@ -361,9 +383,12 @@ public class KnowledgeDocumentServiceImpl extends ServiceImpl<KnowledgeDocumentM
List
<
DocumentSegmentResult
>
results
=
new
ArrayList
<>();
for
(
SysFile
file
:
request
.
getFiles
())
{
// String docText = documentParseService.extractText(sysFileService.getFileStream(file.getBucketName(), file.getFileName()));
// List<Document> segments = sliceByTokens(docText);
SliceStrategy
sliceStrategy
=
SliceStrategy
.
CUSTOM
;
if
(
"pdf"
.
equals
(
file
.
getType
()))
{
sliceStrategy
=
SliceStrategy
.
PARAGRAPH
;
if
(
"pdf"
.
equals
(
file
.
getType
()))
{
sliceStrategy
=
SliceStrategy
.
PARAGRAPH
;
}
// 读取文档内容 - 使用新的PDF切片函数
List
<
Document
>
segments
=
slicePdfDocument
(
...
...
ask-data-ai/ask-data-ai-biz/src/main/java/com/ask/tools/ExcelTools.java
View file @
5bb40bed
...
...
@@ -21,11 +21,9 @@ import java.time.format.DateTimeFormatter;
import
java.time.format.DateTimeFormatterBuilder
;
import
java.time.format.SignStyle
;
import
java.time.temporal.ChronoField
;
import
java.util.HashMap
;
import
java.util.List
;
import
java.util.Map
;
import
java.util.Objects
;
import
java.util.*
;
import
java.util.concurrent.atomic.AtomicReference
;
import
java.util.stream.Collectors
;
@Component
@Slf4j
...
...
@@ -105,6 +103,28 @@ public class ExcelTools {
List
<
AskReportDict
>
askReportDicts
=
askReportDictMapper
.
selectList
(
Wrappers
.
lambdaQuery
(
AskReportDict
.
class
).
eq
(
AskReportDict:
:
getType
,
1
)
);
List
<
String
>
paramSer
=
new
ArrayList
<>();
for
(
AskReportDict
askReportDict
:
askReportDicts
)
{
askReportDict
.
convertParamsToParamList
();
askReportDict
.
getParamList
().
forEach
(
params
->
{
if
(
params
.
getType
()
==
1
)
{
if
(!
paramSer
.
contains
(
params
.
getParam
()))
{
paramSer
.
add
(
params
.
getParam
());
}
}
});
}
if
(
CollectionUtils
.
isEmpty
(
paramSer
))
{
return
paramMap
;
}
List
<
AskHistoryCollectData
>
askHistoryCollectDataList
=
askHistoryCollectDataMapper
.
selectLastRecordForEachPath
(
paramSer
,
startTime
,
endTime
);
Map
<
String
,
Double
>
result
=
askHistoryCollectDataList
.
stream
()
.
collect
(
Collectors
.
toMap
(
AskHistoryCollectData:
:
getPath
,
// 获取 path 作为键
AskHistoryCollectData:
:
getValue
,
// 获取 value 作为值
(
existingValue
,
newValue
)
->
existingValue
// 如果有重复的键,这里决定如何处理。这里选择保留现有的值。
));
for
(
AskReportDict
askReportDict
:
askReportDicts
)
{
String
key
=
askReportDict
.
getKey
();
...
...
@@ -117,13 +137,13 @@ public class ExcelTools {
switch
(
askReportDict
.
getType
())
{
case
1
:
handleType1
(
askReportDict
.
getKey
(),
params
,
paramMap
,
startTime
,
endTime
);
handleType1
(
askReportDict
.
getKey
(),
params
,
paramMap
,
result
);
break
;
case
2
:
handleType2
(
askReportDict
.
getKey
(),
params
,
paramMap
,
startTime
,
endTime
);
handleType2
(
askReportDict
.
getKey
(),
params
,
paramMap
,
result
);
break
;
case
3
:
handleType3
(
askReportDict
.
getKey
(),
params
,
paramMap
,
startTime
,
endTime
);
handleType3
(
askReportDict
.
getKey
(),
params
,
paramMap
,
result
);
break
;
default
:
// Handle other types if necessary
...
...
@@ -134,50 +154,50 @@ public class ExcelTools {
return
paramMap
;
}
private
void
handleType1
(
String
key
,
List
<
AskReportDict
.
Params
>
params
,
Map
<
String
,
Object
>
paramMap
,
LocalDateTime
startTime
,
LocalDateTime
endTime
)
{
private
void
handleType1
(
String
key
,
List
<
AskReportDict
.
Params
>
params
,
Map
<
String
,
Object
>
paramMap
,
Map
<
String
,
Double
>
result
)
{
AskReportDict
.
Params
param
=
params
.
get
(
0
);
if
(
param
==
null
)
{
return
;
}
Double
value
=
getLatestValue
(
param
.
getParam
(),
startTime
,
endTime
);
Double
value
=
result
.
get
(
param
.
getParam
()
);
if
(
value
!=
null
)
{
paramMap
.
put
(
key
,
value
);
}
}
private
void
handleType2
(
String
key
,
List
<
AskReportDict
.
Params
>
params
,
Map
<
String
,
Object
>
paramMap
,
LocalDateTime
startTime
,
LocalDateTime
endTime
)
{
private
void
handleType2
(
String
key
,
List
<
AskReportDict
.
Params
>
params
,
Map
<
String
,
Object
>
paramMap
,
Map
<
String
,
Double
>
result
)
{
if
(
params
.
size
()
<
2
)
{
return
;
}
Double
numerator
=
getParamValue
(
params
.
get
(
0
),
startTime
,
endTime
);
Double
denominator
=
getParamValue
(
params
.
get
(
1
),
startTime
,
endTime
);
Double
numerator
=
result
.
get
(
params
.
get
(
0
).
getParam
()
);
Double
denominator
=
result
.
get
(
params
.
get
(
1
).
getParam
()
);
if
(
numerator
==
null
||
denominator
==
null
||
denominator
==
0
)
{
return
;
}
double
result
=
Math
.
round
(
numerator
/
denominator
*
100.0
)
/
100.0
;
// 保留两位小数
paramMap
.
put
(
key
,
result
);
Double
value
=
Math
.
round
(
numerator
/
denominator
*
100.0
)
/
100.0
;
// 保留两位小数
paramMap
.
put
(
key
,
value
);
}
private
void
handleType3
(
String
key
,
List
<
AskReportDict
.
Params
>
params
,
Map
<
String
,
Object
>
paramMap
,
LocalDateTime
startTime
,
LocalDateTime
endTime
)
{
private
void
handleType3
(
String
key
,
List
<
AskReportDict
.
Params
>
params
,
Map
<
String
,
Object
>
paramMap
,
Map
<
String
,
Double
>
result
)
{
if
(
params
.
size
()
<
2
)
{
return
;
}
Double
numerator
=
getParamValue
(
params
.
get
(
0
),
startTime
,
endTime
);
Double
denominator
=
getParamValue
(
params
.
get
(
1
),
startTime
,
endTime
);
Double
numerator
=
result
.
get
(
params
.
get
(
0
).
getParam
()
);
Double
denominator
=
result
.
get
(
params
.
get
(
1
).
getParam
()
);
if
(
numerator
==
null
||
denominator
==
null
||
denominator
==
0
)
{
return
;
}
double
result
=
(
numerator
/
denominator
)
*
100
;
// 计算百分比
result
=
Math
.
round
(
result
*
100.0
)
/
100.0
;
// 保留两位小数
Double
value
=
(
numerator
/
denominator
)
*
100
;
// 计算百分比
value
=
Math
.
round
(
value
*
100.0
)
/
100.0
;
// 保留两位小数
String
percentageResult
=
String
.
format
(
"%.2f%%"
,
result
);
// 格式化为百分比字符串
String
percentageResult
=
String
.
format
(
"%.2f%%"
,
value
);
// 格式化为百分比字符串
paramMap
.
put
(
key
,
percentageResult
);
}
...
...
ask-data-ai/ask-data-ai-biz/src/main/resources/mapper/AskHistoryCollectDataMapper.xml
0 → 100644
View file @
5bb40bed
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper
namespace=
"com.ask.mapper.AskHistoryCollectDataMapper"
>
<!--
  Returns, for every path in "paths", the single row with the greatest datetime
  among rows whose datetime lies between startTime and endTime (BETWEEN includes both bounds).
  Rows are numbered per path with ROW_NUMBER() ordered by datetime descending, and only
  row number 1 is kept; the helper column "rn" is part of the selected columns.
  NOTE(review): an empty "paths" collection likely renders an IN clause with no list,
  which is invalid SQL; callers should pass a non-empty list. Confirm.
-->
<select
id=
"selectLastRecordForEachPath"
resultType=
"com.ask.api.entity.AskHistoryCollectData"
>
SELECT *
FROM (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY path ORDER BY datetime DESC) AS rn
FROM ask_history_collect_data
WHERE path IN
<foreach
item=
"item"
collection=
"paths"
open=
"("
separator=
","
close=
")"
>
#{item}
</foreach>
AND datetime BETWEEN #{startTime} AND #{endTime}
) t
WHERE t.rn = 1
</select>
</mapper>
ask-data-ai/ask-data-ai-boot/src/main/resources/application.yml
View file @
5bb40bed
...
...
@@ -15,7 +15,7 @@ spring:
datasource
:
url
:
jdbc:postgresql://8.152.98.45:5432/ask_data_ai_db?stringtype=unspecified
username
:
postgres
password
:
postgres123
# NOTE(review): plaintext database credential committed to version control. Rotate it and supply it from an environment variable or a secret store instead.
password
:
e5d039e4ba5246068
driver-class-name
:
org.postgresql.Driver
ai
:
vectorstore
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment