linyangyang / db_gpt / Commits / 29d47f96

Commit 29d47f96 authored Sep 03, 2024 by 于飞

    拼接资源 ("assemble resources")

Parent: 36bd2724

Showing 4 changed files with 227 additions and 0 deletions (+227 -0)
filter.py          dbgpt/app/apps/utils/filter.py          +108  -0
spach_keywords.py  dbgpt/app/apps/utils/spach_keywords.py   +19  -0
keywordsviews.py   dbgpt/app/apps/vadmin/keywordsviews.py   +96  -0
dbgpt_server.py    dbgpt/app/dbgpt_server.py                 +4  -0
dbgpt/app/apps/utils/filter.py (new file, mode 100644)
```python
from collections import defaultdict
import re
from fastapi import Request, Depends
from dbgpt.app.apps.core.database import db_getter
from sqlalchemy.ext.asyncio import AsyncSession
from dbgpt.app.apps.vadmin.word.crud import SensitiveDal
from dbgpt.app.apps.vadmin.auth.utils.current import AllUserAuth, FullAdminAuth, OpenAuth
from dbgpt.app.apps.vadmin.auth.utils.validation.auth import Auth


class DFAFilter():
    '''Filter messages against a keyword list.

    Uses a DFA so filtering time stays proportional to the message
    length, independent of the number of keywords.
    (Sensitive-word filtering, 敏感词过滤.)

    >>> f = DFAFilter()
    >>> f.add("sexy")
    >>> f.filter("hello sexy baby")
    ('hello **** baby', True)
    '''

    def __init__(self):
        self.keyword_chains = {}
        self.delimit = '\x00'

    def add(self, keyword):
        if not isinstance(keyword, str):
            keyword = keyword.decode('utf-8')
        keyword = keyword.lower()
        chars = keyword.strip()
        if not chars:
            return
        level = self.keyword_chains
        for i in range(len(chars)):
            if chars[i] in level:
                level = level[chars[i]]
            else:
                if not isinstance(level, dict):
                    break
                for j in range(i, len(chars)):
                    level[chars[j]] = {}
                    last_level, last_char = level, chars[j]
                    level = level[chars[j]]
                last_level[last_char] = {self.delimit: 0}
                break
        if i == len(chars) - 1:
            level[self.delimit] = 0

    # Load sensitive words from a text file
    def parse(self, path):
        with open(path, encoding='UTF-8') as f:
            for keyword in f:
                self.add(keyword.strip())

    # Load sensitive words from the database
    async def parse_from_db(self, db: AsyncSession):
        # db: AsyncSession = Depends(db_getter)
        print('---------sensitive-load-------------')
        sdl = SensitiveDal(db)
        datas = await sdl.get_sensitives()
        for keyword in datas:
            self.add(keyword.word_name)

    def filter(self, message, repl="*"):
        is_sensitive = False
        if not isinstance(message, str):
            message = message.decode('utf-8')
        message = message.lower()
        ret = []
        start = 0
        while start < len(message):
            level = self.keyword_chains
            step_ins = 0
            for char in message[start:]:
                if char in level:
                    step_ins += 1
                    if self.delimit not in level[char]:
                        level = level[char]
                    else:
                        ret.append(repl * step_ins)
                        start += step_ins - 1
                        is_sensitive = True
                        break
                else:
                    ret.append(message[start])
                    break
            else:
                ret.append(message[start])
            start += 1
        # Returns two values: (filtered_message, is_sensitive)
        return ''.join(ret), is_sensitive


mydfafiter = DFAFilter()

"""
if __name__ == "__main__":
    gfw = DFAFilter()
    gfw.parse("keywords")
    import time
    t = time.process_time()
    print(gfw.filter("法轮功 我操操操", "*"))
    print(gfw.filter("针孔摄像机 我操操操", "*"))
    print(gfw.filter("售假人民币 我操操操", "*"))
    print(gfw.filter("传世私服 我操操操", "*"))
    print('Cost is %6.6f' % (time.process_time() - t))
"""
```
dbgpt/app/apps/utils/spach_keywords.py (new file, mode 100644)
```python
# -*- encoding=utf-8 -*-
from typing import Any

import spacy
import pytextrank


class SpacyNlp():
    def __init__(self, name):
        print('--------------------SpacyNlp.__init__-------------------------')
        self.name = name
        # nlp = spacy.load("en_core_web_trf")
        self.nlp = spacy.load("zh_core_web_trf")
        self.nlp.add_pipe("textrank")

    def do_something(self):
        pass


# Module-level singleton for the tokenizer/keyword-extraction pipeline
my_spacy_nlp = SpacyNlp('tokenizer module singleton')
```
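A minimal sketch of how this singleton is consumed (see get_spacy_keywords below): pytextrank registers the `doc._.phrases` extension on every parsed document, and each phrase exposes a rank, a count, and its chunk spans. The sample sentence is an assumption for illustration, and the zh_core_web_trf model must be installed for the load to succeed:

```python
from dbgpt.app.apps.utils.spach_keywords import my_spacy_nlp

# "请给我看一些长城的图片" ~ "show me some pictures of the Great Wall"
doc = my_spacy_nlp.nlp("请给我看一些长城的图片")
for phrase in doc._.phrases[:3]:
    # each pytextrank phrase carries a TextRank score, an occurrence
    # count, and the matching noun-chunk spans
    print(phrase.rank, phrase.count, phrase.chunks[0])
```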
dbgpt/app/apps/vadmin/keywordsviews.py (new file, mode 100644)
```python
import json

from fastapi import APIRouter, Depends, Body, UploadFile, Form, Request
from dbgpt.app.apps.utils.file.file_manage import FileManage
from dbgpt.app.apps.utils.response import SuccessResponse, ErrorResponse
from dbgpt.app.apps.vadmin.auth.utils.validation.auth import Auth
from dbgpt.app.apps.vadmin.media import schemas, crud
from dbgpt.app.apps.vadmin.media.params.media_list import (
    MediaListParams, GroupListParams, MediaEditParams,
    QuestionListParams, QuestionEditParams, CorrelationListParams,
)
from dbgpt.app.apps.core.dependencies import IdList
from dbgpt.app.apps.vadmin.auth.utils.current import AllUserAuth, FullAdminAuth, OpenAuth
from typing import Any, Dict, Generic, Optional, TypeVar
from dbgpt._private.pydantic import BaseModel, ConfigDict, Field, model_to_dict
from dbgpt.app.apps.utils.spach_keywords import my_spacy_nlp
from dbgpt.app.apps.utils.filter import mydfafiter

router = APIRouter()


class ConversationVo(BaseModel):
    model_config = ConfigDict(protected_namespaces=())
    user_input: str = ""


@router.post("/get_spacy_keywords", summary="Resource list (images, videos)")
async def get_spacy_keywords(dialogue: ConversationVo = Body(),
                             auth: Auth = Depends(OpenAuth())):
    print(f"User question: {dialogue.user_input}")
    print('----------------begin---------------->')
    # Check for sensitive words first
    dfa_result, is_sensitive = mydfafiter.filter(dialogue.user_input, "*")
    print(dfa_result)
    if is_sensitive:
        print('User input contains sensitive words')
        # type=3 marks a sensitive-word hit
        result = {'code': 200, 'message': 'success',
                  'data': [{'type': 3, 'word_name': '111', 'is_sensitive': 1,
                            'user_input': dfa_result}]}
        return SuccessResponse(result)
    # No sensitive words: look for related images or videos
    doc = my_spacy_nlp.nlp(dialogue.user_input)
    words = []
    # examine the top-ranked phrases in the document
    for phrase in doc._.phrases:
        # logger.info(f"----1--->:{phrase.rank}--->:{phrase.count}")
        # logger.info(f"----2--->:{phrase.chunks[0]}")
        words.append(phrase.chunks[0])  # phrase.chunks[0] is a spaCy Span
    print(words)
    if len(words) > 0:
        print(words[0])
        # Use the top-ranked keyword to fetch matching images from the database
        images_dic = {'page': 1, 'limit': 10, 'v_order': None,
                      'v_order_field': None, 'type': 1, 'group_id': None,
                      'file_name': ('like', words[0])}
        images_datas, count = await crud.MediaDal(auth.db).get_datas(
            **images_dic, v_return_count=True)
        # print(f"-----images found---->:{images_datas}")
        result = []
        for data in images_datas:
            json_image = {'type': 1,
                          'file_name': data.get('file_name'),
                          'key_word': data.get('key_word'),
                          'local_path': data.get('local_path'),
                          'remote_path': data.get('remote_path')}
            result.append(json_image)
        # Use the same keyword to fetch matching videos from the database
        video_dic = {'page': 1, 'limit': 10, 'v_order': None,
                     'v_order_field': None, 'type': 2, 'group_id': None,
                     'file_name': ('like', words[0])}
        video_datas, count = await crud.MediaDal(auth.db).get_datas(
            **video_dic, v_return_count=True)
        # print(f"-----videos found---->:{video_datas}")
        for data in video_datas:
            json_video = {'type': 2,
                          'file_name': data.get('file_name'),
                          'key_word': data.get('key_word'),
                          'local_path': data.get('local_path'),
                          'remote_path': data.get('remote_path')}
            result.append(json_video)
        return SuccessResponse(result)
    else:
        print("-----no matching content found---->")
        return ErrorResponse("No matching content found")
```
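Once mounted under /api/v2/vadmin (see the dbgpt_server.py hunks below), the endpoint can be exercised roughly as follows; the host, port, and sample question are assumptions, not part of the commit:

```python
import httpx

resp = httpx.post(
    "http://localhost:5670/api/v2/vadmin/get_spacy_keywords",
    json={"user_input": "show me pictures of the Great Wall"},
)
print(resp.json())
# The sensitive-word branch answers with type=3 entries carrying the masked
# input; the keyword branch answers with type 1 (image) / type 2 (video)
# records (file_name, key_word, local_path, remote_path).
```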
dbgpt/app/dbgpt_server.py (modified, +4 -0)
```
import argparse
import asyncio
import os
import sys
from typing import List
...

@@ -44,6 +45,7 @@ from dbgpt.util.utils import (
from dbgpt.app.apps.utils.tools import import_modules
from dbgpt.app.apps.utils.spach_keywords import my_spacy_nlp
from dbgpt.app.apps.utils.filter import mydfafiter

REQUEST_LOG_RECORD = False
MIDDLEWARES = [
...

@@ -101,6 +103,7 @@ def mount_routers(app: FastAPI):
    from dbgpt.app.apps.system.views import router as system_views
    from dbgpt.app.apps.vadmin.media.views import router as media_views
    from dbgpt.app.apps.vadmin.word.views import router as word_views
    from dbgpt.app.apps.vadmin.keywordsviews import router as keywords_views

    app.include_router(api_v1, prefix="/api", tags=["Chat"])
    app.include_router(api_v2, prefix="/api", tags=["ChatV2"])
...

@@ -115,6 +118,7 @@ def mount_routers(app: FastAPI):
    app.include_router(system_views, prefix="/api/v2/vadmin/system", tags=["System"])
    app.include_router(media_views, prefix="/api/v2", tags=["System"])
    app.include_router(word_views, prefix="/api/v2/vadmin/word", tags=["Word"])
    app.include_router(keywords_views, prefix="/api/v2/vadmin", tags=["vadmin"])


def mount_static_files(app: FastAPI):
...
```
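One loose end worth noting: the diff defines the async DFAFilter.parse_from_db() but never shows it being invoked, so the mydfafiter singleton starts empty unless it is populated elsewhere. A hypothetical startup hook could wire it up as sketched below; the shape of db_getter (assumed here to be an async session generator, as hinted by the commented-out `Depends(db_getter)` line in filter.py) is an assumption:

```python
from dbgpt.app.apps.core.database import db_getter
from dbgpt.app.apps.utils.filter import mydfafiter

@app.on_event("startup")
async def load_sensitive_words():
    # Assumption: db_getter() yields an AsyncSession when iterated.
    async for db in db_getter():
        await mydfafiter.parse_from_db(db)
        break
```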