Commit 29d47f96 authored by 于飞's avatar 于飞

拼接资源

parent 36bd2724
from collections import defaultdict
import re
from fastapi import Request, Depends
from dbgpt.app.apps.core.database import db_getter
from sqlalchemy.ext.asyncio import AsyncSession
from dbgpt.app.apps.vadmin.word.crud import SensitiveDal
from dbgpt.app.apps.vadmin.auth.utils.current import AllUserAuth, FullAdminAuth, OpenAuth
from dbgpt.app.apps.vadmin.auth.utils.validation.auth import Auth
class DFAFilter():
    '''Filter messages against a set of sensitive keywords.

    Keywords are stored in a character trie (a DFA): nested dicts keyed by
    single characters, where a node containing the ``delimit`` key marks the
    end of a complete keyword.  Filtering therefore runs in time roughly
    proportional to the message length, independent of keyword count.

    >>> f = DFAFilter()
    >>> f.add("sexy")
    >>> f.filter("hello sexy baby")
    ('hello **** baby', True)
    '''

    def __init__(self):
        # Root of the keyword trie (nested dicts keyed by single characters).
        self.keyword_chains = {}
        # Sentinel key marking "a keyword ends at this node".
        self.delimit = '\x00'

    def add(self, keyword):
        """Insert one keyword (str or UTF-8 bytes) into the trie, lowercased."""
        if not isinstance(keyword, str):
            keyword = keyword.decode('utf-8')
        keyword = keyword.lower()
        chars = keyword.strip()
        if not chars:
            # Ignore empty / whitespace-only keywords.
            return
        level = self.keyword_chains
        for i in range(len(chars)):
            if chars[i] in level:
                # This prefix already exists: descend one level.
                level = level[chars[i]]
            else:
                if not isinstance(level, dict):
                    # Defensive guard: stop if we somehow descended into a
                    # non-dict value instead of a trie node.
                    break
                # Create all remaining suffix nodes in one pass, remembering
                # the parent of the last node created.
                for j in range(i, len(chars)):
                    level[chars[j]] = {}
                    last_level, last_char = level, chars[j]
                    level = level[chars[j]]
                # Re-point the last created entry at a terminator node.
                last_level[last_char] = {self.delimit: 0}
                break
            if i == len(chars) - 1:
                # The whole keyword already existed as a prefix of another
                # entry: mark this node as a terminator too.
                level[self.delimit] = 0

    # Load sensitive words from a text file (one keyword per line).
    def parse(self, path):
        with open(path, encoding='UTF-8') as f:
            for keyword in f:
                self.add(keyword.strip())

    # Load sensitive words from the database via SensitiveDal.
    async def parse_from_db(self, db: AsyncSession):
        #db: AsyncSession = Depends(db_getter)
        print('---------sensitive-load-------------')
        sdl = SensitiveDal(db)
        datas = await sdl.get_sensitives()
        for keyword in datas:
            # Each row exposes the keyword text as `word_name`.
            self.add(keyword.word_name)

    def filter(self, message, repl="*"):
        """Mask every sensitive word in *message* with *repl* characters.

        Returns a ``(filtered_text, is_sensitive)`` tuple, where
        ``is_sensitive`` is True iff at least one keyword matched.
        Matching is case-insensitive and stops at the shortest keyword
        ending at each position.
        """
        is_sensitive = False
        if not isinstance(message, str):
            message = message.decode('utf-8')
        message = message.lower()
        ret = []
        start = 0
        while start < len(message):
            # Restart a trie walk at every position in the message.
            level = self.keyword_chains
            step_ins = 0
            for char in message[start:]:
                if char in level:
                    step_ins += 1
                    if self.delimit not in level[char]:
                        level = level[char]
                    else:
                        # Full keyword matched: emit `repl` once per matched
                        # character and jump past the match (the trailing
                        # `start += 1` below completes the advance).
                        ret.append(repl * step_ins)
                        start += step_ins - 1
                        is_sensitive = True
                        break
                else:
                    # Mismatch: keep the current character and retry from the
                    # next position.
                    ret.append(message[start])
                    break
            else:
                # Message ended mid-walk without reaching a terminator.
                ret.append(message[start])
            start += 1
        # Returns two values: the masked text and the sensitivity flag.
        return ''.join(ret), is_sensitive
mydfafiter = DFAFilter()
"""
if __name__ == "__main__":
gfw = DFAFilter()
gfw.parse("keywords")
import time
t = time.process_time()
print(gfw.filter("法轮功 我操操操", "*"))
print(gfw.filter("针孔摄像机 我操操操", "*"))
print(gfw.filter("售假人民币 我操操操", "*"))
print(gfw.filter("传世私服 我操操操", "*"))
print('Cost is %6.6f' % (time.process_time() - t))
"""
#-*- encoding=utf-8 -*-
from typing import Any
import spacy
import pytextrank
class SpacyNlp():
    """Holder for a spaCy Chinese pipeline with a TextRank stage.

    Loads the ``zh_core_web_trf`` transformer model once at construction and
    appends the pytextrank ``textrank`` pipe, so processed documents expose
    ranked key phrases (used elsewhere via ``doc._.phrases``).
    """

    def __init__(self, name):
        print('--------------------SpacyNlp.__init__-------------------------')
        self.name = name
        # Loading the transformer model is expensive; it happens exactly once
        # per instance, here.  (An English alternative would be
        # "en_core_web_trf".)
        model_name = "zh_core_web_trf"
        self.nlp = spacy.load(model_name)
        # Register TextRank so documents carry key-phrase annotations.
        self.nlp.add_pipe("textrank")

    def do_something(self):
        # Placeholder retained for interface compatibility.
        pass
my_spacy_nlp = SpacyNlp('分词模块单例')
import json
from fastapi import APIRouter, Depends, Body, UploadFile, Form, Request
from dbgpt.app.apps.utils.file.file_manage import FileManage
from dbgpt.app.apps.utils.response import SuccessResponse,ErrorResponse
from dbgpt.app.apps.vadmin.auth.utils.validation.auth import Auth
from dbgpt.app.apps.vadmin.media import schemas, crud
from dbgpt.app.apps.vadmin.media.params.media_list import MediaListParams, GroupListParams, MediaEditParams, QuestionListParams, \
QuestionEditParams, CorrelationListParams
from dbgpt.app.apps.core.dependencies import IdList
from dbgpt.app.apps.vadmin.auth.utils.current import AllUserAuth, FullAdminAuth, OpenAuth
from dbgpt.app.apps.vadmin.auth.utils.validation.auth import Auth
from typing import Any, Dict, Generic, Optional, TypeVar
from dbgpt._private.pydantic import BaseModel, ConfigDict, Field, model_to_dict
from dbgpt.app.apps.utils.spach_keywords import my_spacy_nlp
from dbgpt.app.apps.utils.filter import mydfafiter
# Router for the keyword/media lookup endpoint defined below.
router = APIRouter()


class ConversationVo(BaseModel):
    """Request body: the raw user utterance to screen and match against media."""
    # Disable pydantic's protected "model_" namespace checks.
    model_config = ConfigDict(protected_namespaces=())
    # Free-text question typed by the user; defaults to empty string.
    user_input: str = ""
@router.post("/get_spacy_keywords", summary="资源列表(图片、视频)")
async def get_spacy_keywords(dialogue: ConversationVo = Body(), auth: Auth = Depends(OpenAuth())):
    """Screen the user input for sensitive words, then look up related media.

    Flow:
      1. Run the DFA sensitive-word filter; on a hit, return a type=3 payload
         carrying the masked text and stop.
      2. Otherwise extract key phrases with spaCy/TextRank and use the
         top-ranked phrase as a LIKE pattern against the media store,
         collecting both images (type=1) and videos (type=2).

    Returns:
        SuccessResponse with the sensitive-hit payload or the collected media
        records; ErrorResponse when no key phrase could be extracted.
    """
    print(f"用户输入的问题:{dialogue.user_input} ")
    print('----------------begin---------------->')
    # 1) Sensitive-word screening comes first.
    dfa_result, is_sensitive = mydfafiter.filter(dialogue.user_input, "*")
    print(dfa_result)
    if is_sensitive:
        print('用户输入有敏感词')
        result = {'code': 200, 'message': 'success',
                  'data': [{'type': 3, 'word_name': '111', 'is_sensitive': 1, 'user_input': dfa_result}]}
        # type=3 marks a sensitive-word hit.
        return SuccessResponse(result)
    # 2) No sensitive words: extract candidate key phrases (top-ranked first).
    doc = my_spacy_nlp.nlp(dialogue.user_input)
    words = [phrase.chunks[0] for phrase in doc._.phrases]
    print(words)
    if not words:
        print("-----没有找到需要查询的内容:---->")
        return ErrorResponse("没有找到需要查询的内容")
    keyword = words[0]
    print(keyword)
    result = []
    # Images (type=1) and videos (type=2) use identical filters; the two
    # previously duplicated query blocks are folded into one loop, and the
    # unused count value is discarded explicitly.
    for media_type in (1, 2):
        query = {'page': 1, 'limit': 10, 'v_order': None, 'v_order_field': None,
                 'type': media_type, 'group_id': None, 'file_name': ('like', keyword)}
        datas, _count = await crud.MediaDal(auth.db).get_datas(**query, v_return_count=True)
        for data in datas:
            result.append({'type': media_type,
                           'file_name': data.get('file_name'),
                           'key_word': data.get('key_word'),
                           'local_path': data.get('local_path'),
                           'remote_path': data.get('remote_path')})
    return SuccessResponse(result)
import argparse
import asyncio
import os
import sys
from typing import List
...@@ -44,6 +45,7 @@ from dbgpt.util.utils import (
from dbgpt.app.apps.utils.tools import import_modules
from dbgpt.app.apps.utils.spach_keywords import my_spacy_nlp
from dbgpt.app.apps.utils.filter import mydfafiter
REQUEST_LOG_RECORD = False
MIDDLEWARES = [
...@@ -101,6 +103,7 @@ def mount_routers(app: FastAPI):
from dbgpt.app.apps.system.views import router as system_views
from dbgpt.app.apps.vadmin.media.views import router as media_views
from dbgpt.app.apps.vadmin.word.views import router as word_views
from dbgpt.app.apps.vadmin.keywordsviews import router as keywords_views
app.include_router(api_v1, prefix="/api", tags=["Chat"])
app.include_router(api_v2, prefix="/api", tags=["ChatV2"])
...@@ -115,6 +118,7 @@ def mount_routers(app: FastAPI):
app.include_router(system_views, prefix="/api/v2/vadmin/system", tags=["System"])
app.include_router(media_views, prefix="/api/v2", tags=["System"])
app.include_router(word_views, prefix="/api/v2/vadmin/word", tags=["Word"])
app.include_router(keywords_views, prefix="/api/v2/vadmin", tags=["vadmin"])
def mount_static_files(app: FastAPI):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment