import json

from fastapi import APIRouter, Depends, Body, UploadFile, Form, Request

from dbgpt.app.apps.utils.file.file_manage import FileManage
from dbgpt.app.apps.utils.response import SuccessResponse, ErrorResponse
from dbgpt.app.apps.vadmin.auth.utils.validation.auth import Auth
from dbgpt.app.apps.vadmin.media import schemas
from dbgpt.app.apps.vadmin.media.crud import MediaDal,QuestionDal,CorrelationDal

from dbgpt.app.apps.vadmin.chathistory.crud import ChatHistoryDal
from dbgpt.app.apps.vadmin.chathistory.schemas.chathistory import ChatHistorySchemas

from dbgpt.app.apps.vadmin.auth.utils.current import AllUserAuth, FullAdminAuth, OpenAuth
from dbgpt.app.apps.vadmin.auth.utils.validation.auth import Auth
from sqlalchemy.ext.asyncio import AsyncSession

from typing import Any, Dict, Generic, Optional, TypeVar
from dbgpt._private.pydantic import BaseModel, ConfigDict, Field, model_to_dict
from dbgpt.app.apps.vadmin.word import crud

from dbgpt.app.apps.utils.spach_keywords import my_spacy_nlp
from dbgpt.app.apps.utils.filter import mydfafiter, mydfafiter_picture, mydfafiter_question, mydfafiter_video
from dbgpt.serve.conversation.api.schemas import MessageVo, ServeRequest, ServerResponse
from dbgpt.app.openapi.api_view_model import (
    ChatSceneVo,
    ConversationVo,
    Result,
)
from sqlalchemy import BinaryExpression

router = APIRouter()

# Number of questions asked so far (module-level, so one counter per process)
question_count = 0

# Values of the 'type' field in keyword-match results
MEDIA_TYPE1 = 1 # image
MEDIA_TYPE2 = 2 # video
MEDIA_TYPE3 = 3 # sensitive word
MEDIA_TYPE4 = 4 # question-answer pair
MEDIA_TYPE5 = 5 # question count reached 3 -> ask the user to leave a contact phone number

def get_key_words(user_input: str) -> list:
    """
    Extract keywords from a piece of user text.

    Runs ``my_spacy_nlp.nlp`` on ``user_input`` and, for every entry of
    ``doc._.phrases`` (in the order the pipeline yields them), keeps the first
    element of that entry's ``chunks``. The collected list is printed to stdout
    and returned.
    """
    parsed = my_spacy_nlp.nlp(user_input)
    # One keyword per phrase: the first chunk of each phrase.
    keywords = [ranked.chunks[0] for ranked in parsed._.phrases]

    print(keywords)
    return keywords

def get_key_words_nlp(user_input: str) -> list:
    """
    Alternative keyword extraction based on the preloaded keyword filters.

    The text is run through the picture, question and video filters, in that
    order, and every match each filter reports is collected into one list.
    (The original author describes this approach as more accurate than
    ``get_key_words``.)
    """
    collected = []
    for keyword_filter in (mydfafiter_picture, mydfafiter_question, mydfafiter_video):
        # Only the third element of the filter result (the matches) is needed.
        _, _, matches = keyword_filter.filter(user_input, "*")
        collected.extend(matches)
    return collected

async def _collect_group_matches(db, dal_cls, groups, words, *, item_type, fields,
                                 group_label, found_label, media_kind=None) -> list:
    """Query ``dal_cls`` once per (group, keyword) pair and build result items from the rows.

    Each returned item is ``{'type': item_type, <field>: row.get(<field>), ...}`` with
    the fields in the order given by ``fields``. When ``media_kind`` is not None it is
    added to the query as the ``type`` filter.
    """
    items = []
    for group in groups or []:
        group_id = group.get('group_id')
        print(f"===========>{group_label}:{group_id}")
        for word in words:
            filters = {'page': 1, 'limit': 0, 'v_order': None, 'v_order_field': None,
                       'group_id': group_id, 'key_word': word}
            if media_kind is not None:
                filters['type'] = media_kind
            # A fresh DAL per query, exactly as the inline code did.
            rows, _ = await dal_cls(db).get_datas(**filters, v_return_count=True)
            print(f"-----查询到的{found_label}为:---->:{rows}")
            for row in rows:
                item = {'type': item_type}
                item.update((field, row.get(field)) for field in fields)
                items.append(item)
    return items


async def get_media_datas_by(conv_uid: str, words: list, db: AsyncSession, knownledge: str) -> list:
    """
    Find the images, videos and Q&A pairs that match ``words`` within a knowledge base.

    The correlation record whose name equals ``knownledge`` lists image, video and
    question groups. Every keyword is looked up in every group: images first, then
    videos, then Q&A pairs. Non-empty results are also serialized to JSON and stored
    as a chat-history record for ``conv_uid``.

    :param conv_uid: conversation id the matches are recorded under.
    :param words: keywords to look up (each one is matched via the ``key_word`` filter).
    :param db: database session handed to the DAL classes.
    :param knownledge: name of the selected knowledge base; None means no lookup.
    :return: list of result dicts (``type`` is MEDIA_TYPE1, MEDIA_TYPE2 or MEDIA_TYPE4);
        empty when ``knownledge`` is None or no correlation record is found.
    """
    if knownledge is None:
        return []

    datas, _ = await CorrelationDal(db).get_datas(name=knownledge, v_return_count=True)
    if not datas:
        return []

    # Only the first matching correlation record is used.
    correlation = datas[0]
    media_fields = ('file_name', 'key_word', 'local_path', 'remote_path')

    result = []
    result += await _collect_group_matches(
        db, MediaDal, correlation.get('image_group'), words,
        item_type=MEDIA_TYPE1, fields=media_fields, media_kind=1,
        group_label="image_groupid", found_label="图片",
    )
    result += await _collect_group_matches(
        db, MediaDal, correlation.get('video_group'), words,
        item_type=MEDIA_TYPE2, fields=media_fields, media_kind=2,
        group_label="video_groupid", found_label="视频",
    )
    result += await _collect_group_matches(
        db, QuestionDal, correlation.get('question_group'), words,
        item_type=MEDIA_TYPE4, fields=('title', 'key_word', 'answer'),
        group_label="question_groupid", found_label="问答对",
    )

    # Persist the matches in the chat-history table so they can be replayed later.
    if result:
        print(f"-----保存数据的时候打印会话ID:---->:{conv_uid}")
        simi_data = ChatHistorySchemas()
        simi_data.conv_uid = conv_uid
        simi_data.message_medias = json.dumps(result)
        await ChatHistoryDal(db).create_data(data=simi_data)

    return result

async def get_media_datas(conv_uid: str, words: str, db: AsyncSession) -> list:
    """
    Look up images, videos and Q&A pairs by keyword, ignoring group membership.

    Each query filters ``key_word`` with ``('like', words)`` (presumably a SQL LIKE
    match -- confirm against the DAL). Results are gathered in the order images,
    videos, Q&A pairs; when anything was found the list is serialized to JSON and
    stored as a chat-history record for ``conv_uid``.

    :return: list of result dicts tagged MEDIA_TYPE1, MEDIA_TYPE2 or MEDIA_TYPE4.
    """
    key_word_filter = ('like', words)

    # Images: media rows of type 1.
    image_rows, _ = await MediaDal(db).get_datas(
        page=1, limit=0, v_order=None, v_order_field=None, type=1, group_id=None,
        key_word=key_word_filter, v_return_count=True,
    )
    print(f"-----查询到的图片为:---->:{image_rows}")
    matches = [
        {'type': MEDIA_TYPE1, 'file_name': row.get('file_name'), 'key_word': row.get('key_word'),
         'local_path': row.get('local_path'), 'remote_path': row.get('remote_path')}
        for row in image_rows
    ]

    # Videos: media rows of type 2.
    video_rows, _ = await MediaDal(db).get_datas(
        page=1, limit=0, v_order=None, v_order_field=None, type=2, group_id=None,
        key_word=key_word_filter, v_return_count=True,
    )
    print(f"-----查询到的视频为:---->:{video_rows}")
    matches.extend(
        {'type': MEDIA_TYPE2, 'file_name': row.get('file_name'), 'key_word': row.get('key_word'),
         'local_path': row.get('local_path'), 'remote_path': row.get('remote_path')}
        for row in video_rows
    )

    # Question/answer pairs.
    qa_rows, _ = await QuestionDal(db).get_datas(
        page=1, limit=0, v_order=None, v_order_field=None,
        key_word=key_word_filter, v_return_count=True,
    )
    print(f"-----查询到的问答对为:---->:{qa_rows}")
    matches.extend(
        {'type': MEDIA_TYPE4, 'title': row.get('title'), 'key_word': row.get('key_word'),
         'answer': row.get('answer')}
        for row in qa_rows
    )

    if not matches:
        return matches

    # Something matched: store it in the chat-history table as a JSON string.
    print(f"-----保存数据的时候打印会话ID:---->:{conv_uid}")
    serialized = json.dumps(matches)
    simi_data = ChatHistorySchemas()
    simi_data.conv_uid = conv_uid
    simi_data.message_medias = serialized
    await ChatHistoryDal(db).create_data(data=simi_data)

    return matches

async def get_media_datas_all(conv_uid: str, default_model: str, db: AsyncSession, messages: list) -> list:
    """
    Build the message list of a conversation, attaching its stored media matches.

    The chat-history table is queried for ``conv_uid``; the ``message_medias`` JSON
    of the first returned record is decoded and attached as ``extra`` to every
    message. If no usable record exists, a built-in placeholder list is attached
    instead.

    :param conv_uid: conversation id to load the history for.
    :param default_model: model name stamped on every returned message.
    :param db: database session handed to the DAL.
    :param messages: message objects exposing ``type``, ``content`` and ``round_index``.
    :return: one ``MessageVo`` per entry of ``messages``, in the same order.
    """
    history_filter = {'page': 1, 'limit': 10, 'v_order': None, 'v_order_field': None,
                      'conv_uid': conv_uid, 'message_medias': ('like', None)}
    datas, count = await ChatHistoryDal(db).get_datas(**history_filter, v_return_count=True)

    # NOTE(review): this placeholder looks like leftover sample data -- confirm
    # whether an empty list would be the better fallback.
    json_data = [{"type": MEDIA_TYPE1, "file_name": "723629348SJfHgjzD.png"}]
    if count > 0 and datas:
        stored_medias = datas[0].get('message_medias')
        # json.loads raises on None / empty input, so keep the fallback when the
        # record carries no media payload.
        if stored_medias:
            json_data = json.loads(stored_medias)

    print(f"----将字符串转换为json格式---->:{json_data}")

    # Every message gets the same media payload.
    return [
        MessageVo(
            role=msg.type,
            context=msg.content,
            order=msg.round_index,
            model_name=default_model,
            extra=json_data,
        )
        for msg in messages
    ]


def filter_similar(key_words: list, similar_words: list) -> list:
    """
    Drop synonyms from ``key_words``, keeping one representative per synonym group.

    Keywords are processed in their original order (duplicates removed), so the
    first keyword of each synonym group that appears in ``key_words`` is the one
    that survives. Keywords that belong to no group are always kept.

    :param key_words: keywords to filter.
    :param similar_words: list of synonym groups, each a list of words; None is
        treated as "no groups".
    :return: filtered keywords, in order of first appearance.

    Example:
        >>> filter_similar(['happy', 'glad', 'content', 'angry', 'furious'],
        ...                [['happy', 'glad', 'content'], ['angry', 'furious']])
        ['happy', 'angry']
    """
    groups = similar_words or []

    kept = []
    # dict.fromkeys de-duplicates while preserving order; set() would make the
    # surviving synonym arbitrary.
    for word in dict.fromkeys(key_words):
        already_represented = any(
            word in group and any(syn in kept for syn in group)
            for group in groups
        )
        if not already_represented:
            kept.append(word)
    return kept


# Endpoint: find the media (images, videos, Q&A pairs) related to a user question.
#
# Steps:
#   1. Run the sensitive-word filter; on a hit, return a MEDIA_TYPE3 payload with
#      the matched words and the filtered text.
#   2. Extract keywords with get_key_words_nlp; if that finds nothing, fall back
#      to get_key_words. If both find nothing, return an ErrorResponse.
#   3. Collapse synonyms, look the keywords up in the selected knowledge base and
#      return the matches.
#
# Documented with comments rather than a docstring because FastAPI publishes a
# handler's docstring as the route description.
@router.post("/get_spacy_keywords", summary="资源列表(图片、视频)")
async def get_spacy_keywords(dialogue: ConversationVo = Body(), auth: Auth = Depends(OpenAuth())):
    print(f"用户输入的问题：{dialogue.user_input} -- 选择的知识库为:{dialogue.select_param}")
    print('----------------begin---------------->')

    # NOTE: a "leave your contact details after 3 questions" prompt (MEDIA_TYPE5,
    # driven by the module-level question_count) was sketched here but is disabled.

    # Sensitive words take precedence over any media lookup.
    dfa_result, is_sensitive, matched_sensitives = mydfafiter.filter(dialogue.user_input, "*")
    print(dfa_result)

    if is_sensitive:
        print('用户输入有敏感词')
        result = {'code': 200, 'message': 'success',
                  'data': [{'type': MEDIA_TYPE3, 'word_name': matched_sensitives, 'is_sensitive': 1, 'user_input': dfa_result}]}
        return SuccessResponse(result)

    # Algorithm 1: match against the preloaded keyword filters.
    algo = "算法1"
    words = get_key_words_nlp(dialogue.user_input)
    if not words:
        # Algorithm 2: phrase extraction, used only when algorithm 1 found nothing.
        print("---算法2-begin--->")
        algo = "算法2"
        words = get_key_words(dialogue.user_input)
        if not words:
            print("-----没有找到需要查询的内容:---->")
            return ErrorResponse("没有找到需要查询的内容")

    print(f"---{algo}-匹配到的关键词--->:{words}")
    # Synonym groups come from the database, e.g. [['a', 'b'], ['c', 'd']].
    similar_words = await crud.SimilarDal(auth.db).get_similar_by_keyword()
    key_words = filter_similar(words, similar_words)
    print(f"---{algo}-过滤掉近义词后的关键词--->:{key_words}")
    result = await get_media_datas_by(dialogue.conv_uid, key_words, auth.db, dialogue.select_param)
    return SuccessResponse(result)


# Endpoint: (re)load the keyword filters from the database into memory, in the
# order sensitive words, pictures, videos, Q&A pairs.
# Documented with comments rather than a docstring because FastAPI publishes a
# handler's docstring as the route description.
@router.get("/load_parse_from_db", summary="加载敏感词和资源关键词")
async def load_parse_from_db(auth: Auth = Depends(OpenAuth())):
    loaders = (
        mydfafiter.parse_from_db,
        mydfafiter_picture.parse_picture_from_db,
        mydfafiter_video.parse_video_from_db,
        mydfafiter_question.parse_question_from_db,
    )
    # Run sequentially: each loader is awaited before the next one starts.
    for load in loaders:
        await load(auth.db)

    return SuccessResponse("media and sensitive all load OK")
