import json
import os

import numpy as np

# Directory containing this module; default location of the JSON files below.
path = os.path.abspath(os.path.dirname(__file__))


def onehot_generate(base_dir=None):
    """Generate keyword one-hot vectors and phenomenon multi-hot vectors.

    Reads ``mapping.json`` from ``base_dir``. The file is expected to hold a
    list of objects, each with a ``"keywords"`` list and a ``"phenomenon"``
    value. Two JSON files are written into the same directory:

    * ``keyword_one_hot_dict.json`` maps every distinct keyword to its
      one-hot vector over the sorted keyword vocabulary.
    * ``phenomenon_vector_dict.json`` maps every phenomenon to a 0/1 vector
      with a 1 at each vocabulary position whose keyword satisfies
      ``keyword in phenomenon``.

    Args:
        base_dir: Directory holding ``mapping.json`` and receiving the
            output files. Defaults to the module-level ``path`` (the
            directory of this module).

    Returns:
        None. The results are only written to disk.

    Raises:
        OSError: If ``mapping.json`` cannot be read or an output file
            cannot be written.
        KeyError: If a mapping entry lacks ``"keywords"`` or ``"phenomenon"``.
    """
    if base_dir is None:
        base_dir = path

    # Load the keyword/phenomenon mapping.
    mapping_file = os.path.join(base_dir, "mapping.json")
    with open(mapping_file, "r", encoding="utf-8") as json_file:
        loaded_data = json.load(json_file)

    # Collect the distinct keywords and the phenomena in file order.
    keywords = set()
    phenomenon_list = []
    for mapping in loaded_data:
        keywords.update(mapping["keywords"])
        phenomenon_list.append(mapping["phenomenon"])

    # Sorting makes the vector positions independent of the file order.
    vocabulary = sorted(keywords)

    # Row i of the identity matrix is the one-hot vector of vocabulary[i].
    one_hot_array = np.eye(len(vocabulary), dtype=np.int32)
    keyword_one_hot_dict = {
        word: row.tolist() for word, row in zip(vocabulary, one_hot_array)
    }

    # OR-ing the one-hot vectors of all matching keywords is the same as
    # setting a 1 at each matching vocabulary position. The match is a
    # membership test against the phenomenon itself (a substring test when
    # the phenomenon is a string) and considers every keyword in the
    # vocabulary, not only those listed in the phenomenon's own entry.
    phenomenon_vector_dict = {
        phenomenon: [1 if keyword in phenomenon else 0 for keyword in vocabulary]
        for phenomenon in phenomenon_list
    }

    # Save the keyword -> one-hot vector dictionary.
    keyword_file = os.path.join(base_dir, "keyword_one_hot_dict.json")
    with open(keyword_file, "w", encoding="utf-8") as json_file:
        json.dump(keyword_one_hot_dict, json_file, ensure_ascii=False, indent=4)

    # Save the phenomenon -> multi-hot vector dictionary.
    phenomenon_file = os.path.join(base_dir, "phenomenon_vector_dict.json")
    with open(phenomenon_file, "w", encoding="utf-8") as json_file:
        json.dump(phenomenon_vector_dict, json_file, ensure_ascii=False, indent=4)