# One-hot encoding utilities for fault keywords and phenomenon descriptions.
- import json
- import numpy as np
- import os
# Absolute directory of this module; all JSON data files live beside it.
path = os.path.dirname(os.path.abspath(__file__))
def onehot_generate(mapping_path=None, output_dir=None):
    """Build one-hot encodings for fault keywords and vectors for phenomena.

    Reads a list of mappings from a JSON file (each item holds a
    ``"keywords"`` list and a ``"phenomenon"`` string), assigns every
    distinct keyword a one-hot vector over the sorted keyword vocabulary,
    and encodes each phenomenon as the element-wise OR of the one-hot
    vectors of all keywords whose text occurs (substring match) in the
    phenomenon. Two JSON files are written as output:
    ``keyword_one_hot_dict.json`` and ``phenomenon_vector_dict.json``.

    Args:
        mapping_path: Path of the input mapping JSON. Defaults to
            ``<module dir>/mapping.json`` (the original behavior).
        output_dir: Directory to write the two result files into.
            Defaults to the module directory (the original behavior).
    """
    if mapping_path is None:
        mapping_path = os.path.join(path, "mapping.json")
    if output_dir is None:
        output_dir = path

    with open(mapping_path, "r", encoding="utf-8") as json_file:
        loaded_data = json.load(json_file)

    # Collect every keyword occurrence and every phenomenon string.
    keywords_list = []
    phenomenon_list = []
    for mapping in loaded_data:
        keywords_list.extend(mapping["keywords"])
        phenomenon_list.append(mapping["phenomenon"])

    # Deduplicated, sorted vocabulary; its order fixes the vector layout.
    vocabulary = sorted(set(keywords_list))

    # The one-hot matrix over a vocabulary is exactly the identity matrix;
    # np.eye replaces the original O(n^2) Python comprehension.
    one_hot_array = np.eye(len(vocabulary), dtype=np.int32)
    keyword_one_hot_dict = {
        word: one_hot_array[i].tolist() for i, word in enumerate(vocabulary)
    }

    # A phenomenon's vector is the OR of the one-hot vectors of every
    # vocabulary keyword appearing in its text — i.e. a plain indicator
    # per vocabulary slot. Iterating `vocabulary` (not `keywords_list`)
    # avoids redundant work on duplicate keywords, and building ints
    # directly avoids the original int32->bool dtype drift from
    # np.logical_or. Duplicate phenomena collapse to one key, as before.
    phenomenon_vector_dict = {
        phenomenon: [1 if keyword in phenomenon else 0 for keyword in vocabulary]
        for phenomenon in phenomenon_list
    }

    # Persist both dictionaries as human-readable, non-ASCII-safe JSON.
    with open(
        os.path.join(output_dir, "keyword_one_hot_dict.json"), "w", encoding="utf-8"
    ) as json_file:
        json.dump(keyword_one_hot_dict, json_file, ensure_ascii=False, indent=4)
    with open(
        os.path.join(output_dir, "phenomenon_vector_dict.json"), "w", encoding="utf-8"
    ) as json_file:
        json.dump(phenomenon_vector_dict, json_file, ensure_ascii=False, indent=4)
|