jaccard.py 798 B

123456789101112131415161718192021
  1. #import numpy as np
  2. #from scipy.spatial.distance import pdist#直接调包可以计算JC值 :需要两个句子长度一样;所以暂时不用
  3. import jieba
  4. def Jaccrad(model, reference):#terms_reference为源句子,terms_model为候选句子
  5. terms_reference= jieba.cut(reference)#默认精准模式
  6. terms_model= jieba.cut(model)
  7. grams_reference = set(terms_reference)#去重;如果不需要就改为list
  8. grams_model = set(terms_model)
  9. temp=0
  10. for i in grams_reference:
  11. if i in grams_model:
  12. temp=temp+1#交集
  13. fenmu=len(grams_model)+len(grams_reference)-temp #并集
  14. jaccard_coefficient=float(temp/fenmu)
  15. return jaccard_coefficient
  16. a="发动机灯亮"
  17. b="发动机灯变亮"
  18. jaccard_coefficient=Jaccrad(a,b)
  19. print(jaccard_coefficient)