看了Andrew关于BLEU得分的视频之后,我想用Python从零开始实现一遍。我用Python和numpy写出了完整的代码,如下:
import numpy as np
def n_gram_generator(sentence,n= 2,n_gram= False):
    '''
    Build the word-level n-grams of a sentence.

    sentence is lower-cased and split on whitespace before windowing.
    n is the number of words in each n-gram.
    When n_gram is true, repeated n-grams are removed; the order of the
    returned list is then arbitrary because a set is used.
    '''
    tokens = sentence.lower().split()
    # `end` is the exclusive right edge of the window tokens[end - n:end];
    # windows that would start before the first word are skipped.
    grams = [' '.join(tokens[end - n:end])
             for end in range(len(tokens) + 1)
             if end >= n]
    if n_gram:
        grams = list(set(grams))
    return grams
def bleu_score(original,machine_translated):
    '''
    Score a machine translated sentence against an original sentence.

    Both arguments are whitespace-separated strings. The result is a length
    factor BP multiplied by exp() of the averaged per-order n-gram ratios.

    NOTE(review): this is not the usual BLEU definition, and for identical
    non-empty inputs it returns e (about 2.718) instead of 1:
      * the n-gram order runs over range(mt_length), so it starts at 0 and
        stops at mt_length - 1;
      * each ratio is (occurrences in the original of the translation's
        distinct n-grams) / (number of n-grams in the translation), with no
        clipping of the counts;
      * the ratios are averaged and passed to exp() directly, no logarithm
        is taken first;
      * when the translation is not longer than the original, BP is
        exp(1 - mt_length / o_length), which is never below 1.
    '''
    mt_length = len(machine_translated.split())
    o_length = len(original.split())

    # Length factor applied to the final score
    if mt_length <= o_length:
        BP = np.exp(1 - (mt_length / o_length))
    else:
        BP = 1

    ratios = []
    for order in range(mt_length):
        reference_grams = n_gram_generator(original, order)
        candidate_grams = n_gram_generator(machine_translated, order)
        distinct = set(candidate_grams)  # each translation n-gram counted once
        candidate_total = sum(candidate_grams.count(gram) for gram in distinct)
        reference_total = sum(reference_grams.count(gram) for gram in distinct)
        ratios.append(reference_total / candidate_total)

    mean_ratio = np.array(ratios).sum() / mt_length
    return BP * np.exp(mean_ratio)
if __name__ == "__main__":
original = "this is a test"
bs=bleu_score(original,original)
print("Bleu Score Original",bs)
我试着用nltk来验证我的分数
from nltk.translate.bleu_score import sentence_bleu
# One tokenized reference, wrapped in a list, and the tokenized candidate;
# both hold the same four words.
reference = [['this', 'is', 'a', 'test']]
candidate = ['this', 'is', 'a', 'test']
# Called without explicit weights, so sentence_bleu uses its defaults
score = sentence_bleu(reference, candidate)
print(score)
问题是我算出的BLEU分数约为2.718281,而nltk给出的是1。我做错了什么?
以下是一些可能的原因:
1)我是按机器翻译句子的长度来计算n-gram的,这里是从1到4
2)n_gram_generator函数是我自己写的,不确定它是否正确
3)我可能用错了函数,或者算错了BLEU分数。
有人能查一下我的代码并告诉我我在哪里犯的错误吗?
发布于 2019-07-10 13:22:46
你的BLEU分数算错了。问题:
1)需要使用截断精度(clipped precision);
2)n-gram的阶数应固定取1到4,而不是取决于句子长度;
3)各阶精度要先取对数并按权重(各0.25)加权求和,再取指数。
修正后的代码:
def bleu_score(original,machine_translated):
    '''
    Sentence-level BLEU of a machine translation against one reference.

    original is the reference sentence and machine_translated the candidate;
    both are whitespace-separated strings. The score is
    BP * exp(sum(0.25 * log(p_n))) for n = 1..4, where p_n is the clipped
    n-gram precision and BP the brevity penalty.

    Returns a float between 0 and 1. The result is 0.0 when the candidate is
    empty, has fewer than four words, or has no matching n-gram at some
    order: the geometric mean is zero there and log() would fail.
    '''
    mt_length = len(machine_translated.split())
    o_length = len(original.split())
    if mt_length == 0:
        return 0.0

    # Brevity penalty: 1 for a candidate longer than the reference,
    # otherwise exp(1 - reference_length / candidate_length), which is <= 1.
    if mt_length > o_length:
        BP = 1.0
    else:
        BP = math.exp(1 - o_length / mt_length)

    # Clipped precision
    clipped_precision_score = []
    for i in range(1, 5):
        original_n_gram = Counter(n_gram_generator(original, i))
        machine_n_gram = Counter(n_gram_generator(machine_translated, i))
        c = sum(machine_n_gram.values())
        # Counter intersection keeps the smaller count of each n-gram, which
        # is exactly the clipping step, and leaves both counters untouched.
        clipped = sum((machine_n_gram & original_n_gram).values())
        if c == 0 or clipped == 0:
            return 0.0
        clipped_precision_score.append(clipped / c)

    weights = [0.25] * 4
    s = math.fsum(w_i * math.log(p_i)
                  for w_i, p_i in zip(weights, clipped_precision_score))
    return BP * math.exp(s)
# Reference and candidate sentences for the comparison. "alwasy" is
# misspelled the same way in both strings, so the two still match there.
original = "It is a guide to action which ensures that the military alwasy obeys the command of the party"
machine_translated = "It is the guiding principle which guarantees the military forces alwasy being under the command of the party"
# Score from the hand-written implementation
print (bleu_score(original, machine_translated))
print (sentence_bleu([original.split()], machine_translated.split()))
输出:
0.27098211583470044
0.27098211583470044
发布于 2019-08-26 19:52:27
下面是对nltk实际源代码稍作修改的版本:
def sentence_bleu_man(
        references,
        hypothesis,
        weights=(0.25, 0.25, 0.25, 0.25)):
    '''
    Sentence-level BLEU assembled by hand from modified n-gram precisions.

    references and hypothesis are passed unchanged to modified_precision and
    closest_ref_length, and len(hypothesis) is used as the hypothesis length.
    NOTE(review): presumably token lists, and the three helpers presumably
    come from nltk.translate.bleu_score - confirm against the imports.
    weights holds one weight per n-gram order, starting with 1-grams.

    Returns bp * exp(sum(w_i * log(p_i))). Orders whose precision numerator
    is zero are left out of the sum, so they do not force the score to 0.
    '''
    orders = [i for i, _ in enumerate(weights, start=1)]

    # compute modified precision for each n-gram order
    numerators = {}
    denominators = {}
    for i in orders:
        p_i = modified_precision(references, hypothesis, i)
        numerators[i] = p_i.numerator
        denominators[i] = p_i.denominator

    # compute brevity penalty
    hyp_len = len(hypothesis)
    ref_len = closest_ref_length(references, hyp_len)
    bp = brevity_penalty(ref_len, hyp_len)

    # compute final score
    # Plain division gives the same value math.log() would obtain from an
    # un-normalized Fraction, and avoids the private `_normalize` keyword,
    # which fractions.Fraction rejects from Python 3.12 on.
    p_n = [numerators[i] / denominators[i]
           for i in orders
           if numerators[i] > 0]
    s = (w_i * math.log(p_i) for w_i, p_i in zip(weights, p_n))
    return bp * math.exp(math.fsum(s))

我们可以使用原始纸中的一个示例
# Raw reference translations (three sentences)
rt_raw = [
'It is a guide to action that ensures that the military will forever heed Party commands',
'It is the guiding principle which guarantees the military forces always being under the command of the Party',
'It is the practical guide for the army always to heed the directions of the party'
]
# Raw candidate translations (two sentences)
ct_raw = [
'It is a guide to action which ensures that the military always obeys the commands of the party',
'It is to insure the troops forever hearing the activity guidebook that party direct'
]
def process_trans(t):
    '''Lower-case a sentence and split it into whitespace-separated tokens.'''
    lowered = t.lower()
    return lowered.split()
# Tokenize every reference and every candidate sentence
rt = [process_trans(t) for t in rt_raw]
ct = [process_trans(t) for t in ct_raw]
c1, c2 = ct[0], ct[1]
# Score the second candidate; the weights put everything on the 1-gram and
# 2-gram precisions and nothing on the 3-gram and 4-gram ones.
sentence_bleu_man(rt, c2, weights=(.5, .5, 0, 0))
sentence_bleu(rt, c2, weights=(.5, .5, 0, 0))
输出:
0.18174699151949172
0.18174699151949172
发布于 2020-11-21 13:22:42
以下是修改后的解决方案
# coding: utf-8
import numpy as np
from collections import Counter
import math
from nltk.translate.bleu_score import sentence_bleu
def n_gram_generator(sentence,n= 2,n_gram= False):
    '''
    Return the word-level n-grams of a sentence, in reading order.

    sentence is lower-cased and split on whitespace.
    n is the number of words per n-gram and must be at least 1.
    When n_gram is true, repeated n-grams are dropped and the first
    occurrence of each one is kept, so the order stays deterministic.

    Raises ValueError when n is smaller than 1. A sentence with fewer than
    n words yields an empty list.
    '''
    if n < 1:
        # n = 0 used to produce a list of empty strings instead of n-grams
        raise ValueError("n must be at least 1, got %r" % (n,))

    words = sentence.lower().split()
    word_list = [' '.join(words[start:start + n])
                 for start in range(len(words) - n + 1)]

    if n_gram:
        seen = set()
        unique = []
        for gram in word_list:
            if gram not in seen:
                seen.add(gram)
                unique.append(gram)
        word_list = unique
    return word_list
def bleu_score(original, machine_translated):
    '''
    Sentence-level BLEU of a machine translation against one reference.

    original is the reference sentence and machine_translated the candidate;
    both are whitespace-separated strings. The score is
    BP * exp(sum(0.25 * log(p_n))) for n = 1..4, where p_n is the clipped
    n-gram precision and BP the brevity penalty.

    Returns a float between 0 and 1. The result is 0.0 when the candidate is
    empty, has fewer than four words, or has no matching n-gram at some
    order: the geometric mean is zero there and log() would fail.
    '''
    mt_length = len(machine_translated.split())
    o_length = len(original.split())
    if mt_length == 0:
        return 0.0

    # Brevity penalty: 1 for a candidate longer than the reference,
    # otherwise exp(1 - reference_length / candidate_length), which is <= 1.
    if mt_length > o_length:
        BP = 1.0
    else:
        BP = math.exp(1 - float(o_length) / mt_length)

    # Clipped precision
    clipped_precision_score = []
    for ngram_level in range(1, 5):  # 1-gram to 4-gram
        original_n_gram = Counter(n_gram_generator(original, ngram_level))
        machine_n_gram = Counter(n_gram_generator(machine_translated, ngram_level))
        num_ngrams_in_translation = sum(machine_n_gram.values())
        # CLIPPING - Counter intersection keeps, for every n-gram, the
        # smaller of its counts in the translation and in the reference.
        num_clipped_matches = sum((machine_n_gram & original_n_gram).values())
        if num_ngrams_in_translation == 0 or num_clipped_matches == 0:
            return 0.0
        clipped_precision_score.append(
            float(num_clipped_matches) / num_ngrams_in_translation)

    weights = [0.25] * 4
    s = math.fsum(w_i * math.log(p_i)
                  for w_i, p_i in zip(weights, clipped_precision_score))
    return BP * math.exp(s)
# Reference and candidate sentences for the comparison. "alwasy" is
# misspelled the same way in both strings, so the two still match there.
original = "It is a guide to action which ensures that the military alwasy obeys the command of the party"
machine_translated = "It is the guiding principle which guarantees the military forces alwasy being under the command of the party"
# Score from the hand-written implementation
print (bleu_score(original, machine_translated))
print (sentence_bleu([original.split()], machine_translated.split()))
https://stackoverflow.com/questions/56968434
复制相似问题