计算与文本语料库 gutenberg 相关联的所有文件 ID 的单词覆盖率。实现这一计算的代码应该怎么写？
# Print, for every file in NLTK's Gutenberg corpus, the ratio of raw
# characters to tokens, rounded to 7 decimal places, followed by the file id.
# NOTE: this ratio is characters per token (an average word length), not a
# word-coverage figure (tokens per distinct token).
import nltk
from nltk.corpus import gutenberg  # the corpus reader is spelled "gutenberg"
from decimal import Decimal

for fileid in gutenberg.fileids():
    # Length of the file's raw text, in characters.
    n_chars = len(gutenberg.raw(fileid))
    # Number of tokens in the same file; use the loop variable `fileid`,
    # not the undefined name `fileids`.
    n_words = len(gutenberg.words(fileid))
    print(round(Decimal(n_chars / n_words), 7), fileid)
# Posted on 2020-02-09 11:29:55 (page metadata that was fused onto the print line).
# Print, for every file in NLTK's Gutenberg corpus, the ratio of total tokens
# to distinct tokens (how many times each distinct token is used on average),
# followed by the file id.
import nltk
from nltk.corpus import gutenberg

for fileid in gutenberg.fileids():
    # Build the token view once and reuse it for both counts.
    words = gutenberg.words(fileid)
    total_words = len(words)
    # set() collapses repeated tokens; the comparison is case-sensitive.
    total_unique_words = len(set(words))
    print(total_words / total_unique_words, fileid)
# Source: https://stackoverflow.com/questions/59099224
复制相似问题