diff --git a/chapter7/2-clean2grams.py b/chapter7/2-clean2grams.py index abc38ed..f6b7505 100644 --- a/chapter7/2-clean2grams.py +++ b/chapter7/2-clean2grams.py @@ -3,6 +3,7 @@ import re import string from collections import OrderedDict +from collections import defaultdict def cleanInput(input): input = re.sub('\n+', " ", input) @@ -20,13 +21,10 @@ def cleanInput(input): def getNgrams(input, n): input = cleanInput(input) - output = dict() + output=defaultdict(int) for i in range(len(input)-n+1): newNGram = " ".join(input[i:i+n]) - if newNGram in output: - output[newNGram] += 1 - else: - output[newNGram] = 1 + output[newNGram]+=1 return output html = urlopen("http://en.wikipedia.org/wiki/Python_(programming_language)") @@ -38,4 +36,4 @@ def getNgrams(input, n): ngrams = getNgrams(content, 2) ngrams = OrderedDict(sorted(ngrams.items(), key=lambda t: t[1], reverse=True)) -print(ngrams) \ No newline at end of file +print(ngrams)