Instructions
Objective
Write a Python program that implements a function `topBigrams`, which returns the 10 most frequent pairs of adjacent letters in an English text.
Requirements and Specifications
Task 1
Implement a function `topBigrams` which returns the 10 most frequent character pairs of adjacent letters in an English text. The function must return a `list` holding the 10 most frequently occurring character pairs in descending order. This function can be used, for example, to optimise the key assignment on a keyboard.
Important:
- You are meant to work on a particular input text available from the supplemental script `corpus.py`.
- `corpus.py`, when loaded, will provide a corpus of 20 candidate texts.
- Work on the text in this corpus identified by a computed position. Your position is computed as the integer obtained from your `student_id` (trim any leading zeroes!) modulo `20` (there are 20 candidate texts in `corpus`, hence this divisor).
Hints:
- The modulo operation computes the remainder of an integer division, e.g.: `55 % 10 = 5` (see also Task 2).
- In a first step, remove any character other than [A-Z, a-z] from the input string for bigram frequency analysis.
- When calculating the occurrences of character pairs, uppercase letters should be treated the same as lowercase letters.
The corpus.py file shall be omitted in the submitted zip-file.
Source Code
def topBigrams(text, n=10):
    """Return the most frequent pairs of adjacent letters (bigrams) in *text*.

    Every character other than the ASCII letters A-Z / a-z is removed first,
    so letters separated only by spaces, digits or punctuation count as
    adjacent. Upper- and lowercase letters are treated as the same letter.

    Args:
        text: The string to analyse.
        n: Maximum number of bigrams to return (default 10). Values below
            zero are treated as zero.

    Returns:
        A list of at most ``n`` two-letter lowercase strings, ordered by
        descending frequency. Bigrams with equal frequency keep the order
        in which they first occur in the text. The list is shorter than
        ``n`` when the text contains fewer distinct bigrams.
    """
    from string import ascii_letters

    # str.isalpha() would also accept accented and non-Latin letters, which
    # the task excludes, so test membership in the ASCII alphabet instead.
    letters = ''.join(ch for ch in text if ch in ascii_letters).lower()

    counts = {}
    for first, second in zip(letters, letters[1:]):
        pair = first + second
        counts[pair] = counts.get(pair, 0) + 1

    # sorted() is stable even with reverse=True, and dicts iterate in
    # insertion order, so ties stay in first-occurrence order.
    ranked = sorted(counts, key=counts.get, reverse=True)
    return ranked[:max(n, 0)]
# Grader checks for Task 1 (topBigrams).
from nose.tools import assert_equal
# Pick the text to analyse: strip leading 'h' and '0' characters from
# student_id, convert the rest to int and reduce it modulo the corpus size.
# NOTE(review): `corpus` and `student_id` are not defined in this file --
# presumably supplied by corpus.py / the surrounding notebook; confirm.
text = corpus[(int(student_id.lstrip("h0")) % len(corpus))]
# The result must be a list of exactly 10 entries for the selected text.
assert_equal(type(topBigrams(text)), list)
assert_equal(len(topBigrams(text)), 10)
# All bigrams occur once here, so the expected order is first occurrence.
assert_equal(topBigrams("NotImplementedError"), ['no', 'ot', 'ti', 'im', 'mp', 'pl', 'le', 'em', 'me', 'en'])
# 'he' and 'co' occur twice and therefore lead the list.
assert_equal(topBigrams("Yes, I will adhere to the Code of Conduct"), ['he', 'co', 'ye', 'es', 'si', 'iw', 'wi', 'il', 'll', 'la'])
def compute(a, b):
    """Return the greatest common divisor of two positive integers.

    Args:
        a: First operand.
        b: Second operand.

    Returns:
        The greatest common divisor of ``a`` and ``b``.
        The string ``"Wrong input type"`` if either argument is not an int.
        The string ``"This is illegal, only positive numbers allowed"`` if
        either argument is zero or negative.
    """
    if not isinstance(a, int) or not isinstance(b, int):
        return "Wrong input type"
    if a <= 0 or b <= 0:
        return "This is illegal, only positive numbers allowed"

    # Euclid's algorithm with the remainder instead of repeated subtraction:
    # same result, but a logarithmic number of steps and no recursion, so
    # inputs such as (1, 10**6) no longer exhaust the call stack.
    while b:
        a, b = b, a % b
    return a
# Grader checks for compute().
from nose.tools import assert_equal
# Non-positive operands are rejected with a message string.
assert_equal(compute(-168, 4), "This is illegal, only positive numbers allowed")
# Valid operands: the expected values are the greatest common divisors.
assert_equal(compute(5, 2205), 5)
assert_equal(type(compute(99, 11)), int)
assert_equal(compute(4231, 1324), 1)
# Non-integer operands are rejected with a different message string.
assert_equal(compute("整数", "int"), "Wrong input type")
def decode(input_string):
    """Decode a message that was written column-wise into blocks.

    The input consists of whitespace-separated blocks. The message is read
    by taking the first character of every block in order, then the second
    character of every block, and so on, for as many positions as the first
    block has characters. A ``'-'`` stands for a space, and leading and
    trailing whitespace is stripped from the result.

    Args:
        input_string: The encoded message.

    Returns:
        The decoded message. An empty or whitespace-only input yields
        ``""``. The string ``"Wrong input type"`` is returned if
        ``input_string`` is not a str.
    """
    if not isinstance(input_string, str):
        return "Wrong input type"

    blocks = input_string.split()
    if not blocks:
        # Covers both "" and whitespace-only input, which has no blocks.
        return ""

    width = len(blocks[0])
    decoded = []
    for column in range(width):
        for block in blocks:
            # A block shorter than the first one is treated as if it were
            # padded with '-' instead of raising IndexError.
            symbol = block[column] if column < len(block) else '-'
            decoded.append(' ' if symbol == '-' else symbol)
    return ''.join(decoded).strip()
# Grader checks for decode().
from nose.tools import assert_equal
# Full messages: blocks are read column by column and '-' becomes a space.
assert_equal(decode("H-VCG EIEOS L--D- LLEI- OONN-"), 'HELLO I LOVE ENCODINGS')
assert_equal(decode("T-A-I HA-SN ELHE- RWICD EADRA -YDET ISETA S-N--"), 'THERE IS ALWAYS A HIDDEN SECRET IN DATA')
assert_equal(decode("NHRHA OA-IG BLRSE OLE-- D-AM- YEDE- -V-S- SETS-"), 'NOBODY SHALL EVER READ THIS MESSAGE')
# Result type and length (trailing padding is stripped).
assert_equal(type(decode("ts et")), str)
assert_equal(len(decode("T- HS IE SN -T WE IN LC LE -- BI E- -G AU -E LS OS N- G-")), 36)
# Edge cases: empty string and non-string input.
assert_equal(decode(""), "")
assert_equal(decode([]), "Wrong input type")
# Lowercase English stop words (pronouns, auxiliaries, prepositions,
# contractions and contraction fragments such as 'll' or 'don').
# Passed as the stopword_list argument of cleaning() in the checks below.
stopwords = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd",
'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers',
'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which',
'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been',
'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if',
'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into',
'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off',
'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all',
'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own',
'same', 'so', 'than', 'too', 'very', 's', 't', 'can', 'will', 'just', 'don', "don't", 'should', "should've",
'now', 'd', 'll', 'm', 'o', 're', 've', 'y', 'ain', 'aren', "aren't", 'couldn', "couldn't", 'didn', "didn't",
'doesn', "doesn't", 'hadn', "hadn't", 'hasn', "hasn't", 'haven', "haven't", 'isn', "isn't", 'ma', 'mightn',
"mightn't", 'mustn', "mustn't", 'needn', "needn't", 'shan', "shan't", 'shouldn', "shouldn't", 'wasn', "wasn't",
'weren', "weren't", 'won', "won't", 'wouldn', "wouldn't"]
def cleaning(input_string, stopword_list):
    """Split a noisy string into words and drop the stop words.

    Digits are deleted wherever they occur (so ``"Tr7y"`` becomes ``"Try"``).
    The remaining text is split at any run of the separator characters
    ``- _ + \\ ^ , # . ; ! ? : > <`` and the space. Words that appear in
    ``stopword_list`` are removed; the comparison is case-sensitive.

    Args:
        input_string: The text to clean.
        stopword_list: Iterable of (hashable) words to exclude.

    Returns:
        The remaining words in their original order, or the string
        ``"Wrong input type"`` if ``input_string`` is not a str. An empty
        input yields an empty list.
    """
    if not isinstance(input_string, str):
        return "Wrong input type"

    separators = '-_+\\^ ,#.;!?:><'
    # Build the lookup set once: each membership test is then O(1) instead
    # of a scan over the whole stop-word list for every word.
    excluded = set(stopword_list)

    # Digits vanish without splitting the word they sit in, so drop them
    # before looking for separators.
    without_digits = ''.join(ch for ch in input_string if not ch.isdigit())

    # Map every separator to a space, then split on single spaces only;
    # runs of separators produce empty strings, which are filtered out.
    to_space = str.maketrans(dict.fromkeys(separators, ' '))
    candidates = without_digits.translate(to_space).split(' ')
    return [word for word in candidates if word and word not in excluded]
from nose.tools import assert_equal
assert_equal(cleaning("Goo7d.day you won
assert_equal(cleaning("Plea8se.h3elp me
assert_equal(cleaning("Plea8se.h3elp me
# Digits are removed inside words; 'this' is dropped only when it is in the stop-word list.
assert_equal(cleaning("Tr7y this;,<+ 2o3n5e", stopwords), ['Try', 'one'])
assert_equal(cleaning("Tr7y this;,<+ 2o3n5e", []), ['Try', 'this', 'one'])
# Only the number of remaining words is checked here.
assert_equal(len(cleaning("The_ch3air,w2as+so-ha76ppy<>f7o2356r,al9ice!", stopwords)), 4)
assert_equal(type(cleaning("Goo7d.day you won
# Non-string input is rejected with a message string.
assert_equal(cleaning([], stopwords), "Wrong input type")
assert_equal(cleaning(24, stopwords), "Wrong input type")
# An empty string yields an empty list.
assert_equal(cleaning("", stopwords), [])
Similar Samples
Discover expertly crafted sample projects and assignments at ProgrammingHomeworkHelp.com. Our examples showcase the quality and precision of our work, demonstrating our commitment to helping you excel in programming. Explore various coding solutions and gain insights to enhance your programming skills today!
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python