Instructions
Objective
Write a Python program that reads arXiv article metadata from a file and answers queries about the articles and their authors.
Requirements and Specifications
Source Code
A3
"""CSC108: Fall 2021 -- Assignment 3: arxiv.org
This code is provided solely for the personal and private use of
students taking the CSC108/CSCA08 course at the University of
Toronto. Copying for purposes other than this use is expressly
prohibited. All forms of distribution of this code, whether as given
or with any changes, are expressly prohibited.
All of the files in this directory and all subdirectories are:
Copyright (c) 2021 Anya Tafliovich, Michelle Craig, Tom Fairgrieve, Sadia
Sharmin, and Jacqueline Smith.
"""
from io import StringIO
from typing import Any, Dict
import unittest
import arxiv_functions as arxiv
import checker_generic as checker
# Name of the student module under test and the PythonTA configuration file;
# both are passed to the checker calls at the bottom of this script.
MODULENAME = 'arxiv_functions'
PYTA_CONFIG = 'a3_pyta.json'
# Width and fill character of the banner lines printed around each phase.
TARGET_LEN = 79
SEP = '='
# Expected name -> value pairs for the module-level constants of the student
# module; test_check_constants compares them against the module's attributes.
CONSTANTS = {
'ID': 'identifier',
'TITLE': 'title',
'CREATED': 'created',
'MODIFIED': 'modified',
'AUTHORS': 'authors',
'ABSTRACT': 'abstract',
'END': 'END'
}
# Sample file contents; wrapped in a StringIO and handed to read_arxiv_file
# by test_read_arxiv_file.
DATA_FILE = """008
Intro to CS is the best course ever
2021-09-01
Ponce,Marcelo
Tafliovich,Anya Y.
We present clear evidence that Introduction to
Computer Science is the best course.
END
827
University of Toronto is the best university
2021-08-20
2021-10-02
Ponce,Marcelo
Bretscher,Anna
Tafliovich,Anya Y.
We show a formal proof that the University of
Toronto is the best university.
END
"""
# Sample id -> article dictionary passed as the first argument to most of the
# functions checked below.
DATA_DICT = {
'008': {
'identifier': '008',
'title': 'Intro to CS is the best course ever',
'created': '2021-09-01',
'modified': '',
'authors': [('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')],
'abstract': '''We present clear evidence that Introduction to
Computer Science is the best course.'''},
'827': {
'identifier': '827',
'title': 'University of Toronto is the best university',
'created': '2021-08-20',
'modified': '2021-10-02',
'authors': [('Bretscher', 'Anna'),
('Ponce', 'Marcelo'),
('Tafliovich', 'Anya Y.')],
'abstract': '''We show a formal proof that the University of
Toronto is the best university.'''}
}
class CheckTest(unittest.TestCase):
    """Lightweight return-type checks (NOT a full tester!) for the
    assignment functions.
    """

    def test_contains_keyword(self) -> None:
        """Check that contains_keyword returns a list of str."""
        sample_args = [DATA_DICT, 'is']
        self._check_list_of_type(arxiv.contains_keyword, sample_args, str)

    def test_created_in_year(self) -> None:
        """Check that created_in_year returns a bool."""
        sample_args = [DATA_DICT, '008', 2021]
        self._check_simple_type(arxiv.created_in_year, sample_args, bool)

    def test_read_arxiv_file(self) -> None:
        """Check the shape of the dictionary returned by read_arxiv_file."""
        print('\nChecking read_arxiv_file...')
        outcome = checker.returns_dict_of(
            arxiv.read_arxiv_file, [StringIO(DATA_FILE)], str, dict)
        self.assertTrue(outcome[0], outcome[1])

        valid_keys = {'identifier', 'title', 'created', 'modified',
                      'authors', 'abstract'}
        # Every key except 'authors' must map to a str or None.
        text_keys = valid_keys - {'authors'}
        template = 'Value corresponding to key "{}" should be a {}.'

        for entry in outcome[1].values():
            author_list = entry['authors']
            authors_ok = (isinstance(author_list, list) and
                          _all_names(author_list))
            self.assertTrue(authors_ok,
                            template.format('authors', 'list of names'))
            for field in text_keys:
                value = entry[field]
                self.assertTrue(value is None or isinstance(value, str),
                                template.format(field, 'str or None'))
        print(' check complete')

    def test_make_author_to_article(self) -> None:
        """Check the shape of the dictionary returned by
        make_author_to_articles.
        """
        print('\nChecking make_author_to_articles...')
        outcome = checker.type_check_simple(
            arxiv.make_author_to_articles, [DATA_DICT], dict)
        self.assertTrue(outcome[0], outcome[1])

        complaint = ('make_author_to_articles should return a dict in which\n'
                     'keys are names (Tuple[str, str]) and values are lists of\n'
                     'article IDs (List[str]).')
        for name, ids in outcome[1].items():
            well_formed = (_is_name(name) and
                           isinstance(ids, list) and
                           all(isinstance(item, str) for item in ids))
            self.assertTrue(well_formed, complaint)
        print(' check complete')

    def test_get_coauthors(self) -> None:
        """Check that get_coauthors returns a list of names."""
        sample_args = [DATA_DICT, ('Tafliovich', 'Anya Y.')]
        self._check_list_of_names(arxiv.get_coauthors, sample_args)

    def test_get_most_published_authors(self) -> None:
        """Check that get_most_published_authors returns a list of names."""
        sample_args = [DATA_DICT]
        self._check_list_of_names(arxiv.get_most_published_authors,
                                  sample_args)

    def test_suggest_collaborators(self) -> None:
        """Check that suggest_collaborators returns a list of names."""
        sample_args = [DATA_DICT, ('Tafliovich', 'Anya Y.')]
        self._check_list_of_names(arxiv.suggest_collaborators, sample_args)

    def test_has_prolific_authors(self) -> None:
        """Check that has_prolific_authors returns a bool."""
        by_author = {
            ('Ponce', 'Marcelo'): ['008', '827'],
            ('Tafliovich', 'Anya Y.'): ['008', '827'],
            ('Bretscher', 'Anna'): ['827'],
        }
        sample_args = [by_author, DATA_DICT['008'], 2]
        self._check_simple_type(arxiv.has_prolific_authors, sample_args, bool)

    def test_keep_prolific_authors(self) -> None:
        """Check keep_prolific_authors; only verifies that the return value
        is None.
        """
        sample_args = [DATA_DICT, 2]
        self._check_simple_type(arxiv.keep_prolific_authors, sample_args,
                                type(None))

    def test_check_constants(self) -> None:
        """Check that the module constants still have their original
        values.
        """
        print('\nChecking that constants refer to their original values')
        self._check_constants(CONSTANTS, arxiv)
        print(' check complete')

    def _check_list_of_names(self, func: callable, args: list) -> None:
        """Check that calling func with arguments args yields a
        List[constants.NameType], printing progress and outcome.
        """
        print(f'\nChecking {func.__name__}...')
        outcome = checker.type_check_simple(func, args, list)
        self.assertTrue(outcome[0], outcome[1])
        complaint = (f'{func.__name__} should return a list of tuples of two strs:\n'
                     '[(lastname1, firstname1), (lastname2, firstname2), ...]\n'
                     'Test your function thoroughly!')
        self.assertTrue(_all_names(outcome[1]), complaint)
        print(' check complete')

    def _check_simple_type(self, func: callable, args: list,
                           expected: type) -> None:
        """Check that calling func with arguments args yields a value of type
        expected, printing progress and outcome.
        """
        print(f'\nChecking {func.__name__}...')
        outcome = checker.type_check_simple(func, args, expected)
        self.assertTrue(outcome[0], outcome[1])
        print(' check complete')

    def _check_list_of_type(self, func: callable, args: list,
                            typ: type) -> None:
        """Check that calling func with arguments args yields a list whose
        values are of type typ, printing progress and outcome.
        """
        print(f'\nChecking {func.__name__}...')
        outcome = checker.returns_list_of(func, args, typ)
        self.assertTrue(outcome[0], outcome[1])
        print(' check complete')

    def _check_constants(self, name2value: Dict[str, Any], mod: Any) -> None:
        """Check that every name in name2value is an attribute of module mod
        whose value equals the paired value.
        """
        for name, expected in name2value.items():
            actual = getattr(mod, name)
            complaint = f'The value of {name} should be {expected} but is {actual}.'
            self.assertEqual(expected, actual, complaint)
def _all_names(obj: Any) -> bool:
    """Return True iff each item of the iterable obj passes _is_name."""
    for candidate in obj:
        if not _is_name(candidate):
            return False
    return True
def _is_name(obj: Any) -> bool:
"""Return whether obj is a name, i.e. a Tuple[str, str]."""
return (isinstance(obj, tuple) and len(obj) == 2 and
isinstance(obj[0], str) and isinstance(obj[1], str))
# Driver: runs at import/execution time, in this order.
# 1. Run checker_generic's ensure_no_io check on the student module.
checker.ensure_no_io(MODULENAME)
# 2. Style phase: run PythonTA on the module's .py file with the given config,
#    framed by banner lines TARGET_LEN wide padded with SEP.
print(''.center(TARGET_LEN, SEP))
print(' Start: checking coding style '.center(TARGET_LEN, SEP))
checker.run_pyta(MODULENAME + '.py', PYTA_CONFIG)
print(' End checking coding style '.center(TARGET_LEN, SEP))
# 3. Type-contract phase: run the unittest cases defined in this file.
#    exit=False keeps the interpreter alive so the closing messages print.
print(' Start: checking type contracts '.center(TARGET_LEN, SEP))
unittest.main(exit=False)
print(' End checking type contracts '.center(TARGET_LEN, SEP))
# 4. Remind the user that both phases produced output above.
print('\nScroll up to see ALL RESULTS:')
print(' - checking coding style')
print(' - checking type contract\n')
ARXIV FUNCTION
"""CSC108: Fall 2021 -- Assignment 3: arxiv.org
This code is provided solely for the personal and private use of
students taking the CSC108/CSCA08 course at the University of
Toronto. Copying for purposes other than this use is expressly
prohibited. All forms of distribution of this code, whether as given
or with any changes, are expressly prohibited.
All of the files in this directory and all subdirectories are:
Copyright (c) 2021 Anya Tafliovich, Michelle Craig, Tom Fairgrieve, Sadia
Sharmin, and Jacqueline Smith.
"""
# importing copy for use in the keep_prolific_authors docstring
# you do not need to use it anywhere else
import copy
from typing import Dict, List, TextIO
from constants import (ID, TITLE, CREATED, MODIFIED, AUTHORS, ABSTRACT, END,
NameType, ArticleValueType, ArticleType, ArxivType)
# Sample id -> article dictionary used by the docstring examples in this
# module and compared against the parsed 'example_data.txt' in the __main__
# block. It deliberately includes articles with an empty created date ('031'),
# an empty modified date ('008'), and no title or authors ('042').
EXAMPLE_ARXIV = {
'031': {
ID: '031',
TITLE: 'Calculus is the Best Course Ever',
CREATED: '',
MODIFIED: '2021-09-02',
AUTHORS: [('Breuss', 'Nataliya')],
ABSTRACT: 'We discuss the reasons why Calculus is the best course.'},
'067': {
ID: '067',
TITLE: 'Discrete Mathematics is the Best Course Ever',
CREATED: '2021-09-02',
MODIFIED: '2021-10-01',
AUTHORS: [('Pancer', 'Richard'), ('Bretscher', 'Anna')],
ABSTRACT: ('We explain why Discrete Mathematics is the best ' +
'course of all times.')},
'827': {
ID: '827',
TITLE: 'University of Toronto is the Best University',
CREATED: '2021-08-20',
MODIFIED: '2021-10-02',
AUTHORS: [('Ponce', 'Marcelo'), ('Bretscher', 'Anna'),
('Tafliovich', 'Anya Y.')],
ABSTRACT: 'We show a formal proof that the University of\n' +
'Toronto is the best university.'},
'008': {
ID: '008',
TITLE: 'Intro to CS is the Best Course Ever',
CREATED: '2021-09-01',
MODIFIED: '',
AUTHORS: [('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')],
ABSTRACT: 'We present clear evidence that Introduction to\n' + \
'Computer Science is the best course.'},
'042': {
ID: '042',
TITLE: '',
CREATED: '2021-05-04',
MODIFIED: '2021-05-05',
AUTHORS: [],
ABSTRACT: 'This is a strange article with no title\n' + \
'and no authors.\n\nIt also has a blank line in its abstract!'}
}
# Author name -> sorted list of article ids for EXAMPLE_ARXIV; this is the
# value the make_author_to_articles docstring example expects.
EXAMPLE_BY_AUTHOR = {
('Ponce', 'Marcelo'): ['008', '827'],
('Tafliovich', 'Anya Y.'): ['008', '827'],
('Bretscher', 'Anna'): ['067', '827'],
('Breuss', 'Nataliya'): ['031'],
('Pancer', 'Richard'): ['067']
}
################################################
## Task 1
################################################
# a helper to remove non-alphabetic characters
def clean_word(word: str) -> str:
    """Return a lowercase copy of word that keeps only its alphabetic
    characters.

    Precondition: word contains no whitespace

    >>> clean_word('Hello!!!')
    'hello'
    >>> clean_word('12cat.dog?')
    'catdog'
    >>> clean_word("DON'T")
    'dont'
    """
    letters = [ch.lower() for ch in word if ch.isalpha()]
    return ''.join(letters)
# Add your other Task 1 functions here
def created_in_year(ArxivType: Dict[str, dict], id: str, year: int) -> bool:
    """Return True if and only if an article with identifier id exists in
    the id-to-article dictionary ArxivType and the year part of its created
    date equals year.

    The created date is read as text whose part before the first '-' is the
    year. Return False (never None) when id is unknown, or when the article
    has an empty or non-numeric created date.

    >>> created_in_year(EXAMPLE_ARXIV, '008', 2021)
    True
    >>> created_in_year(EXAMPLE_ARXIV, '008', 2020)
    False
    >>> created_in_year(EXAMPLE_ARXIV, '031', 2021)
    False
    >>> created_in_year(EXAMPLE_ARXIV, 'no-such-id', 2021)
    False
    """
    article = ArxivType.get(id)
    if article is None:
        return False
    # A missing, None or empty created date simply never matches a year.
    created = article.get(CREATED) or ''
    try:
        return int(created.split('-')[0]) == year
    except ValueError:
        return False
def contains_keyword(id_to_article: ArxivType, word: str) -> List[str]:
    """Return the IDs, sorted in lexicographic order, of the articles in
    id_to_article whose title or abstract contains the keyword word.

    The keyword and every whitespace-separated word of the text are compared
    after being passed through clean_word, so the match is case-insensitive,
    ignores punctuation, and is on whole words rather than substrings. A
    keyword with no alphabetic characters matches nothing.

    >>> contains_keyword(EXAMPLE_ARXIV, 'Ever!')
    ['008', '031', '067']
    >>> contains_keyword(EXAMPLE_ARXIV, 'is')
    ['008', '031', '042', '067', '827']
    >>> contains_keyword(EXAMPLE_ARXIV, 'he')
    []
    >>> contains_keyword(EXAMPLE_ARXIV, '123')
    []
    """
    keyword = clean_word(word)
    if not keyword:
        # '' is a substring of every text; treat it as "no keyword given".
        return []

    matches = []
    for article_id, article in id_to_article.items():
        text = '{} {}'.format(article[TITLE] or '', article[ABSTRACT] or '')
        if any(clean_word(token) == keyword for token in text.split()):
            matches.append(article_id)
    matches.sort()
    return matches
################################################
## Task 2
################################################
def extract_article_data(data: List[str]) -> ArticleType:
    """Return the article described by data, the whitespace-stripped lines of
    one record of an arxiv metadata file (without its END line).

    The lines are read in this order: identifier, title, created date,
    modified date, zero or more 'lastname,firstname' author lines, one blank
    separator line, then the abstract lines. Abstract lines are joined with
    newlines so that multi-line abstracts (including blank lines inside them)
    are preserved. Authors are kept in the order in which they are listed.

    Raise IndexError if data has fewer than four lines.

    >>> record = ['008', 'Intro to CS', '2021-09-01', '', 'Ponce,Marcelo',
    ...           '', 'First abstract line', 'second abstract line.']
    >>> article = extract_article_data(record)
    >>> article[AUTHORS]
    [('Ponce', 'Marcelo')]
    >>> print(article[ABSTRACT])
    First abstract line
    second abstract line.
    """
    article_id = data[0]
    title = data[1]
    created = data[2]
    modified = data[3]

    # Author lines run from index 4 up to the blank separator line. When the
    # article has no authors the separator itself sits at index 4.
    authors = []
    index = 4
    while index < len(data) and data[index] != '':
        authors.append(tuple(data[index].split(',')))
        index += 1

    # Everything after the separator is the abstract.
    abstract = '\n'.join(data[index + 1:])

    return {
        ID: article_id,
        TITLE: title,
        CREATED: created,
        MODIFIED: modified,
        AUTHORS: authors,
        ABSTRACT: abstract,
    }
def read_arxiv_file(f: TextIO) -> ArxivType:
    """Return a ArxivType dictionary containing the arxiv metadata in f.

    Each record in f is a run of lines terminated by a line equal to END;
    the lines of a record are stripped of surrounding whitespace and handed
    to extract_article_data. Blank lines before the first line of a record
    (for example at the very end of the file) are skipped, and a final record
    that is not followed by END is still read.

    The returned dictionary is ordered by article title, with untitled
    articles last.

    Note we do not include example calls for functions that take open files.
    """
    id_to_article = {}
    record = []
    for raw_line in f:
        line = raw_line.strip()
        if line == END:
            article = extract_article_data(record)
            id_to_article[article[ID]] = article
            record = []
        elif line or record:
            # Keep blank lines inside a record (they separate authors from
            # the abstract) but not stray blank lines between records.
            record.append(line)

    if record:
        # The file ended without a closing END line.
        article = extract_article_data(record)
        id_to_article[article[ID]] = article

    def title_key(item):
        # Untitled articles get a key that sorts after ordinary titles.
        title = item[1][TITLE]
        return title if title else 'z' * 100

    return dict(sorted(id_to_article.items(), key=title_key))
# Add your helper functions for Task 2 here
################################################
## Task 3
################################################
def make_author_to_articles(id_to_article: ArxivType
                            ) -> Dict[NameType, List[str]]:
    """Return a dict mapping every author name found in id_to_article to
    the list of IDs of the articles that author wrote, each list sorted in
    lexicographic order.

    >>> make_author_to_articles(EXAMPLE_ARXIV) == EXAMPLE_BY_AUTHOR
    True
    >>> make_author_to_articles({})
    {}
    """
    by_author = {}
    # Group article IDs under each of their authors.
    for article_id, article in id_to_article.items():
        for name in article[AUTHORS]:
            by_author.setdefault(name, []).append(article_id)
    # Put every author's IDs in lexicographic order.
    for ids in by_author.values():
        ids.sort()
    return by_author
def get_coauthors(data: ArxivType, author: NameType) -> List[NameType]:
    """Return the names of everyone who shares at least one article in data
    with author, without duplicates and sorted in lexicographic order (by
    last name, then first name). author is never part of the result.

    >>> get_coauthors(EXAMPLE_ARXIV, ('Tafliovich', 'Anya Y.'))
    [('Bretscher', 'Anna'), ('Ponce', 'Marcelo')]
    >>> get_coauthors(EXAMPLE_ARXIV, ('Breuss', 'Nataliya'))
    []
    >>> get_coauthors(EXAMPLE_ARXIV, ('Nobody', 'At All'))
    []
    """
    coauthors = set()
    for article in data.values():
        names = article[AUTHORS]
        if author in names:
            coauthors.update(name for name in names if name != author)
    # Sorting the full tuples orders authors who share a last name by their
    # first name instead of leaving them in dictionary order.
    return sorted(coauthors)
def get_most_published_authors(data: ArxivType) -> List[NameType]:
    """Return the names of the authors who wrote the largest number of
    articles in data, sorted in lexicographic order (by last name, then
    first name). Every author tied for the maximum is included. Return an
    empty list when data contains no authors at all.

    >>> get_most_published_authors(EXAMPLE_ARXIV)
    [('Bretscher', 'Anna'), ('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')]
    >>> get_most_published_authors({})
    []
    """
    article_counts = {}
    for article in data.values():
        for name in article[AUTHORS]:
            article_counts[name] = article_counts.get(name, 0) + 1

    if not article_counts:
        # max() of an empty sequence would raise ValueError.
        return []

    top_count = max(article_counts.values())
    return sorted(name for name, count in article_counts.items()
                  if count == top_count)
def suggest_collaborators(data: ArxivType, author: NameType) -> List[NameType]:
    """Return the coauthors of author's coauthors in data, leaving out
    author and anyone who is already a coauthor of author. The result is
    ordered by last name.
    """
    direct = get_coauthors(data, author)
    suggestions = []
    for colleague in direct:
        # Filter this colleague's coauthors against what has been gathered
        # so far, then add the survivors in one step.
        fresh = []
        for candidate in get_coauthors(data, colleague):
            if candidate == author:
                continue
            if candidate in suggestions or candidate in direct:
                continue
            fresh.append(candidate)
        suggestions.extend(fresh)
    suggestions.sort(key=lambda name: name[0])
    return suggestions
# Add your other functions for Task 3 here
################################################
## Task 4
################################################
# Add your Task 4 functions here
def has_prolific_authors(data: Dict[NameType, List[str]],
                         id: "str | ArticleType",
                         min_number: int) -> bool:
    """Return True if and only if at least one author in data who wrote the
    given article has min_number or more articles.

    data maps author names to lists of article IDs. The article may be given
    either as its ID or as an article dictionary, in which case the value
    stored under its ID key is used.

    >>> has_prolific_authors(EXAMPLE_BY_AUTHOR, '008', 2)
    True
    >>> has_prolific_authors(EXAMPLE_BY_AUTHOR, EXAMPLE_ARXIV['008'], 2)
    True
    >>> has_prolific_authors(EXAMPLE_BY_AUTHOR, EXAMPLE_ARXIV['031'], 2)
    False
    >>> has_prolific_authors(EXAMPLE_BY_AUTHOR, '008', 3)
    False
    """
    # Comparing an article dictionary against the ID strings in data would
    # never match, so reduce it to its ID first.
    article_id = id[ID] if isinstance(id, dict) else id
    return any(article_id in article_ids and len(article_ids) >= min_number
               for article_ids in data.values())
def keep_prolific_authors(id_to_article: ArxivType,
                          min_publications: int) -> None:
    """Mutate id_to_article so that only articles with at least one author
    who has published min_publications or more articles remain. A single
    such author is enough for the article to be kept.

    >>> arxiv_copy = copy.deepcopy(EXAMPLE_ARXIV)
    >>> keep_prolific_authors(arxiv_copy, 2)
    >>> len(arxiv_copy)
    3
    >>> '008' in arxiv_copy and '067' in arxiv_copy and '827' in arxiv_copy
    True
    >>> arxiv_copy = copy.deepcopy(EXAMPLE_ARXIV)
    >>> keep_prolific_authors(arxiv_copy, 3)
    >>> arxiv_copy
    {}
    """
    # Publication lists are computed once, before anything is removed.
    by_author = make_author_to_articles(id_to_article)

    # Decide first, delete afterwards: a dict must not change size while it
    # is being iterated over.
    doomed = []
    for article_id, article in id_to_article.items():
        keep = bool(article[AUTHORS]) and has_prolific_authors(
            by_author, article_id, min_publications)
        if not keep:
            doomed.append(article_id)

    for article_id in doomed:
        del id_to_article[article_id]
if __name__ == '__main__':
    # Run the docstring examples in this module. doctest requires the
    # examples to be perfectly formatted.
    import doctest
    doctest.testmod()

    # Work with the small data set. The with-statement closes the file even
    # if reading it raises.
    with open('example_data.txt') as example_data:
        example_arxiv = read_arxiv_file(example_data)
    if example_arxiv == EXAMPLE_ARXIV:
        print('The dictionary you produced matches EXAMPLE_ARXIV!')
        print('This is a good sign, but do more of your own testing!')
    else:
        # If you are getting this message, try setting a breakpoint on the
        # line that calls read_arxiv_file above and running the debugger
        print('Expected to get', EXAMPLE_ARXIV)
        print('But got', example_arxiv)

    # Work with the larger data set.
    with open('data.txt') as data:
        arxiv = read_arxiv_file(data)
    author_to_articles = make_author_to_articles(arxiv)
    most_published = get_most_published_authors(arxiv)
    print(most_published)
    print(get_coauthors(arxiv, ('Varanasi', 'Mahesh K.')))  # one
    print(get_coauthors(arxiv, ('Chablat', 'Damien')))  # many
CONSTANTS
"""CSC108: Fall 2021 -- Assignment 3: arxiv.org
This code is provided solely for the personal and private use of
students taking the CSC108/CSCA08 course at the University of
Toronto. Copying for purposes other than this use is expressly
prohibited. All forms of distribution of this code, whether as given
or with any changes, are expressly prohibited.
All of the files in this directory and all subdirectories are:
Copyright (c) 2021 Anya Tafliovich, Michelle Craig, Tom Fairgrieve, Sadia
Sharmin, and Jacqueline Smith.
"""
from typing import Dict, List, Tuple, Union
# Keys of an article dictionary (see ArticleType below).
ID = 'identifier'
TITLE = 'title'
CREATED = 'created'
MODIFIED = 'modified'
AUTHORS = 'authors'
ABSTRACT = 'abstract'
# Text of the line that closes each article record in a metadata file.
END = 'END'
# We store names as tuples of two strs: (last-name, first-name(s)).
NameType = Tuple[str, str]
# ArticleValueType is the type for valid values in the ArticleType
# dict. All values are str, except for the value associated with
# key AUTHORS, which is a List of NameType.
# Note that we have not introduced Union - you can read it as "or"
ArticleValueType = Union[str, List[NameType]]
# ArticleType is a dict that maps keys ID, TITLE, CREATED, MODIFIED,
# AUTHORS, and ABSTRACT to their values (of type ArticleValueType).
ArticleType = Dict[str, ArticleValueType]
# ArxivType is a dict that maps article identifiers to articles,
# i.e. to values of type ArticleType.
ArxivType = Dict[str, ArticleType]
Similar Samples
Browse through our curated samples at ProgrammingHomeworkHelp.com to witness our expertise in delivering high-quality programming solutions. From algorithm design to debugging complex code, our samples exemplify precision and proficiency across various languages. Experience firsthand how our solutions can elevate your understanding and grades in programming assignments.
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python