×
Reviews 4.9/5 Order Now

Create a Program to Implement Meta Data in Python Assignment Solution

July 08, 2024
Dr. Matthew Hernandez
Dr. Matthew
🇨🇭 Switzerland
Python
Dr. Matthew Hernandez, an esteemed Computer Science researcher, obtained his PhD from ETH Zurich, Switzerland. With 6 years of experience under his belt, he has successfully completed over 400 Python assignments, demonstrating his proficiency and commitment to excellence.
Key Topics
  • Instructions
    • Objective
  • Requirements and Specifications
Tip of the day
Always start SQL assignments by understanding the schema and relationships between tables. Use proper indentation and aliases for clarity, and test queries incrementally to catch errors early.
News
Owl Scientific Computing 1.2: Updated on December 24, 2024, Owl is a numerical programming library for the OCaml language, offering advanced features for scientific computing.

Instructions

Objective

Write a Python program that implements metadata handling for this assignment.

Requirements and Specifications

program-to-implement-meta-data-in-python
program-to-implement-meta-data-in-python 1

Source Code

# ---------------------------------------------------------------------------
# A3 -- listing 1 of 3: type-contract checker for the arxiv_functions module
# ---------------------------------------------------------------------------
"""CSC108: Fall 2021 -- Assignment 3: arxiv.org

This code is provided solely for the personal and private use of
students taking the CSC108/CSCA08 course at the University of Toronto.
Copying for purposes other than this use is expressly prohibited.
All forms of distribution of this code, whether as given or with
any changes, are expressly prohibited.

All of the files in this directory and all subdirectories are:
Copyright (c) 2021 Anya Tafliovich, Michelle Craig, Tom Fairgrieve, Sadia
Sharmin, and Jacqueline Smith.
"""

from io import StringIO
from typing import Any, Callable, Dict
import unittest

import arxiv_functions as arxiv
import checker_generic as checker

MODULENAME = 'arxiv_functions'
PYTA_CONFIG = 'a3_pyta.json'
TARGET_LEN = 79
SEP = '='

# Expected values of the constants that arxiv_functions must expose unchanged.
CONSTANTS = {
    'ID': 'identifier',
    'TITLE': 'title',
    'CREATED': 'created',
    'MODIFIED': 'modified',
    'AUTHORS': 'authors',
    'ABSTRACT': 'abstract',
    'END': 'END',
}

# Two sample records in the layout read_arxiv_file parses: identifier, title,
# created date, modified date (a blank line when absent), one author per
# line, a blank separator line, the abstract lines, and a closing END line.
DATA_FILE = """008
Intro to CS is the best course ever
2021-09-01

Ponce,Marcelo
Tafliovich,Anya Y.

We present clear evidence that Introduction to
Computer Science is the best course.
END
827
University of Toronto is the best university
2021-08-20
2021-10-02
Ponce,Marcelo
Bretscher,Anna
Tafliovich,Anya Y.

We show a formal proof that the University of
Toronto is the best university.
END
"""

DATA_DICT = {
    '008': {
        'identifier': '008',
        'title': 'Intro to CS is the best course ever',
        'created': '2021-09-01',
        'modified': '',
        'authors': [('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')],
        'abstract': ('We present clear evidence that Introduction to\n'
                     'Computer Science is the best course.')},
    '827': {
        'identifier': '827',
        'title': 'University of Toronto is the best university',
        'created': '2021-08-20',
        'modified': '2021-10-02',
        'authors': [('Bretscher', 'Anna'), ('Ponce', 'Marcelo'),
                    ('Tafliovich', 'Anya Y.')],
        'abstract': ('We show a formal proof that the University of\n'
                     'Toronto is the best university.')},
}


class CheckTest(unittest.TestCase):
    """A simple checker (NOT a full tester!) for assignment functions."""

    def test_contains_keyword(self) -> None:
        """A simple check for contains_keyword."""
        self._check_list_of_type(arxiv.contains_keyword,
                                 [DATA_DICT, 'is'], str)

    def test_created_in_year(self) -> None:
        """A simple check for created_in_year."""
        self._check_simple_type(arxiv.created_in_year,
                                [DATA_DICT, '008', 2021], bool)

    def test_read_arxiv_file(self) -> None:
        """A simple check for read_arxiv_file."""
        print('\nChecking read_arxiv_file...')
        result = checker.returns_dict_of(
            arxiv.read_arxiv_file, [StringIO(DATA_FILE)], str, dict)
        self.assertTrue(result[0], result[1])

        valid_keys = {'identifier', 'title', 'created', 'modified',
                      'authors', 'abstract'}
        msg = 'Value corresponding to key "{}" should be a {}.'
        for article in result[1].values():
            # 'authors' is the only non-str field, so it is checked apart.
            self.assertTrue(isinstance(article['authors'], list) and
                            _all_names(article['authors']),
                            msg.format('authors', 'list of names'))
            for key in valid_keys - {'authors'}:
                self.assertTrue(article[key] is None or
                                isinstance(article[key], str),
                                msg.format(key, 'str or None'))
        print(' check complete')

    def test_make_author_to_article(self) -> None:
        """A simple check for make_author_to_articles."""
        print('\nChecking make_author_to_articles...')
        result = checker.type_check_simple(
            arxiv.make_author_to_articles, [DATA_DICT], dict)
        self.assertTrue(result[0], result[1])

        msg = ('make_author_to_articles should return a dict in which\n'
               'keys are names (Tuple[str, str]) and values are lists of\n'
               'article IDs (List[str]).')
        for key, value in result[1].items():
            self.assertTrue(_is_name(key) and
                            isinstance(value, list) and
                            all(isinstance(elt, str) for elt in value),
                            msg)
        print(' check complete')

    def test_get_coauthors(self) -> None:
        """A simple check for get_coauthors."""
        self._check_list_of_names(arxiv.get_coauthors,
                                  [DATA_DICT, ('Tafliovich', 'Anya Y.')])

    def test_get_most_published_authors(self) -> None:
        """A simple check for get_most_published_authors."""
        self._check_list_of_names(arxiv.get_most_published_authors,
                                  [DATA_DICT])

    def test_suggest_collaborators(self) -> None:
        """A simple check for suggest_collaborators."""
        self._check_list_of_names(arxiv.suggest_collaborators,
                                  [DATA_DICT, ('Tafliovich', 'Anya Y.')])

    def test_has_prolific_authors(self) -> None:
        """A simple check for has_prolific_authors."""
        by_author = {
            ('Ponce', 'Marcelo'): ['008', '827'],
            ('Tafliovich', 'Anya Y.'): ['008', '827'],
            ('Bretscher', 'Anna'): ['827'],
        }
        self._check_simple_type(arxiv.has_prolific_authors,
                                [by_author, DATA_DICT['008'], 2], bool)

    def test_keep_prolific_authors(self) -> None:
        """A simple check for keep_prolific_authors: only checks that
        the return value is None."""
        self._check_simple_type(arxiv.keep_prolific_authors,
                                [DATA_DICT, 2], type(None))

    def test_check_constants(self) -> None:
        """Check that values of constants are not changed."""
        print('\nChecking that constants refer to their original values')
        self._check_constants(CONSTANTS, arxiv)
        print(' check complete')

    def _check_list_of_names(self, func: Callable, args: list) -> None:
        """Check that func called with arguments args returns a
        List[constants.NameType]. Display progress and outcome of the
        check.
        """
        print('\nChecking {}...'.format(func.__name__))
        result = checker.type_check_simple(func, args, list)
        self.assertTrue(result[0], result[1])

        msg = ('{} should return a list of tuples of two strs:\n'
               '[(lastname1, firstname1), (lastname2, firstname2), ...]\n'
               'Test your function thoroughly!').format(func.__name__)
        self.assertTrue(_all_names(result[1]), msg)
        print(' check complete')

    def _check_simple_type(self, func: Callable, args: list,
                           expected: type) -> None:
        """Check that func called with arguments args returns a value of
        type expected. Display the progress and the result of the check.
        """
        print('\nChecking {}...'.format(func.__name__))
        result = checker.type_check_simple(func, args, expected)
        self.assertTrue(result[0], result[1])
        print(' check complete')

    def _check_list_of_type(self, func: Callable, args: list,
                            typ: type) -> None:
        """Check that func called with arguments args returns a list
        whose values are all of type typ. Display the progress and the
        result of the check.
        """
        print('\nChecking {}...'.format(func.__name__))
        result = checker.returns_list_of(func, args, typ)
        self.assertTrue(result[0], result[1])
        print(' check complete')

    def _check_constants(self, name2value: Dict[str, Any],
                         mod: Any) -> None:
        """Check that, for each (name, value) pair in name2value, the
        value of a variable named name in module mod is value.
        """
        for name, expected in name2value.items():
            actual = getattr(mod, name)
            msg = 'The value of {} should be {} but is {}.'.format(
                name, expected, actual)
            self.assertEqual(expected, actual, msg)


def _all_names(obj: Any) -> bool:
    """Return whether every item in obj is of type constants.NameType."""
    return all(_is_name(name) for name in obj)


def _is_name(obj: Any) -> bool:
    """Return whether obj is a name, i.e. a Tuple[str, str]."""
    return (isinstance(obj, tuple) and len(obj) == 2 and
            isinstance(obj[0], str) and isinstance(obj[1], str))


# The checks below run at module level, exactly as in the original script.
checker.ensure_no_io(MODULENAME)

print(''.center(TARGET_LEN, SEP))
print(' Start: checking coding style '.center(TARGET_LEN, SEP))
checker.run_pyta(MODULENAME + '.py', PYTA_CONFIG)
print(' End checking coding style '.center(TARGET_LEN, SEP))

print(' Start: checking type contracts '.center(TARGET_LEN, SEP))
unittest.main(exit=False)
print(' End checking type contracts '.center(TARGET_LEN, SEP))

print('\nScroll up to see ALL RESULTS:')
print(' - checking coding style')
print(' - checking type contract\n')


# ---------------------------------------------------------------------------
# ARXIV FUNCTION -- listing 2 of 3: the arxiv_functions module checked above
# ---------------------------------------------------------------------------
"""CSC108: Fall 2021 -- Assignment 3: arxiv.org

This code is provided solely for the personal and private use of
students taking the CSC108/CSCA08 course at the University of Toronto.
Copying for purposes other than this use is expressly prohibited.
All forms of distribution of this code, whether as given or with
any changes, are expressly prohibited.

All of the files in this directory and all subdirectories are:
Copyright (c) 2021 Anya Tafliovich, Michelle Craig, Tom Fairgrieve, Sadia
Sharmin, and Jacqueline Smith.
"""

# importing copy for use in the keep_prolific_authors docstring
# you do not need to use it anywhere else
import copy
from typing import Dict, List, TextIO, Union

from constants import (ID, TITLE, CREATED, MODIFIED, AUTHORS, ABSTRACT, END,
                       NameType, ArticleValueType, ArticleType, ArxivType)


EXAMPLE_ARXIV = {
    '031': {
        ID: '031',
        TITLE: 'Calculus is the Best Course Ever',
        CREATED: '',
        MODIFIED: '2021-09-02',
        AUTHORS: [('Breuss', 'Nataliya')],
        ABSTRACT: 'We discuss the reasons why Calculus is the best course.'},
    '067': {
        ID: '067',
        TITLE: 'Discrete Mathematics is the Best Course Ever',
        CREATED: '2021-09-02',
        MODIFIED: '2021-10-01',
        AUTHORS: [('Pancer', 'Richard'), ('Bretscher', 'Anna')],
        ABSTRACT: ('We explain why Discrete Mathematics is the best ' +
                   'course of all times.')},
    '827': {
        ID: '827',
        TITLE: 'University of Toronto is the Best University',
        CREATED: '2021-08-20',
        MODIFIED: '2021-10-02',
        AUTHORS: [('Ponce', 'Marcelo'), ('Bretscher', 'Anna'),
                  ('Tafliovich', 'Anya Y.')],
        ABSTRACT: ('We show a formal proof that the University of\n' +
                   'Toronto is the best university.')},
    '008': {
        ID: '008',
        TITLE: 'Intro to CS is the Best Course Ever',
        CREATED: '2021-09-01',
        MODIFIED: '',
        AUTHORS: [('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')],
        ABSTRACT: ('We present clear evidence that Introduction to\n' +
                   'Computer Science is the best course.')},
    '042': {
        ID: '042',
        TITLE: '',
        CREATED: '2021-05-04',
        MODIFIED: '2021-05-05',
        AUTHORS: [],
        ABSTRACT: ('This is a strange article with no title\n' +
                   'and no authors.\n\nIt also has a blank line in its '
                   'abstract!')},
}

EXAMPLE_BY_AUTHOR = {
    ('Ponce', 'Marcelo'): ['008', '827'],
    ('Tafliovich', 'Anya Y.'): ['008', '827'],
    ('Bretscher', 'Anna'): ['067', '827'],
    ('Breuss', 'Nataliya'): ['031'],
    ('Pancer', 'Richard'): ['067'],
}


################################################
## Task 1
################################################

# a helper to remove non-alphabetic characters
def clean_word(word: str) -> str:
    """Return word with all non-alphabetic characters removed and converted
    to lowercase.

    Precondition: word contains no whitespace

    >>> clean_word('Hello!!!')
    'hello'
    >>> clean_word('12cat.dog?')
    'catdog'
    >>> clean_word("DON'T")
    'dont'
    """
    return ''.join(ch.lower() for ch in word if ch.isalpha())


def created_in_year(ArxivType: Dict[str, ArticleType], id: str,
                    year: int) -> bool:
    """Return True if and only if the article with identifier id exists and
    was created in the given year.

    Return False when there is no article with that id, or when the article
    has no creation date recorded.

    >>> created_in_year(EXAMPLE_ARXIV, '008', 2021)
    True
    >>> created_in_year(EXAMPLE_ARXIV, '008', 2020)
    False
    >>> created_in_year(EXAMPLE_ARXIV, '031', 2021)
    False
    >>> created_in_year(EXAMPLE_ARXIV, 'no-such-id', 2021)
    False
    """
    # NOTE: the parameter names are kept from the original signature so
    # existing callers keep working; inside this function the first
    # parameter shadows the ArxivType alias imported from constants.
    article = ArxivType.get(id)
    if article is None:
        return False

    created = article[CREATED]
    if not created:
        # An empty date cannot be converted to a year (int('') raises).
        return False

    # Dates look like 'YYYY-MM-DD', so the year is the first field.
    return int(created.split('-')[0]) == year


def contains_keyword(id_to_article: ArxivType, word: str) -> List[str]:
    """Return a sorted list of the IDs of the articles in id_to_article
    whose title or abstract contains word.

    Matching is done on whole words, after both word and the words of the
    text have been passed through clean_word, so 'is' does not match
    'This'. A word that is empty once cleaned matches nothing.

    >>> contains_keyword(EXAMPLE_ARXIV, 'course')
    ['008', '031', '067']
    >>> contains_keyword(EXAMPLE_ARXIV, 'Toronto!')
    ['827']
    >>> contains_keyword(EXAMPLE_ARXIV, 'cal')
    []
    >>> contains_keyword(EXAMPLE_ARXIV, '2021')
    []
    """
    keyword = clean_word(word)
    if not keyword:
        return []

    matches = []
    for article_id, article in id_to_article.items():
        text = article[TITLE] + ' ' + article[ABSTRACT]
        if any(clean_word(token) == keyword for token in text.split()):
            matches.append(article_id)
    return sorted(matches)


################################################
## Task 2
################################################

def extract_article_data(data: List[str]) -> ArticleType:
    """Return the article described by data, the stripped lines of one
    record without its closing END line.

    The lines are, in order: identifier, title, created date, modified
    date, zero or more 'last-name,first-name' author lines, one blank
    separator line, and the abstract lines. Abstract lines are joined with
    newlines, so a blank line inside the abstract is preserved.

    Raise ValueError if data has fewer than the four header lines.

    >>> lines = ['042', '', '2021-05-04', '2021-05-05', '',
    ...          'No title,', 'no authors.']
    >>> article = extract_article_data(lines)
    >>> article[AUTHORS]
    []
    >>> article[ABSTRACT].splitlines()
    ['No title,', 'no authors.']
    >>> lines = ['008', 'T', '2021-09-01', '', 'Ponce,Marcelo', '', 'Text']
    >>> extract_article_data(lines)[AUTHORS]
    [('Ponce', 'Marcelo')]
    """
    if len(data) < 4:
        raise ValueError('an article record needs at least 4 lines: '
                         'identifier, title, created and modified dates')
    article_id, title, created, modified = data[:4]

    # Author lines run from index 4 up to the first blank line (or the end
    # of the record, if the separator and the abstract are missing).
    authors = []
    index = 4
    while index < len(data) and data[index] != '':
        last_name, _, first_name = data[index].partition(',')
        authors.append((last_name, first_name))
        index += 1

    # Skip the blank separator; everything after it is the abstract.
    abstract = '\n'.join(data[index + 1:])

    return {
        ID: article_id,
        TITLE: title,
        CREATED: created,
        MODIFIED: modified,
        AUTHORS: authors,
        ABSTRACT: abstract,
    }


def read_arxiv_file(f: TextIO) -> ArxivType:
    """Return a ArxivType dictionary containing the arxiv metadata in f.

    Each record in f ends with a line containing only END. Blank lines
    before a record, and any lines after the last END, are ignored. The
    returned dictionary is ordered by article title, with untitled articles
    last.

    Note we do not include example calls for functions that take
    open files.
    """
    id_to_article = {}
    record = []
    for raw_line in f:
        line = raw_line.strip()
        if line == END:
            article = extract_article_data(record)
            id_to_article[article[ID]] = article
            record = []
        elif record or line:
            # Blank lines are only meaningful inside a record (empty
            # title or date, author/abstract separator), not before one.
            record.append(line)

    return dict(sorted(id_to_article.items(),
                       key=lambda item: (item[1][TITLE] == '',
                                         item[1][TITLE])))


################################################
## Task 3
################################################

def make_author_to_articles(id_to_article: ArxivType
                            ) -> Dict[NameType, List[str]]:
    """Return a dict that maps each author name to a list (sorted in
    lexicographic order) of IDs of articles written by that author,
    based on the information in id_to_article.

    >>> make_author_to_articles(EXAMPLE_ARXIV) == EXAMPLE_BY_AUTHOR
    True
    >>> make_author_to_articles({})
    {}
    """
    author_to_ids = {}
    for article_id, article in id_to_article.items():
        for author in article[AUTHORS]:
            author_to_ids.setdefault(author, []).append(article_id)

    for ids in author_to_ids.values():
        ids.sort()
    return author_to_ids


def get_coauthors(data: ArxivType, author: NameType) -> List[NameType]:
    """Return a sorted list, without duplicates, of everyone who shares at
    least one article in data with author.

    >>> get_coauthors(EXAMPLE_ARXIV, ('Tafliovich', 'Anya Y.'))
    [('Bretscher', 'Anna'), ('Ponce', 'Marcelo')]
    >>> get_coauthors(EXAMPLE_ARXIV, ('Breuss', 'Nataliya'))
    []
    """
    coauthors = set()
    for article in data.values():
        authors = article[AUTHORS]
        if author in authors:
            coauthors.update(name for name in authors if name != author)
    # Sorting whole (last, first) tuples makes the order deterministic even
    # when two authors share a last name.
    return sorted(coauthors)


def get_most_published_authors(data: ArxivType) -> List[NameType]:
    """Return a sorted list of the authors who appear on the largest number
    of articles in data. Return an empty list if data lists no authors.

    >>> get_most_published_authors(EXAMPLE_ARXIV)
    [('Bretscher', 'Anna'), ('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')]
    >>> get_most_published_authors({})
    []
    """
    article_counts = {}
    for article in data.values():
        for author in article[AUTHORS]:
            article_counts[author] = article_counts.get(author, 0) + 1

    if not article_counts:
        # max() of an empty sequence raises ValueError.
        return []

    highest = max(article_counts.values())
    return sorted(author for author, count in article_counts.items()
                  if count == highest)


def suggest_collaborators(data: ArxivType,
                          author: NameType) -> List[NameType]:
    """Return a sorted list of the coauthors of author's coauthors,
    excluding author and anyone who is already a coauthor of author.

    >>> suggest_collaborators(EXAMPLE_ARXIV, ('Pancer', 'Richard'))
    [('Ponce', 'Marcelo'), ('Tafliovich', 'Anya Y.')]
    >>> suggest_collaborators(EXAMPLE_ARXIV, ('Tafliovich', 'Anya Y.'))
    [('Pancer', 'Richard')]
    """
    direct = set(get_coauthors(data, author))

    suggested = set()
    for coauthor in direct:
        suggested.update(get_coauthors(data, coauthor))

    suggested -= direct
    suggested.discard(author)
    return sorted(suggested)


################################################
## Task 4
################################################

def has_prolific_authors(data: Dict[NameType, List[str]],
                         id: Union[str, ArticleType],
                         min_number: int) -> bool:
    """Return True if and only if at least one author of the given article
    has min_number or more articles listed in data, which maps author names
    to lists of article IDs.

    The article may be given either as its identifier or as an article
    dict, in which case its ID entry is used.

    >>> has_prolific_authors(EXAMPLE_BY_AUTHOR, '008', 2)
    True
    >>> has_prolific_authors(EXAMPLE_BY_AUTHOR, EXAMPLE_ARXIV['008'], 2)
    True
    >>> has_prolific_authors(EXAMPLE_BY_AUTHOR, EXAMPLE_ARXIV['031'], 2)
    False
    """
    # An article dict compared against ID strings never matches, so reduce
    # it to its identifier first.
    article_id = id[ID] if isinstance(id, dict) else id
    return any(article_id in article_ids and len(article_ids) >= min_number
               for article_ids in data.values())


def keep_prolific_authors(id_to_article: ArxivType,
                          min_publications: int) -> None:
    """Update id_to_article so that it contains only articles published by
    authors with min_publications or more articles published. As long
    as at least one of the authors has min_publications, the article
    is kept.

    >>> arxiv_copy = copy.deepcopy(EXAMPLE_ARXIV)
    >>> keep_prolific_authors(arxiv_copy, 2)
    >>> len(arxiv_copy)
    3
    >>> '008' in arxiv_copy and '067' in arxiv_copy and '827' in arxiv_copy
    True
    >>> arxiv_copy = copy.deepcopy(EXAMPLE_ARXIV)
    >>> keep_prolific_authors(arxiv_copy, 3)
    >>> arxiv_copy
    {}
    """
    author_to_ids = make_author_to_articles(id_to_article)

    # Collect first, delete afterwards: a dict must not change size while
    # it is being iterated.
    unwanted = [article_id for article_id in id_to_article
                if not has_prolific_authors(author_to_ids, article_id,
                                            min_publications)]
    for article_id in unwanted:
        del id_to_article[article_id]


if __name__ == '__main__':
    # note that doctest requires your docstring examples to be perfectly
    # formatted, and we will not be running doctest on your code
    import doctest
    doctest.testmod()

    # work with the small data set
    with open('example_data.txt') as example_data:
        example_arxiv = read_arxiv_file(example_data)
    if example_arxiv == EXAMPLE_ARXIV:
        print('The dictionary you produced matches EXAMPLE_ARXIV!')
        print('This is a good sign, but do more of your own testing!')
    else:
        # If you are getting this message, try setting a breakpoint on the
        # line that calls read_arxiv_file above and running the debugger
        print('Expected to get', EXAMPLE_ARXIV)
        print('But got', example_arxiv)

    # work with a larger data set
    with open('data.txt') as data:
        arxiv = read_arxiv_file(data)

    author_to_articles = make_author_to_articles(arxiv)
    most_published = get_most_published_authors(arxiv)
    print(most_published)
    print(get_coauthors(arxiv, ('Varanasi', 'Mahesh K.')))  # one
    print(get_coauthors(arxiv, ('Chablat', 'Damien')))  # many


# ---------------------------------------------------------------------------
# CONSTANTS -- listing 3 of 3: the constants module imported above
# ---------------------------------------------------------------------------
"""CSC108: Fall 2021 -- Assignment 3: arxiv.org

This code is provided solely for the personal and private use of
students taking the CSC108/CSCA08 course at the University of Toronto.
Copying for purposes other than this use is expressly prohibited.
All forms of distribution of this code, whether as given or with
any changes, are expressly prohibited.

All of the files in this directory and all subdirectories are:
Copyright (c) 2021 Anya Tafliovich, Michelle Craig, Tom Fairgrieve, Sadia
Sharmin, and Jacqueline Smith.
"""

from typing import Dict, List, Tuple, Union

ID = 'identifier'
TITLE = 'title'
CREATED = 'created'
MODIFIED = 'modified'
AUTHORS = 'authors'
ABSTRACT = 'abstract'
END = 'END'

# We store names as tuples of two strs: (last-name, first-name(s)).
NameType = Tuple[str, str]

# ArticleValueType is the type for valid values in the ArticleType
# dict. All values are str, except for the value associated with
# key AUTHORS, which is a List of NameType.
# Note that we have not introduced Union - you can read it as "or"
ArticleValueType = Union[str, List[NameType]]

# ArticleType is a dict that maps keys ID, TITLE, CREATED, MODIFIED,
# AUTHORS, and ABSTRACT to their values (of type ArticleValueType).
ArticleType = Dict[str, ArticleValueType]

# ArxivType is a dict that maps article identifiers to articles,
# i.e. to values of type ArticleType.
ArxivType = Dict[str, ArticleType]

Similar Samples

Browse through our curated samples at ProgrammingHomeworkHelp.com to witness our expertise in delivering high-quality programming solutions. From algorithm design to debugging complex code, our samples exemplify precision and proficiency across various languages. Experience firsthand how our solutions can elevate your understanding and grades in programming assignments.