Python Program to Implement Quality Control Assignment Solution

June 22, 2024
Key Topics
  • Instructions
    • Objective
  • Requirements and Specifications
Write a Python program that implements quality control for a data set.

Requirements and Specifications


Source Code

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


# Define function to display main menu and ask for option
def menu():
    """Display the main menu until the user enters an option in 1..7.

    Returns:
        The chosen option as an int between 1 and 7 (inclusive).
    """
    while True:
        print("Please choose from the following options:")
        print("\t1 - Load data from a file")
        print("\t2 - View data")
        print("\t3 - Clean data")
        print("\t4 - Analyse data")
        print("\t5 - Visualise data")
        print("\t6 - Save data to a file")
        print("\t7 - Quit")
        try:
            option = int(input())
        except ValueError:
            # Only a failed int() conversion is expected here; Ctrl-C and
            # EOF must propagate so the user can always leave the program.
            print("Please enter a valid menu option")
            continue
        if 1 <= option <= 7:
            return option
        print("Please enter a valid option.")


# Define a function to display the menu for the 'Clean Data' option
def cleandata_menu():
    """Display the cleaning sub-menu until the user enters an option in 1..6.

    Returns:
        The chosen option as an int between 1 and 6 (inclusive).
    """
    while True:
        print("Cleaning data:")
        print("\t1 - Drop rows with missing values")
        print("\t2 - Fill missing values")
        print("\t3 - Drop duplicate rows")
        print("\t4 - Drop column")
        print("\t5 - Rename column")
        print("\t6 - Finish cleaning")
        try:
            option = int(input())
        except ValueError:
            print("Please enter a valid menu option")
            continue
        if 1 <= option <= 6:
            return option
        print("Please enter a valid option.")


# Define a function to get an integer from user.
# The integer must be between [lb, ub]
def get_int(message, lb, ub):
    """Request an integer input 'n' such that lb <= n <= ub.

    Args:
        message: Prompt passed to input().
        lb: Lower bound (inclusive); may be -np.inf for "no lower bound".
        ub: Upper bound (inclusive); may be np.inf for "no upper bound".

    Returns:
        The first integer entered by the user that lies within [lb, ub].
    """
    while True:
        try:
            value = int(input(message))
        except ValueError:
            print("Please enter a valid integer.")
            continue
        if lb <= value <= ub:
            return value
        print(f"Please enter a value between {lb} and {ub}.")


def _load_data():
    """Ask for a CSV file name, load it and optionally set an index column.

    Returns:
        The loaded DataFrame, or None when the file could not be read.
    """
    file_name = input("Enter file name: ")
    try:
        data = pd.read_csv(file_name)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: pandas parser,
        # empty-data and decoding errors all derive from it.
        print("File does not exist or could not be loaded.")
        return None
    print(f" File {file_name} correctly loaded!")

    # Ask if s/he wants to set a column name as index (blank = keep default)
    while True:
        col_name = input("Enter column name to be set as index: ")
        if not col_name:
            break
        if col_name in data.columns:
            # set_index already removes the column from the frame, so no
            # additional drop is needed (it would raise KeyError).
            data = data.set_index(col_name)
            break
        print("Sorry, the data does not contain a column with that name.")
    return data


def _ask_column(data, prompt):
    """Print *prompt* and the column names of *data*; return the user's answer."""
    print(prompt)
    for name in data.columns:
        print(f"\t{name}")
    return input()


def _clean_data(data):
    """Run the interactive cleaning loop and return the cleaned DataFrame.

    The current data is printed after every cleaning step; choosing
    "Finish cleaning" leaves the loop.
    """
    while True:
        opt = cleandata_menu()
        if opt == 6:  # finish cleaning
            break

        if opt == 1:  # drop rows with missing values
            threshold = get_int("Enter the treshold for dropping rows: ", 1, np.inf)
            # Keep rows with fewer than `threshold` missing values
            data = data[data.isnull().sum(axis=1) < threshold]
        elif opt == 2:  # fill missing values
            replacement = get_int("Enter the replacement value", -np.inf, np.inf)
            # fillna returns a new frame; the result must be kept
            data = data.fillna(replacement)
        elif opt == 3:  # drop duplicate rows
            n_before = len(data)
            data = data.drop_duplicates()
            print(f"{n_before - len(data)} rows dropped.")
        elif opt == 4:  # drop column
            column = _ask_column(
                data, "Which column do you want to drop? (leave blank for none)"
            )
            if not column:
                print("No column dropped.")
            elif column in data.columns:
                data = data.drop(columns=[column])
                print(f"{column} dropped.")
            else:
                print("Invalid selection!")
        elif opt == 5:  # rename column
            column = _ask_column(
                data, "Which column do you want to rename? (leave blank for none)"
            )
            if not column:
                print("No column renamed.")
            elif column in data.columns:
                new_column = input("Enter the new name: ")
                data = data.rename(columns={column: new_column})
                print(f"{column} renamed to {new_column}.")
            else:
                print("Invalid selection!")

        print(data)
    return data


def _analyse_data(data):
    """Print summary statistics per numeric column and the correlation table."""
    # The statistics below are only defined for numeric data; formatting
    # e.g. a string minimum with ':.2f' would raise.
    numeric = data.select_dtypes(include="number")
    for name, series in numeric.items():
        title = str(name)
        print(title)
        print("-" * len(title))
        print("{:<15s}:{:>5d}".format("number of values (n)", int(series.count())))
        print("{:<15s}:{:>5.2f}".format("minimum", series.min()))
        print("{:<15s}:{:>5.2f}".format("maximum", series.max()))
        print("{:<15s}:{:>5.2f}".format("mean", series.mean()))
        print("{:<15s}:{:>5.2f}".format("median", series.median()))
        print("{:<15s}:{:>5.2f}".format("standard deviation", series.std()))
        print("{:<15s}:{:>5.2f}".format("std. err. of mean", series.sem()))

    # Display correlation table
    print(numeric.corr())


def _label_axes(ax, title, x_label, y_label):
    """Apply title and axis labels to a single matplotlib Axes."""
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)


def _visualise_data(data):
    """Ask for plot kind and labels, then show line, bar or box plots."""
    while True:
        # Ask for plot type
        print("Please choose from the following kinds: line, bar, box")
        # Normalise once so validation and the later branches agree
        plot_type = input().lower()
        if plot_type not in ("line", "bar", "box"):
            print("Invalid selection!")
            continue
        print("Do you want subplots? (y/n)")
        subplots = input().lower()
        if subplots not in ("y", "n"):
            print("Invalid selection!")
            continue
        break

    plot_title = input("Please enter the title for the plot (leave blank for no title)\n")
    x_label = input("Please enter the x-axis label (leave blank for no label).\n")
    y_label = input("Please enter the y-axis label (leave balnk for no label).\n")

    numeric = data.select_dtypes(include="number")
    if numeric.shape[1] == 0:
        print("No numeric columns to plot.")
        return

    if plot_type == "box":
        # A box plot always shows every column in one figure
        plt.figure()
        _label_axes(numeric.boxplot(), plot_title, x_label, y_label)
    elif subplots == "y":
        # squeeze=False keeps `axes` two-dimensional even for one column
        _, axes = plt.subplots(nrows=numeric.shape[1], ncols=1, squeeze=False)
        for ax, column in zip(axes[:, 0], numeric.columns):
            numeric.plot(y=column, kind=plot_type, use_index=True, ax=ax)
            _label_axes(ax, plot_title, x_label, y_label)
    else:
        # One figure per column; DataFrame.plot creates the figure itself
        for column in numeric.columns:
            ax = numeric.plot(y=column, kind=plot_type, use_index=True)
            _label_axes(ax, plot_title, x_label, y_label)
    plt.show()


def _save_data(data):
    """Ask for a file name and write *data* to it as comma-separated values."""
    file_name = input("Enter the filename, including extension: ")
    try:
        data.to_csv(file_name, sep=",")
    except OSError:
        print(f"Data could not be saved to {file_name}")


# Main code
def main():
    """Run the interactive menu loop until the user chooses to quit."""
    current_data = None  # Variable to store the current loaded data

    while True:
        option = menu()
        if option == 7:
            break

        if option == 1:
            loaded = _load_data()
            # A failed load keeps whatever was loaded before
            if loaded is not None:
                current_data = loaded
        elif current_data is None:
            # `if current_data:` would raise for a DataFrame (ambiguous
            # truth value), so test against None explicitly.
            print("No data loaded.")
        elif option == 2:
            print(current_data)
        elif option == 3:
            current_data = _clean_data(current_data)
        elif option == 4:
            _analyse_data(current_data)
        elif option == 5:
            _visualise_data(current_data)
        elif option == 6:
            _save_data(current_data)

    print("Goodbye")


if __name__ == "__main__":
    main()

