Instructions
Objective
Write a Python assignment program that implements quality control.
Requirements and Specifications
Source Code
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Define function to display main menu and ask for option
def menu():
    """Display the main menu and return the option chosen by the user.

    The menu is re-printed and the prompt repeated until the user types an
    integer between 1 and 7 (inclusive).

    Returns:
        int: The selected menu option, in the range 1..7.

    Raises:
        EOFError: If standard input is exhausted before a valid option is read.
    """
    while True:
        print("Please choose from the following options:")
        print("\t1 - Load data from a file")
        print("\t2 - View data")
        print("\t3 - Clean data")
        print("\t4 - Analyse data")
        print("\t5 - Visualise data")
        print("\t6 - Save data to a file")
        print("\t7 - Quit")
        # Only a failed int() conversion means "bad menu input". Catching
        # everything would also swallow EOFError/KeyboardInterrupt and make
        # the loop impossible to leave.
        try:
            option = int(input())
        except ValueError:
            print("Please enter a valid menu option")
            continue
        if 1 <= option <= 7:
            return option
        print("Please enter a valid option.")
# Define a function to display the menu for the 'Clean Data' option
def cleandata_menu():
    """Display the 'Clean data' sub-menu and return the option chosen by the user.

    The sub-menu is re-printed and the prompt repeated until the user types an
    integer between 1 and 6 (inclusive).

    Returns:
        int: The selected cleaning option, in the range 1..6.

    Raises:
        EOFError: If standard input is exhausted before a valid option is read.
    """
    while True:
        print("Cleaning data:")
        print("\t1 - Drop rows with missing values")
        print("\t2 - Fill missing values")
        print("\t3 - Drop duplicate rows")
        print("\t4 - Drop column")
        print("\t5 - Rename column")
        print("\t6 - Finish cleaning")
        # Catch only the conversion failure so that EOF and Ctrl-C still
        # terminate the prompt instead of looping forever.
        try:
            option = int(input())
        except ValueError:
            print("Please enter a valid menu option")
            continue
        if 1 <= option <= 6:
            return option
        print("Please enter a valid option.")
# Define a function to get an integer from user. The integer must be between [lb, ub]
def get_int(message, lb, ub):
    """Request an integer input 'n' such that lb <= n <= ub.

    The prompt is repeated until the user types text that parses as an
    integer and lies inside the closed interval [lb, ub].

    Args:
        message: Prompt text passed to ``input``.
        lb: Lower bound (inclusive). Any value comparable with ``int`` works,
            so ``-numpy.inf`` can be used for "no lower bound".
        ub: Upper bound (inclusive); ``numpy.inf`` means "no upper bound".

    Returns:
        int: The validated integer.

    Raises:
        EOFError: If standard input is exhausted before a valid value is read.
    """
    while True:
        # Only a failed int() conversion is an input error; EOF and Ctrl-C
        # must propagate, otherwise this loop can never be left.
        try:
            value = int(input(message))
        except ValueError:
            print("Please enter a valid integer.")
            continue
        if lb <= value <= ub:
            return value
        print(f"Please enter a value between {lb} and {ub}.")
# Main code
def _load_data():
    """Ask for a CSV file name and return the loaded DataFrame, or None on failure."""
    file_name = input("Enter file name: ")
    try:
        data = pd.read_csv(file_name)
    except (OSError, ValueError):
        # OSError covers missing/unreadable files; pandas' parser, empty-data
        # and decoding errors are all ValueError subclasses.
        print("File does not exist or could not be loaded.")
        return None
    print(f" File {file_name} correctly loaded!")
    # Ask if s/he wants to set a column name as index (blank keeps the default index)
    while True:
        col_name = input("Enter column name to be set as index: ")
        if not col_name:
            break
        if col_name in data.columns:
            # set_index already removes the column from the frame, so no
            # separate drop() is needed (a second drop raises KeyError).
            data = data.set_index(col_name)
            break
        print("Sorry, the data does not contain a column with that name.")
    return data


def _choose_column(data, question, none_message):
    """List the columns of *data*, read a selection and return it, or None if blank/invalid."""
    print(question)
    for c in data.columns:
        print(f"\t{c}")
    column = input()
    if not column:
        print(none_message)
        return None
    if column not in data.columns:
        print("Invalid selection!")
        return None
    return column


def _clean_data(data):
    """Run the interactive cleaning sub-menu on *data* and return the cleaned DataFrame."""
    while True:
        opt = cleandata_menu()
        if opt == 6:  # finish cleaning
            return data
        if opt == 1:  # drop rows with missing values
            # Keep only rows whose number of missing cells is below the threshold
            threshold = get_int("Enter the treshold for dropping rows: ", 1, np.inf)
            data = data[data.isnull().sum(axis=1) < threshold]
        elif opt == 2:  # fill missing values
            replacement = get_int("Enter the replacement value", -np.inf, np.inf)
            # fillna returns a new frame; the result must be kept
            data = data.fillna(replacement)
        elif opt == 3:  # drop duplicate rows
            n_before = len(data)
            # drop_duplicates returns a new frame; the result must be kept
            data = data.drop_duplicates()
            print(f"{n_before - len(data)} rows dropped.")
        elif opt == 4:  # drop column
            column = _choose_column(
                data,
                "Which column do you want to drop? (leave blank for none)",
                "No column dropped.",
            )
            if column is not None:
                data = data.drop(columns=[column])
                print(f"{column} dropped.")
        elif opt == 5:  # rename column
            column = _choose_column(
                data,
                "Which column do you want to rename? (leave blank for none)",
                "No column renamed.",
            )
            if column is not None:
                new_column = input("Enter the new name: ")
                # rename returns a new frame; the result must be kept
                data = data.rename(columns={column: new_column})
                print(f"{column} renamed to {new_column}.")
        # Show the state of the data after every cleaning step
        print(data)


def _analyse_data(data):
    """Print summary statistics for every numeric column of *data* and their correlation table."""
    # Statistics such as mean/std and corr() are only defined for numeric
    # columns; text columns would raise, so they are left out.
    numeric = data.select_dtypes(include="number")
    for c in numeric.columns:
        series = numeric[c]
        print(c)
        print('-' * len(str(c)))
        print("{:<15s}:{:>5d}".format("number of values (n)", int(series.count())))
        print("{:<15s}:{:>5.2f}".format("minimum", float(series.min())))
        print("{:<15s}:{:>5.2f}".format("maximum", float(series.max())))
        print("{:<15s}:{:>5.2f}".format("mean", float(series.mean())))
        print("{:<15s}:{:>5.2f}".format("median", float(series.median())))
        print("{:<15s}:{:>5.2f}".format("standard deviation", float(series.std())))
        print("{:<15s}:{:>5.2f}".format("std. err. of mean", float(series.sem())))
    # Display correlation table
    print(numeric.corr())


def _visualise_data(data):
    """Ask for plot kind, layout and labels, then draw *data* with matplotlib."""
    while True:
        # Ask for plot type; normalise case once so later comparisons agree
        # with the validation.
        print("Please choose from the following kinds: line, bar, box")
        plot_type = input().lower()
        if plot_type not in ('line', 'bar', 'box'):
            print("Invalid selection!")
            continue
        print("Do you want subplots? (y/n)")
        yn = input().lower()
        if yn not in ('y', 'n'):
            print("Invalid selection!")
            continue
        plot_title = input("Please enter the title for the plot (leave blank for no title)\n")
        x_label = input("Please enter the x-axis label (leave blank for no label).\n")
        y_label = input("Please enter the y-axis label (leave balnk for no label).\n")

        if plot_type == 'box':
            # A box plot is a single figure regardless of the subplot choice
            plt.figure()
            data.boxplot()
            plt.title(plot_title)
            plt.xlabel(x_label)
            plt.ylabel(y_label)
        elif yn == 'y':  # one stacked subplot per column
            # squeeze=False always yields a 2-D array of axes, so a frame
            # with a single column can be indexed like any other.
            _, axes = plt.subplots(nrows=len(data.columns), ncols=1, squeeze=False)
            for ax, c in zip(axes[:, 0], data.columns):
                data.plot(y=c, kind=plot_type, use_index=True, ax=ax)
                ax.set_title(plot_title)
                ax.set_xlabel(x_label)
                ax.set_ylabel(y_label)
        else:  # one separate figure per column
            for c in data.columns:
                # Draw on an explicit axes; DataFrame.plot without ax= opens
                # its own figure and would leave an empty one behind.
                _, ax = plt.subplots()
                data.plot(y=c, kind=plot_type, use_index=True, ax=ax)
                ax.set_title(plot_title)
                ax.set_xlabel(x_label)
                ax.set_ylabel(y_label)
        plt.show()
        return


def _save_data(data):
    """Ask for a file name and write *data* to it as comma-separated values."""
    file_name = input("Enter the filename, including extension: ")
    try:
        data.to_csv(file_name, sep=',')
    except OSError:
        print(f"Data could not be saved to {file_name}")


current_data = None  # Variable to store the current loaded data
running = True
while running:
    option = menu()
    if option == 1:  # Load data from a file
        loaded = _load_data()
        # A failed load keeps whatever data was loaded before
        if loaded is not None:
            current_data = loaded
    elif option == 7:  # Quit
        running = False
        print("Goodbye")
    elif current_data is None:
        # `if current_data:` is not usable here: the truth value of a
        # DataFrame is ambiguous and raises ValueError.
        print("No data loaded.")
    elif option == 2:  # View data
        print(current_data)
    elif option == 3:  # Clean data
        current_data = _clean_data(current_data)
    elif option == 4:  # Analyse data
        _analyse_data(current_data)
    elif option == 5:  # Visualise data
        _visualise_data(current_data)
    elif option == 6:  # Save to a file
        _save_data(current_data)
Similar Samples
Explore our curated collection of programming homework samples at ProgrammingHomeworkHelp.com. Our examples, spanning Java, Python, C++, and more, demonstrate our expertise in tackling various coding challenges. Each solution showcases clarity, thoroughness, and adherence to academic standards. Dive into our samples to experience how we can assist you in mastering programming concepts effectively.
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python
Python