Created
March 26, 2023 21:17
-
-
Save MarkBaggett/411427eb965e28a99df700adbc853aab to your computer and use it in GitHub Desktop.
Calculate Stats on student lab completions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
def objective_to_day(ct: str) -> int:
    """Return the day number encoded in a challenge title.

    The title is expected to look like ``"Objective <day>.<question>.<step>"``.
    The first 10 characters are discarded and the first dot-separated field
    of the remainder is converted to an integer.

    Args:
        ct: Challenge title, e.g. ``"Objective 3.2.1"``.

    Returns:
        The day component as an ``int``.

    Raises:
        ValueError: If the day field is not an integer literal.
    """
    # len("Objective ") == 10, so this drops the textual prefix.
    numbering = ct[10:]
    return int(numbering.split(".")[0])
def objective_to_question(ct: str) -> int:
    """Return the question number encoded in a challenge title.

    The title is expected to look like ``"Objective <day>.<question>.<step>"``.
    The first 10 characters are discarded and the second dot-separated field
    of the remainder is converted to an integer.

    Args:
        ct: Challenge title, e.g. ``"Objective 3.2.1"``.

    Returns:
        The question component as an ``int``.

    Raises:
        IndexError: If the numbering has fewer than two dot-separated fields.
        ValueError: If the question field is not an integer literal.
    """
    # len("Objective ") == 10, so this drops the textual prefix.
    numbering = ct[10:]
    return int(numbering.split(".")[1])
def objective_to_step(ct: str) -> int:
    """Return the step number encoded in a challenge title.

    The title is expected to look like ``"Objective <day>.<question>.<step>"``.
    The first 10 characters are discarded and the third dot-separated field
    of the remainder is converted to an integer.

    Args:
        ct: Challenge title, e.g. ``"Objective 3.2.1"``.

    Returns:
        The step component as an ``int``.

    Raises:
        IndexError: If the numbering has fewer than three dot-separated fields.
        ValueError: If the step field is not an integer literal.
    """
    # len("Objective ") == 10, so this drops the textual prefix.
    numbering = ct[10:]
    return int(numbering.split(".")[2])
# Read the csv of lab attempts.
data = pd.read_csv("attempts.csv")

# Remove unused columns.
data = data.drop(
    [
        'user_reference',
        'attempt_status',
        'points_awarded',
        'failure_reason',
        'points_available',
        'difficulty',
        'submitted_flag',
        'submitted_flag_hash',
    ],
    axis=1,
)

# Convert the date from string into a date.
data['attempted_at'] = pd.to_datetime(data['attempted_at'])

# "Objective 0" carries no question/step suffix; pad it so the three
# parsers below always find three dot-separated fields.
data.loc[data['challenge_title'] == "Objective 0", "challenge_title"] = "Objective 0.0.0"

# Break down the objective numbers into its parts.
data["DAY"] = data.challenge_title.apply(objective_to_day)
data["QUESTION"] = data.challenge_title.apply(objective_to_question)
data["STEP"] = data.challenge_title.apply(objective_to_step)

# Sort so that each student's attempts are in chronological order.
data = data.sort_values(["display_name", "attempted_at"])

# Label runs of consecutive steps: the counter advances on every row whose
# STEP is not exactly one more than the previous row's STEP, so rows that
# climb 1, 2, 3, ... share one STEP_UP id.
data['STEP_UP'] = (data['STEP'].diff() != 1).cumsum()

# First and last attempt timestamp of every (student, day, question, run),
# merged back onto each row as attempted_at_first / attempted_at_last.
agg_dict = {'attempted_at': ['first', 'last']}
new_cols = data.groupby(['display_name', 'DAY', "QUESTION", "STEP_UP"]).agg(agg_dict)
new_cols.columns = new_cols.columns.map('_'.join)
new_cols = new_cols.reset_index()
data = data.merge(
    new_cols[['display_name', 'DAY', 'QUESTION', "STEP_UP", 'attempted_at_first', 'attempted_at_last']],
    on=['display_name', 'DAY', 'QUESTION', 'STEP_UP'],
)

# Duration of each run in seconds (vectorised instead of a per-row apply).
data["ELAPSED_TIME"] = (data['attempted_at_last'] - data['attempted_at_first']).dt.total_seconds()

# Sort the data again; the merge does not keep the previous row order.
data = data.sort_values(["display_name", "attempted_at"])

for day in data['DAY'].unique():
    for q in data['QUESTION'].unique():
        # Only rows for the first step of this day/question are considered.
        question = data[(data['DAY'] == day) & (data['QUESTION'] == q) & (data['STEP'] == 1)]
        if question.empty:
            # This day/question combination has no step-1 rows; nothing to report.
            continue

        # Calculate the cutoff values for the 10th and 90th percentiles and
        # keep only the elapsed times inside that band (bounds inclusive).
        cutoff_low = question['ELAPSED_TIME'].quantile(0.10)
        cutoff_high = question['ELAPSED_TIME'].quantile(0.90)
        trimmed = question.loc[
            (question['ELAPSED_TIME'] >= cutoff_low) & (question['ELAPSED_TIME'] <= cutoff_high),
            'ELAPSED_TIME',
        ]

        trimmed_mean = trimmed.mean()
        trimmed_std = trimmed.std()

        # Suggested lab length: trimmed mean plus two standard deviations.
        labtime = trimmed_mean + (trimmed_std * 2)

        # NOTE(review): both counts below are rows of `question`, i.e. one per
        # step-1 attempt row; they equal a student count only if each student
        # has a single such row -- confirm against the data.
        miss = len(question.loc[(question['ELAPSED_TIME'] > labtime), 'ELAPSED_TIME'])

        print(f"Day {day} Question {q} TIME IN MINUTES:")
        print(f" Mean = {trimmed_mean/60:03.2f}")
        print(f" Min = {trimmed.min()/60:03.2f}")
        print(f" Max = {trimmed.max()/60:03.2f}")
        print(f" STDDEV = {trimmed_std/60:03.2f}")
        print(f" If you run the lab {labtime/60:03.2f} you would miss {miss} of {len(question)} students. ")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment