mGalarnyk/assignment3.md

Last active October 25, 2025 06:35

Star (11) You must be signed in to star a gist
Fork (45) You must be signed in to fork a gist

Select an option

Learn more about clone URLs
Clone this repository at <script src="https://gist.github.com/mGalarnyk/21695638e94965640c35667e8683642c.js"></script>
Save mGalarnyk/21695638e94965640c35667e8683642c to your computer and use it in GitHub Desktop.

R Programming Programming Assignment 3 (Week 4) Johns Hopkins Data Science Specialization Coursera for the github repo https://github.com/mGalarnyk/datasciencecoursera

Raw

assignment3.md

R Programming Project 3

github repo for rest of specialization: Data Science Coursera

The zip file containing the data can be downloaded here: Assignment 3 Data

Part 1 Plot the 30-day mortality rates for heart attack (outcome.R)

# install.packages("data.table")
library("data.table")

# Read the outcome-of-care measures into a data.table.
outcome <- data.table::fread('outcome-of-care-measures.csv')

# Column 11 is the one plotted below as the heart-attack mortality rate.
# Convert it to numeric in place; entries that cannot be parsed as numbers
# become NA (as.numeric emits a coercion warning for them).
outcome[, (11) := lapply(.SD, as.numeric), .SDcols = 11]

# Draw the histogram directly on the column vector. Calling hist() through
# lapply() inside `[.data.table` would return the histogram object as the
# j-result and print it to the console in addition to drawing the plot.
hist(outcome[[11]]
     , xlab = "Deaths"
     , main = "Hospital 30-Day Death (Mortality) Rates from Heart Attack"
     , col = "lightblue")

Part 2 Finding the best hospital in a state (best.R)

best <- function(state, outcome) {
  # Find the hospital with the lowest 30-day mortality rate for `outcome`
  # among the hospitals of one state.
  #
  # Args:
  #   state:   value that must appear in the "State" column of
  #            'outcome-of-care-measures.csv'.
  #   outcome: one of "heart attack", "heart failure" or "pneumonia"
  #            (matched case-insensitively).
  #
  # Returns:
  #   A one-row data.table with the single column `hospital name`.
  #   Hospitals are ordered by rate and then by name, so ties on the rate
  #   are resolved by hospital name.
  #
  # Raises:
  #   stop('invalid state') or stop('invalid outcome') on bad arguments.

  # Read outcome data
  out_dt <- data.table::fread('outcome-of-care-measures.csv')

  outcome <- tolower(outcome)

  # After renaming, the table has a column called `state`; inside
  # `[.data.table` that column would shadow the argument, so keep the
  # requested state under a different name.
  chosen_state <- state

  # Check that state and outcome are valid
  if (!chosen_state %in% out_dt[["State"]]) {
    stop('invalid state')
  }

  if (!outcome %in% c("heart attack", "heart failure", "pneumonia")) {
    stop('invalid outcome')
  }

  # Strip the verbose mortality prefix and lowercase every column name, so
  # the three rate columns are named exactly like the accepted outcomes.
  # sub() is vectorised, so no sapply() wrapper is needed.
  data.table::setnames(
    out_dt,
    tolower(sub("^Hospital 30-Day Death \\(Mortality\\) Rates from ", "",
                colnames(out_dt)))
  )

  # Keep only the requested state's rows and the three columns we need.
  keep_cols <- c("hospital name", "state", outcome)
  out_dt <- out_dt[state == chosen_state, keep_cols, with = FALSE]

  # The rate column may be read as text; non-numeric entries become NA.
  # That coercion warning is expected, so silence it for this call only.
  rates <- suppressWarnings(as.numeric(out_dt[[outcome]]))
  data.table::set(out_dt, j = outcome, value = rates)

  # Drop rows with missing values (in practice: a missing rate).
  out_dt <- out_dt[complete.cases(out_dt), ]

  # Lowest rate first; hospital name breaks ties.
  out_dt <- out_dt[order(get(outcome), `hospital name`)]

  out_dt[, "hospital name"][1]
}

Part 3 Ranking hospitals by outcome in a state (rankhospital.R)

rankhospital <- function(state, outcome, num = "best") {
  # Return the name of the hospital holding a given rank for `outcome`
  # within one state (rank 1 = lowest 30-day mortality rate).
  #
  # Args:
  #   state:   value that must appear in the "State" column of
  #            'outcome-of-care-measures.csv'.
  #   outcome: one of "heart attack", "heart failure" or "pneumonia"
  #            (matched case-insensitively).
  #   num:     "best", "worst", or a numeric rank.
  #
  # Returns:
  #   A character vector with the hospital name; NA when `num` is larger
  #   than the number of hospitals with a non-missing rate.
  #
  # Raises:
  #   stop('invalid state'), stop('invalid outcome'), or
  #   stop('invalid num') when `num` is neither numeric nor one of
  #   "best" / "worst".

  # Read outcome data
  out_dt <- data.table::fread('outcome-of-care-measures.csv')

  outcome <- tolower(outcome)

  # After renaming, the table has a column called `state`; inside
  # `[.data.table` that column would shadow the argument, so keep the
  # requested state under a different name.
  chosen_state <- state

  # Check that state and outcome are valid
  if (!chosen_state %in% out_dt[["State"]]) {
    stop('invalid state')
  }

  if (!outcome %in% c("heart attack", "heart failure", "pneumonia")) {
    stop('invalid outcome')
  }

  # Strip the verbose mortality prefix and lowercase every column name, so
  # the three rate columns are named exactly like the accepted outcomes.
  data.table::setnames(
    out_dt,
    tolower(sub("^Hospital 30-Day Death \\(Mortality\\) Rates from ", "",
                colnames(out_dt)))
  )

  # Keep only the requested state's rows and the three columns we need.
  keep_cols <- c("hospital name", "state", outcome)
  out_dt <- out_dt[state == chosen_state, keep_cols, with = FALSE]

  # The rate column may be read as text; non-numeric entries become NA.
  # That coercion warning is expected, so silence it for this call only.
  rates <- suppressWarnings(as.numeric(out_dt[[outcome]]))
  data.table::set(out_dt, j = outcome, value = rates)

  # Drop rows with missing values (in practice: a missing rate).
  out_dt <- out_dt[complete.cases(out_dt), ]

  # Lowest rate first; hospital name breaks ties. Only the ordered names
  # are needed from here on.
  out_dt <- out_dt[order(get(outcome), `hospital name`)]
  ranked <- out_dt[["hospital name"]]

  # Translate the symbolic ranks into positions; reject anything else up
  # front instead of letting it fall through to the row lookup.
  if (is.character(num)) {
    num <- switch(num,
                  best = 1L,
                  worst = length(ranked),
                  stop('invalid num'))
  } else if (!is.numeric(num)) {
    stop('invalid num')
  }

  # Indexing past the end of the vector yields NA.
  ranked[num]
}

Part 4 Ranking hospitals in all states (rankall.R)

rankall <- function(outcome, num = "best") {
  # For every state, find the hospital holding a given rank for `outcome`
  # (rank 1 = lowest 30-day mortality rate).
  #
  # Args:
  #   outcome: one of "heart attack", "heart failure" or "pneumonia"
  #            (matched case-insensitively).
  #   num:     "best", "worst", or a single numeric rank >= 1.
  #
  # Returns:
  #   A data.table with one row per state, sorted by state, and the columns
  #   `state` and `hospital`. `hospital` is NA for states that have fewer
  #   than `num` hospitals with a non-missing rate.
  #
  # Raises:
  #   stop('invalid outcome') or stop('invalid num') on bad arguments.

  # Read outcome data
  out_dt <- data.table::fread('outcome-of-care-measures.csv')

  outcome <- tolower(outcome)

  if (!outcome %in% c("heart attack", "heart failure", "pneumonia")) {
    stop('invalid outcome')
  }

  num_ok <- length(num) == 1L &&
    (
      (is.numeric(num) && isTRUE(num >= 1)) ||
        (is.character(num) && num %in% c("best", "worst"))
    )
  if (!num_ok) {
    stop('invalid num')
  }

  # Strip the verbose mortality prefix and lowercase every column name, so
  # the three rate columns are named exactly like the accepted outcomes.
  data.table::setnames(
    out_dt,
    tolower(sub("^Hospital 30-Day Death \\(Mortality\\) Rates from ", "",
                colnames(out_dt)))
  )

  # Keep only the columns we need and give the rate column a fixed name so
  # the grouped expression below does not need get().
  out_dt <- out_dt[, c("hospital name", "state", outcome), with = FALSE]
  data.table::setnames(out_dt, outcome, "rate")

  # The rate column may be read as text; non-numeric entries become NA.
  # That coercion warning is expected, so silence it for this call only.
  data.table::set(
    out_dt,
    j = "rate",
    value = suppressWarnings(as.numeric(out_dt[["rate"]]))
  )

  # Sort by state, then rate, then hospital name (the tie-breaker).
  out_dt <- out_dt[order(state, rate, `hospital name`)]

  # Pick the requested rank from one state's ordered hospital names.
  # Always returns exactly one value so that no state is dropped.
  pick_rank <- function(hospitals) {
    if (length(hospitals) == 0L) {
      return(NA_character_)
    }
    position <- if (is.numeric(num)) {
      num
    } else if (num == "best") {
      1L
    } else {
      length(hospitals)
    }
    hospitals[position]
  }

  # Hospitals with a missing rate are excluded from the ranking; order()
  # sorts NA last, so without this filter "worst" would pick a hospital
  # that has no rate at all.
  out_dt[, .(hospital = pick_rank(`hospital name`[!is.na(rate)])), by = state]
}

siyangni commented Nov 26, 2020 •

edited

Loading

This course is just designed to make me feel bad. I was in the honor's college while I was a senior, now I am getting my master in sociology.
Throughout my academic career so far I've never Googled anyone else's assignment. And this course makes me do this for every assignment!!!!!! By the way, I have a decent knowledge of programming where I gain from learning Python. I thought this class would be easy for me (after quickly going through the lecture videos), yet from the first assignment I began to scratch my head for an answer.

Guess what? the following courses for this specialization are just no better. At first sight I thought the instructors may have problems with their pedagogy. After going through several courses of JHU's Data Science Specialization, I highly doubt it's not just pedagogy, it is their attitude. There is no way the three instructors who should be incredibly smart people cannot find the embarrassingly obvious large gap between the course material and the assignments/quizzes in every one of their courses. And they rush through every course in this specialization. I paid for those courses though! I really want to file a complaint on Coursera.

amingraphy commented Nov 28, 2020

I agree the course is useless. Instead I started learning by reading a time-consuming textbook, called "Discovering Statistics Using R". I even think the materials provided by Roger Peng are not all at the same basic level. He doesn't teach the basic tools of R, but then he jumps to using multi-core computation on your computer to speed up the calculation! It is just ridiculous.

DanEscasa commented Dec 30, 2020 •

edited

Loading

Thanks for the hard work. I chose not to shorten the unwieldy long column names; I just used switch:

# Map the short outcome name onto the full column name; any other value
# falls through to NULL and is rejected below.
outcome <- switch(
  outcome,
  "heart attack" = "Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack",
  "heart failure" = "Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure",
  "pneumonia" = "Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia",
  NULL
)
if (is.null(outcome)) {
  stop(" : invalid outcome")
}

<rant>
Isn't there a way to preserve the indentation in a code block?
</rant>

As to Roger Peng's teaching, I was looking for a functional programming-oriented approach. He still treats it like a procedural language. I should write up something on that in codementor.io

charlenelch13 commented Jan 3, 2021

Thank you so much for the sharing!! It is sooo helpful! I had zero knowledge about programming before taking this course. I feel frustrated about learning the logics from the course materials. They are so vague and not supported by daily examples. I don't know how they can assume students to know how to finish the assignments... Thank you for guiding me!!

EmilieWaite18 commented Mar 23, 2021

I have never been so frustrated by anything. I have had to look up every single answer to these assignments.

anaidcandido commented Jun 1, 2021

Many thanks for sharing!! I also thought I was the only one struggling with the course but I'm "glad" to see is the course itself.
Now I am able to analyze and compare what I was doing. Thanks a lot!

yeho-bt commented Jun 13, 2021

Hmmmm, I am not alone feeling like this.

jdpm93 commented Oct 5, 2021

I have the same problem: the gap between the lessons, swirl, and the assignments is horrible and frustrating. Some exercises require the use of things never covered in the videos or swirl. I see that many have sent feedback, but they are not taking action on it. The video lessons also need to be updated: it's 2021 and the videos were recorded in 2015. This is insane. Thank you for sharing.

codobene commented Dec 16, 2021

truth is, the world is not meant for everyone to be kept alive - it just needs the upper 10% of people that manage this course well. Same is true for jobs in general, stock market, etc etc
get it quickly, or work overtime to compensate for your idiocy, or become social darvinism's fish food.

codobene commented Dec 16, 2021

i blindly copy code that I find here, fail to reproduce anything, and take drugs. good night. good luck!

Juanvelz commented Mar 24, 2022

These people should learn how to teach before offering a course. On the other hand, they know very well how to discourage students.

emakello commented Mar 27, 2022

The essence of taking an online course is to learn skills that can help you in your career or academics. While the courses are stimulating, it makes no sense to bring assignments that discourage rather than encourage learners. Some people like me had never coded before and I spend hours trying to do this thing without success. I wish there was someone who could teach coding from scratch without assuming any prior knowledge.

cfsobral commented Jan 22, 2023

Thank you for help. I believe that the course already expects us to look for solutions like this one from mGalarnyk, otherwise it would not make sense to pass on an assignment of this complexity for beginners to do, because many like me would get frustrated and give up the course, as I read in some comments made here. Thank you mGalarnyk

Doc-OmSa commented Apr 9, 2023

Thanks mGalarnyk. I have a feeling that this course is definitely not for beginners, and unlikely to suit intermediates either. The assignments are too advanced. I have been struggling to understand functions. I am a beginner, having previously taken the Google Data Analytics course. This is too advanced; I will likely leave. I just wanted to ask whether there are better courses for someone who is a beginner moving to the intermediate level? Another platform?

mGalarnyk/assignment3.md

Select an option

No results found

Select an option

No results found

R Programming Project 3

Part 1 Plot the 30-day mortality rates for heart attack (outcome.R)

Part 2 Finding the best hospital in a state (best.R)

Part 3 Ranking hospitals by outcome in a state (rankhospital.R)

Part 4 Ranking hospitals in all states (rankall.R)

siyangni commented Nov 26, 2020 •

edited

Loading

Uh oh!

amingraphy commented Nov 28, 2020

Uh oh!

DanEscasa commented Dec 30, 2020 •

edited

Loading

Uh oh!

charlenelch13 commented Jan 3, 2021

Uh oh!

EmilieWaite18 commented Mar 23, 2021

Uh oh!

anaidcandido commented Jun 1, 2021

Uh oh!

yeho-bt commented Jun 13, 2021

Uh oh!

jdpm93 commented Oct 5, 2021

Uh oh!

codobene commented Dec 16, 2021

Uh oh!

codobene commented Dec 16, 2021

Uh oh!

Juanvelz commented Mar 24, 2022

Uh oh!

emakello commented Mar 27, 2022

Uh oh!

cfsobral commented Jan 22, 2023

Uh oh!

Doc-OmSa commented Apr 9, 2023

Uh oh!

mGalarnyk/assignment3.md

R Programming Project 3

Part 1 Plot the 30-day mortality rates for heart attack (outcome.R)

Part 2 Finding the best hospital in a state (best.R)

Part 3 Ranking hospitals by outcome in a state (rankhospital.R)

Part 4 Ranking hospitals in all states (rankall.R)

siyangni commented Nov 26, 2020 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

amingraphy commented Nov 28, 2020

Uh oh!

DanEscasa commented Dec 30, 2020 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

charlenelch13 commented Jan 3, 2021

Uh oh!

EmilieWaite18 commented Mar 23, 2021

Uh oh!

anaidcandido commented Jun 1, 2021

Uh oh!

yeho-bt commented Jun 13, 2021

Uh oh!

jdpm93 commented Oct 5, 2021

Uh oh!

codobene commented Dec 16, 2021

Uh oh!

codobene commented Dec 16, 2021

Uh oh!

Juanvelz commented Mar 24, 2022

Uh oh!

emakello commented Mar 27, 2022

Uh oh!

cfsobral commented Jan 22, 2023

Uh oh!

Doc-OmSa commented Apr 9, 2023

Uh oh!

siyangni commented Nov 26, 2020 •

edited

Loading

DanEscasa commented Dec 30, 2020 •

edited

Loading