library(here)
student_data = read_csv(here("student_data.csv"))
gathered_student_data = student_data %>% select(-age, -grade_level) %>%
pivot_longer(c(-first, -middle, -last, -school), names_to = "key", values_to = "grade") %>%
separate(key, c("class", "year"), sep = "__")
gathered_student_data %>% head(4)
## # A tibble: 4 x 7
## first middle last school class year grade
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 Krimhilde Yuri Hierro Oakwood math_grade 2015 NA
## 2 Krimhilde Yuri Hierro Oakwood english_grade 2015 NA
## 3 Krimhilde Yuri Hierro Oakwood science_grade 2015 NA
## 4 Krimhilde Yuri Hierro Oakwood social_studies_grade 2015 NA
final_student_data = gathered_student_data %>% pivot_wider(names_from = "class", values_from = "grade") %>%
mutate(gpa = (math_grade + english_grade + science_grade + social_studies_grade) / 4)
final_student_data %>% head(4)
## # A tibble: 4 x 10
## first middle last school year math_grade english_grade science_grade
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Krim~ Yuri Hier~ Oakwo~ 2015 NA NA NA
## 2 Krim~ Yuri Hier~ Oakwo~ 2016 72 64 64
## 3 Krim~ Yuri Hier~ Oakwo~ 2017 72 58 63
## 4 Krim~ Yuri Hier~ Oakwo~ 2018 65 57 62
## # ... with 2 more variables: social_studies_grade <dbl>, gpa <dbl>
# the easy way, repeat for each school
final_student_data %>% filter(year == 2018, school == "Oakwood") %>% select(gpa) %>% unlist() %>% mean()
## [1] 60.55165
# the harder way
final_student_data %>% filter(year == 2018) %>% {aggregate(.$gpa, by = list(school = .$school), mean)}
## school x
## 1 Oakwood 60.55165
## 2 Pine Field 62.76240
## 3 Shady Willow 66.17926