Zhiguang Huo (Caleb)
Monday September 9, 2019
The tidyverse is a collection of R packages designed for data science.
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
## Alternatively, library(tidyverse) attaches all of these packages in one call.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble  2.1.3     ✔ stringr 1.4.0
## ✔ purrr   0.3.2     ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
Data source: sleepstudy.csv (also available as the `sleepstudy` dataset in the R package lme4).
The original (base R) way to read in the data:
## URL of the sleepstudy CSV. The scheme must be followed by exactly two
## slashes; a third slash leaves the host part of the URL empty.
asleepfile <- "https://Caleb-Huo.github.io/teaching/data/sleep/sleepstudy.csv"
## Base R reader: read.csv() returns a plain data.frame.
data0 <- read.csv(asleepfile)
## Parsed with column specification:
## cols(
##   Reaction = col_double(),
##   Days = col_double(),
##   Subject = col_double()
## )
##   Reaction Days Subject
## 1 249.5600    0     308
## 2 258.7047    1     308
## 3 250.8006    2     308
## 4 321.4398    3     308
## 5 356.8519    4     308
## 6 414.6901    5     308
## [1] "data.frame"
## # A tibble: 180 x 3
##    Reaction  Days Subject
##       <dbl> <dbl>   <dbl>
##  1     250.     0     308
##  2     259.     1     308
##  3     251.     2     308
##  4     321.     3     308
##  5     357.     4     308
##  6     415.     5     308
##  7     382.     6     308
##  8     290.     7     308
##  9     431.     8     308
## 10     466.     9     308
## # … with 170 more rows
## [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"
## Parsed with column specification:
## cols(
##   Reaction = col_double(),
##   Days = col_double(),
##   Subject = col_double()
## )
## [1] 249.5600 258.7047 250.8006
##      Reaction Days Subject
## [1,] 249.5600    0     308
## [2,] 258.7047    1     308
## [3,] 250.8006    2     308
##   Reaction Days Subject
## 1 249.5600    0     308
## 2 258.7047    1     308
## 3 250.8006    2     308
## # A tibble: 180 x 2
##     Days Subject
##    <dbl>   <dbl>
##  1     0     308
##  2     1     308
##  3     2     308
##  4     3     308
##  5     4     308
##  6     5     308
##  7     6     308
##  8     7     308
##  9     8     308
## 10     9     308
## # … with 170 more rows
## # A tibble: 180 x 2
##     Days Subject
##    <dbl>   <dbl>
##  1     0     308
##  2     1     308
##  3     2     308
##  4     3     308
##  5     4     308
##  6     5     308
##  7     6     308
##  8     7     308
##  9     8     308
## 10     9     308
## # … with 170 more rows
## [1] 2.718282
## [1] 2.718282
## [1] 1
## # A tibble: 180 x 2
##     Days Subject
##    <dbl>   <dbl>
##  1     0     308
##  2     1     308
##  3     2     308
##  4     3     308
##  5     4     308
##  6     5     308
##  7     6     308
##  8     7     308
##  9     8     308
## 10     9     308
## # … with 170 more rows
## # A tibble: 10 x 2
##     Days Subject
##    <dbl>   <dbl>
##  1     0     308
##  2     1     308
##  3     2     308
##  4     3     308
##  5     4     308
##  6     5     308
##  7     6     308
##  8     7     308
##  9     8     308
## 10     9     308
## # A tibble: 6 x 2
##    Days Subject
##   <dbl>   <dbl>
## 1     3     308
## 2     4     308
## 3     5     308
## 4     6     308
## 5     8     308
## 6     9     308
## # A tibble: 6 x 3
##   Reaction  Days Subject
##      <dbl> <dbl>   <dbl>
## 1     321.     3     308
## 2     357.     4     308
## 3     415.     5     308
## 4     382.     6     308
## 5     431.     8     308
## 6     466.     9     308
## # A tibble: 6 x 3
##   Reaction  Days Subject
##      <dbl> <dbl>   <dbl>
## 1     321.     3     308
## 2     357.     4     308
## 3     415.     5     308
## 4     382.     6     308
## 5     431.     8     308
## 6     466.     9     308
## [1] 321.4398 356.8519 414.6901 382.2038 430.5853 466.3535
## [1] 321.4398 356.8519 414.6901 382.2038 430.5853 466.3535
## [1]  4  5  6  7  9 10
## [1]  4  5  6  7  9 10
## # A tibble: 6 x 3
##   Reaction  Days Subject
##      <dbl> <dbl>   <dbl>
## 1     321.     3     308
## 2     357.     4     308
## 3     415.     5     308
## 4     382.     6     308
## 5     431.     8     308
## 6     466.     9     308
## # A tibble: 8 x 3
##   Reaction  Days Subject
##      <dbl> <dbl>   <dbl>
## 1     250.     0     308
## 2     259.     1     308
## 3     251.     2     308
## 4     321.     3     308
## 5     357.     4     308
## 6     415.     5     308
## 7     382.     6     308
## 8     290.     7     308
## # A tibble: 8 x 3
##   Reaction  Days Subject
##      <dbl> <dbl>   <dbl>
## 1     250.     0     308
## 2     259.     1     308
## 3     251.     2     308
## 4     321.     3     308
## 5     357.     4     308
## 6     415.     5     308
## 7     382.     6     308
## 8     290.     7     308
## # A tibble: 6 x 3
##   Reaction  Days Subject
##      <dbl> <dbl>   <dbl>
## 1     194.     1     310
## 2     199.     0     310
## 3     203.     2     309
## 4     205.     3     309
## 5     205.     1     309
## 6     208.     4     309
## # A tibble: 6 x 3
##   Reaction  Days Subject
##      <dbl> <dbl>   <dbl>
## 1     199.     0     310
## 2     222.     0     352
## 3     223.     0     309
## 4     225.     0     370
## 5     235.     0     332
## 6     236.     0     349
## Reaction     Days  Subject 
## 1339.693    0.000 2022.000
## # A tibble: 6 x 3
##   Reaction  Days Subject
##      <dbl> <dbl>   <dbl>
## 1     237.     9     335
## 2     237.     9     309
## 3     248.     9     310
## 4     254.     9     332
## 5     348.     9     351
## 6     352.     9     349
## # A tibble: 6 x 4
##   Reaction  Days Subject Reaction_binary
##      <dbl> <dbl>   <dbl> <lgl>          
## 1     250.     0     308 TRUE           
## 2     259.     1     308 FALSE          
## 3     251.     2     308 FALSE          
## 4     321.     3     308 FALSE          
## 5     357.     4     308 FALSE          
## 6     415.     5     308 FALSE
## # A tibble: 6 x 5
##   Reaction  Days Subject Reaction_binary Reaction_sec
##      <dbl> <dbl>   <dbl> <lgl>                  <dbl>
## 1     250.     0     308 TRUE                   0.250
## 2     259.     1     308 FALSE                  0.259
## 3     251.     2     308 FALSE                  0.251
## 4     321.     3     308 FALSE                  0.321
## 5     357.     4     308 FALSE                  0.357
## 6     415.     5     308 FALSE                  0.415
## # A tibble: 6 x 3
##   Reaction  Days Subject
##      <dbl> <dbl>   <dbl>
## 1     250.     0     308
## 2     259.     1     308
## 3     251.     2     308
## 4     321.     3     308
## 5     357.     4     308
## 6     415.     5     308
## # A tibble: 6 x 3
##   Reaction  Days Subject
##      <dbl> <dbl>   <dbl>
## 1     5.52     0    5.73
## 2     5.56     1    5.73
## 3     5.52     2    5.73
## 4     5.77     3    5.73
## 5     5.88     4    5.73
## 6     6.03     5    5.73
## # A tibble: 180 x 3
##    Reaction  Days    ID
##       <dbl> <dbl> <dbl>
##  1     250.     0   308
##  2     259.     1   308
##  3     251.     2   308
##  4     321.     3   308
##  5     357.     4   308
##  6     415.     5   308
##  7     382.     6   308
##  8     290.     7   308
##  9     431.     8   308
## 10     466.     9   308
## # … with 170 more rows
## Collapse the whole table into a single row of summary statistics for
## Reaction: mean, minimum, maximum, and the number of rows (n()).
summarise(
  data1,
  avg_reaction = mean(Reaction),
  min_reaction = min(Reaction),
  max_reaction = max(Reaction),
  total = n()
)
## # A tibble: 1 x 4
##   avg_reaction min_reaction max_reaction total
##          <dbl>        <dbl>        <dbl> <int>
## 1         299.         194.         466.   180
## Same summary statistics as above, but computed separately for each Subject:
## group the rows by Subject first, then summarise within each group.
tt <- summarise(
  group_by(data1, Subject),
  avg_reaction = mean(Reaction),
  min_reaction = min(Reaction),
  max_reaction = max(Reaction),
  total = n()
)
## Show the first few per-subject rows.
head(tt)
## # A tibble: 6 x 5
##   Subject avg_reaction min_reaction max_reaction total
##     <dbl>        <dbl>        <dbl>        <dbl> <int>
## 1     308         342.         250.         466.    10
## 2     309         215.         203.         237.    10
## 3     310         231.         194.         261.    10
## 4     330         303.         280.         354.    10
## 5     331         309.         285          372.    10
## 6     332         307.         235.         454.    10
## # A tibble: 6 x 5
##   Subject avg_reaction min_reaction max_reaction total
##     <dbl>        <dbl>        <dbl>        <dbl> <int>
## 1     308         342.         250.         466.    10
## 2     309         215.         203.         237.    10
## 3     310         231.         194.         261.    10
## 4     330         303.         280.         354.    10
## 5     331         309.         285          372.    10
## 6     332         307.         235.         454.    10
## # A tibble: 6 x 3
##   avg_reaction min_reaction max_reaction
##          <dbl>        <dbl>        <dbl>
## 1         342.         250.         466.
## 2         215.         203.         237.
## 3         231.         194.         261.
## 4         303.         280.         354.
## 5         309.         285          372.
## 6         307.         235.         454.
## # A tibble: 6 x 3
##   avg_reaction min_reaction max_reaction
##          <dbl>        <dbl>        <dbl>
## 1         342.         250.         466.
## 2         215.         203.         237.
## 3         231.         194.         261.
## 4         303.         280.         354.
## 5         309.         285          372.
## 6         307.         235.         454.
## Two small example tables for the join demonstrations, written inline as
## CSV text and parsed with read_csv(). Each literal starts with a newline
## right after the opening quote, so skip = 1 drops that empty first line
## and the header row is read as the column names.
superheroes <- "
    name, alignment, gender,         publisher
 Magneto,       bad,   male,            Marvel
   Storm,      good, female,            Marvel
Mystique,       bad, female,            Marvel
  Batman,      good,   male,                DC
   Joker,       bad,   male,                DC
Catwoman,       bad, female,                DC
 Hellboy,      good,   male, Dark Horse Comics
" %>%
  read_csv(skip = 1)

## Publisher lookup table, keyed by the same publisher column.
publishers <- "
  publisher, yr_founded
         DC,       1934
     Marvel,       1939
      Image,       1992
" %>%
  read_csv(skip = 1)
## Joining, by = "publisher"
## # A tibble: 6 x 5
##   name     alignment gender publisher yr_founded
##   <chr>    <chr>     <chr>  <chr>          <dbl>
## 1 Magneto  bad       male   Marvel          1939
## 2 Storm    good      female Marvel          1939
## 3 Mystique bad       female Marvel          1939
## 4 Batman   good      male   DC              1934
## 5 Joker    bad       male   DC              1934
## 6 Catwoman bad       female DC              1934
## Joining, by = "publisher"
## # A tibble: 6 x 5
##   publisher yr_founded name     alignment gender
##   <chr>          <dbl> <chr>    <chr>     <chr> 
## 1 DC              1934 Batman   good      male  
## 2 DC              1934 Joker    bad       male  
## 3 DC              1934 Catwoman bad       female
## 4 Marvel          1939 Magneto  bad       male  
## 5 Marvel          1939 Storm    good      female
## 6 Marvel          1939 Mystique bad       female
## Joining, by = "publisher"
## # A tibble: 7 x 5
##   name     alignment gender publisher         yr_founded
##   <chr>    <chr>     <chr>  <chr>                  <dbl>
## 1 Magneto  bad       male   Marvel                  1939
## 2 Storm    good      female Marvel                  1939
## 3 Mystique bad       female Marvel                  1939
## 4 Batman   good      male   DC                      1934
## 5 Joker    bad       male   DC                      1934
## 6 Catwoman bad       female DC                      1934
## 7 Hellboy  good      male   Dark Horse Comics         NA
## Joining, by = "publisher"
## # A tibble: 7 x 5
##   publisher yr_founded name     alignment gender
##   <chr>          <dbl> <chr>    <chr>     <chr> 
## 1 DC              1934 Batman   good      male  
## 2 DC              1934 Joker    bad       male  
## 3 DC              1934 Catwoman bad       female
## 4 Marvel          1939 Magneto  bad       male  
## 5 Marvel          1939 Storm    good      female
## 6 Marvel          1939 Mystique bad       female
## 7 Image           1992 <NA>     <NA>      <NA>
## Joining, by = "publisher"
## # A tibble: 7 x 5
##   name     alignment gender publisher yr_founded
##   <chr>    <chr>     <chr>  <chr>          <dbl>
## 1 Batman   good      male   DC              1934
## 2 Joker    bad       male   DC              1934
## 3 Catwoman bad       female DC              1934
## 4 Magneto  bad       male   Marvel          1939
## 5 Storm    good      female Marvel          1939
## 6 Mystique bad       female Marvel          1939
## 7 <NA>     <NA>      <NA>   Image           1992
## Joining, by = "publisher"
## # A tibble: 7 x 5
##   publisher         yr_founded name     alignment gender
##   <chr>                  <dbl> <chr>    <chr>     <chr> 
## 1 Marvel                  1939 Magneto  bad       male  
## 2 Marvel                  1939 Storm    good      female
## 3 Marvel                  1939 Mystique bad       female
## 4 DC                      1934 Batman   good      male  
## 5 DC                      1934 Joker    bad       male  
## 6 DC                      1934 Catwoman bad       female
## 7 Dark Horse Comics         NA Hellboy  good      male
## Joining, by = "publisher"
## # A tibble: 1 x 4
##   name    alignment gender publisher        
##   <chr>   <chr>     <chr>  <chr>            
## 1 Hellboy good      male   Dark Horse Comics
## Joining, by = "publisher"
## # A tibble: 1 x 2
##   publisher yr_founded
##   <chr>          <dbl>
## 1 Image           1992
## Joining, by = "publisher"
## # A tibble: 8 x 5
##   name     alignment gender publisher         yr_founded
##   <chr>    <chr>     <chr>  <chr>                  <dbl>
## 1 Magneto  bad       male   Marvel                  1939
## 2 Storm    good      female Marvel                  1939
## 3 Mystique bad       female Marvel                  1939
## 4 Batman   good      male   DC                      1934
## 5 Joker    bad       male   DC                      1934
## 6 Catwoman bad       female DC                      1934
## 7 Hellboy  good      male   Dark Horse Comics         NA
## 8 <NA>     <NA>      <NA>   Image                   1992
## Joining, by = "publisher"
## # A tibble: 8 x 5
##   publisher         yr_founded name     alignment gender
##   <chr>                  <dbl> <chr>    <chr>     <chr> 
## 1 DC                      1934 Batman   good      male  
## 2 DC                      1934 Joker    bad       male  
## 3 DC                      1934 Catwoman bad       female
## 4 Marvel                  1939 Magneto  bad       male  
## 5 Marvel                  1939 Storm    good      female
## 6 Marvel                  1939 Mystique bad       female
## 7 Image                   1992 <NA>     <NA>      <NA>  
## 8 Dark Horse Comics         NA Hellboy  good      male
## # A tibble: 6 x 11
##   Subject   `0`   `1`   `2`   `3`   `4`   `5`   `6`   `7`   `8`   `9`
##     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1     308  250.  259.  251.  321.  357.  415.  382.  290.  431.  466.
## 2     309  223.  205.  203.  205.  208.  216.  214.  218.  224.  237.
## 3     310  199.  194.  234.  233.  229.  220.  235.  256.  261.  248.
## 4     330  322.  300.  284.  285.  286.  298.  280.  318.  305.  354.
## 5     331  288.  285   302.  320.  316.  293.  290.  335.  294.  372.
## 6     332  235.  243.  273.  310.  317.  310.  454.  347.  330.  254.
## # A tibble: 6 x 3
##   Subject ddays rreaction
##     <dbl> <chr>     <dbl>
## 1     308 0          250.
## 2     309 0          223.
## 3     310 0          199.
## 4     330 0          322.
## 5     331 0          288.
## 6     332 0          235.
## # A tibble: 6 x 2
##   Reaction Subject_Days
##      <dbl> <chr>       
## 1     250. 308_0       
## 2     259. 308_1       
## 3     251. 308_2       
## 4     321. 308_3       
## 5     357. 308_4       
## 6     415. 308_5
## Split the combined Subject_Days column back into two columns at the
## underscore. As the output below shows, both new columns are character.
data1_separate <- separate(
  data1_unite,
  col = Subject_Days,
  into = c("subjects", "days"),
  sep = "_"
)
data1_separate %>% head()
## # A tibble: 6 x 3
##   Reaction subjects days 
##      <dbl> <chr>    <chr>
## 1     250. 308      0    
## 2     259. 308      1    
## 3     251. 308      2    
## 4     321. 308      3    
## 5     357. 308      4    
## 6     415. 308      5