## Warning: package 'knitr' was built under R version 3.6.2
# Keep a handle on the output hook that is currently registered so the
# replacement below can delegate to it.
hook_output <- knit_hooks$get("output")

# Replacement output hook: when a chunk sets the `linewidth` option, the
# printed output is split into lines and, if any line is longer than that
# width, re-wrapped with strwrap() before being passed to the saved hook.
knit_hooks$set(output = function(x, options) {
  n <- options$linewidth
  # Only touch the text when the linewidth option is not NULL.
  if (!is.null(n)) {
    x <- knitr:::split_lines(x)
    # A single over-long line triggers re-wrapping of the whole output.
    if (any(nchar(x) > n)) {
      x <- strwrap(x, width = n)
    }
    x <- paste(x, collapse = "\n")
  }
  hook_output(x, options)
})
This note is largely based on Applied Statistics with R: https://daviddalpiaz.github.io/appliedstats/
# Small demonstration data frame with 10 rows:
#   x: the odd numbers 1, 3, ..., 9, repeated twice
#   y: "Hello" nine times followed by a single "Goodbye"
#   z: alternating TRUE / FALSE
example_data <- data.frame(
  x = rep(c(1, 3, 5, 7, 9), times = 2),
  y = rep(c("Hello", "Goodbye"), times = c(9, 1)),
  z = rep(c(TRUE, FALSE), times = 5)
)
Use `write.csv` to save (or export) the dataframe in `.csv` format.
We can also import data into `R`, as well as use data stored in packages.
Read a `.csv` file into R.
Use the `read.csv()` function as default.
Alternatively, use the `read_csv()` function from the `readr` package. This is faster for larger data.
# install.packages("readr")
# readr alternative (left disabled):
# library(readr)
# example_data_from_csv <- read_csv("example-data.csv")

# Base R reader: load the CSV file into a data frame.
example_data_from_csv <- read.csv("example-data.csv")
This assumes that `example-data.csv` exists in your current working directory.
## [1]
"C:/Users/Yuta/Dropbox/Teaching/2020_1_3_4_Applied_Metrics/Note_Github/02_RIntro"
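Use the `setwd()` function to change the working directory.
Inside the `ggplot2` package is a dataset called `mpg`. By loading the package using the `library()` function, we can now access `mpg`.
To look at the data, we have two useful commands: `head()` and `str()`.
`str()` will display the “structure” of the data frame.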
## Classes 'tbl_df', 'tbl' and 'data.frame': 234 obs. of 11 variables:
## $ manufacturer: chr "audi" "audi" "audi" "audi" ...
## $ model : chr "a4" "a4" "a4" "a4" ...
## $ displ : num 1.8 1.8 2 2 2.8 2.8 3.1 1.8 1.8 2 ...
## $ year : int 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 ...
## $ cyl : int 4 4 4 4 6 6 6 4 4 4 ...
## $ trans : chr "auto(l5)" "manual(m5)" "manual(m6)" "auto(av)" ...
## $ drv : chr "f" "f" "f" "f" ...
## $ cty : int 18 21 20 21 16 18 18 18 16 20 ...
## $ hwy : int 29 29 31 30 26 26 27 26 25 28 ...
## $ fl : chr "p" "p" "p" "p" ...
## $ class : chr "compact" "compact" "compact" "compact" ...
Use the `names()` function to obtain names of the variables in the dataset.
## [1] "manufacturer" "model" "displ" "year" "cyl"
## [6] "trans" "drv" "cty" "hwy" "fl"
## [11] "class"
To access one of the variables as a vector, we use the `$` operator.
## [1] 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008 2008 1999 1999
2008 2008
## [16] 1999 2008 2008 2008 2008 2008 1999 2008 1999 1999 2008 2008 2008
2008 2008
## [31] 1999 1999 1999 2008 1999 2008 2008 1999 1999 1999 1999 2008 2008
2008 1999
## [46] 1999 2008 2008 2008 2008 1999 1999 2008 2008 2008 1999 1999 1999
2008 2008
## [61] 2008 1999 2008 1999 2008 2008 2008 2008 2008 2008 1999 1999 2008
1999 1999
## [76] 1999 2008 1999 1999 1999 2008 2008 1999 1999 1999 1999 1999 2008
1999 2008
## [91] 1999 1999 2008 2008 1999 1999 2008 2008 2008 1999 1999 1999 1999
1999 2008
## [106] 2008 2008 2008 1999 1999 2008 2008 1999 1999 2008 1999 1999 2008
2008 2008
## [121] 2008 2008 2008 2008 1999 1999 2008 2008 2008 2008 1999 2008 2008
1999 1999
## [136] 1999 2008 1999 2008 2008 1999 1999 1999 2008 2008 2008 2008 1999
1999 2008
## [151] 1999 1999 2008 2008 1999 1999 1999 2008 2008 1999 1999 2008 2008
2008 2008
## [166] 1999 1999 1999 1999 2008 2008 2008 2008 1999 1999 1999 1999 2008
2008 1999
## [181] 1999 2008 2008 1999 1999 2008 1999 1999 2008 2008 1999 1999 2008
1999 1999
## [196] 1999 2008 2008 1999 2008 1999 1999 2008 1999 1999 2008 2008 1999
1999 2008
## [211] 2008 1999 1999 1999 1999 2008 2008 2008 2008 1999 1999 1999 1999
1999 1999
## [226] 2008 2008 1999 1999 2008 2008 1999 1999 2008
## [1] 29 29 31 30 26 26 27 26 25 28 27 25 25 25 25 24 25 23 20 15 20 17
17 26 23
## [26] 26 25 24 19 14 15 17 27 30 26 29 26 24 24 22 22 24 24 17 22 21 23
23 19 18
## [51] 17 17 19 19 12 17 15 17 17 12 17 16 18 15 16 12 17 17 16 12 15 16
17 15 17
## [76] 17 18 17 19 17 19 19 17 17 17 16 16 17 15 17 26 25 26 24 21 22 23
22 20 33
## [101] 32 32 29 32 34 36 36 29 26 27 30 31 26 26 28 26 29 28 27 24 24 24
22 19 20
## [126] 17 12 19 18 14 15 18 18 15 17 16 18 17 19 19 17 29 27 31 32 27 26
26 25 25
## [151] 17 17 20 18 26 26 27 28 25 25 24 27 25 26 23 26 26 26 26 25 27 25
27 20 20
## [176] 19 17 20 17 29 27 31 31 26 26 28 27 29 31 31 26 26 27 30 33 35 37
35 15 18
## [201] 20 20 22 17 19 18 20 29 26 29 29 24 44 29 26 29 29 29 29 23 24 44
41 29 26
## [226] 28 29 29 29 28 29 26 26 26
We can use the `dim()`, `nrow()` and `ncol()` functions to obtain information about the dimension of the data frame.
## [1] 234 11
## [1] 234
## [1] 11
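Subsetting data frames can work much like subsetting matrices using square brackets, `[,]`.
For example, we can filter the rows and display only `manufacturer`, `model` and `year`.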
An alternative would be to use the `subset()` function, which has a much more readable syntax.
Lastly, we could use the `filter` and `select` functions from the `dplyr` package which introduces the `%>%` operator from the `magrittr` package.
The `dplyr` package will be covered in the DataCamp as a makeup lecture.