Before we start … let’s install R packages first!
install.packages(c("tidyverse", "gapminder"))
library(tidyverse)
library(gapminder)
e.g. histogram: plots the distribution of a numeric variable’s values using bars
[R code]
# Bind the gapminder data set (from the 'gapminder' package) to `df`
df <- gapminder::gapminder
# Preview the top of the table: the first 6 rows
head(df, n = 6L)
## # A tibble: 6 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
# Histogram of the life-expectancy column of `df`
hist(
  df$lifeExp,
  breaks = "FD",             # rule used to choose the bins (Freedman-Diaconis)
  main = "Life expectancy",  # plot title
  xlab = "life exp.",        # x-axis label
  ylab = "frequency"         # y-axis label
)
par(mfrow = c(1, 2)): makes side-by-side plots. c(1, 2) indicates the number of rows and columns (here, 1 row and 2 columns).
par(mfrow = c(1, 2))
# First panel: distribution of population
hist(
  df$pop,
  main = "Gapminder: population",
  xlab = "Population"
)
# Second panel: distribution of life expectancy
hist(
  df$lifeExp,
  main = "Gapminder: life expectancy",
  xlab = "Life expectancy"
)
mean()
median()
sd()
var()
quantile()
min()
max()
summary() can show several of these numeric summaries (min, quartiles, median, mean, max) at once
mean(df$lifeExp)
## [1] 59.47444
# Median of life expectancy
median(df[["lifeExp"]])
## [1] 60.7125
# Standard deviation
sd(df[["lifeExp"]])
## [1] 12.91711
# Variance
var(df[["lifeExp"]])
## [1] 166.8517
# Quantiles at the default probabilities (0%, 25%, 50%, 75%, 100%)
quantile(df[["lifeExp"]])
## 0% 25% 50% 75% 100%
## 23.5990 48.1980 60.7125 70.8455 82.6030
# Smallest value
min(df[["lifeExp"]])
## [1] 23.599
# Largest value
max(df[["lifeExp"]])
## [1] 82.603
# Min, quartiles, median, mean and max in a single call
summary(df[["lifeExp"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 23.60 48.20 60.71 59.47 70.85 82.60