\[ f_{X}(x)=\frac{1}{\sqrt{2 \pi \sigma^{2}}} e^{-\frac{(x-\mu)^{2}}{2 \sigma^{2}}} \quad-\infty<x<\infty \]
curve(dnorm, from=-4, to=4, ylab="PDF", main="PDF of a standard normal", lwd=4, col=2)
\[ F_{X}(x)=\operatorname{Pr}(X \leq x) \]
\[ F_{X}(x)=\int_{-\infty}^{x} f_{X}(t) \, d t \]
curve(pnorm, from=-4, to=4, ylab="CDF", main="CDF of a standard normal", lwd=4, col=2)
\[ E[X]=\mu \]
\[ \operatorname{Var}(X)=\sigma^{2} \]
\[ Z=\frac{Y-\mu}{\sigma} \]
y <- rnorm(1000, mean = 20, sd = 4) # note: rnorm's third argument is the sd, not the variance
hist(y, freq = FALSE, col = "lemonchiffon",
     xlab = "data values",
     breaks = 30,
     ylab = "density",
     main = "N(mean = 20, sd = 4) Distribution")
lines(density(y, bw=1.5), col="midnightblue", lwd=4)
z <- (y - mean(y)) / sd(y)
library(magrittr) # provides the %>% pipe
mean(z) %>% round(3) # 0
## [1] 0
sd(z) %>% round(3) # 1
## [1] 1
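Base R's scale() performs the same standardization in a single call:
z2 <- as.numeric(scale(y)) # centers by mean(y) and scales by sd(y)
all.equal(z, z2) # TRUE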
The qqnorm() function in R plots the quantiles of a sample against the theoretical quantiles of a normal distribution. The qqline() function adds a reference line to the normal Q-Q plot, which makes it much easier to judge whether there is a clear deviation from normality.
par(mfrow=c(1,2))
qqnorm(y, main="QQ normal plot of Y")
qqline(y, col="orchid", lwd = 4)
qqnorm(z, main="QQ normal plot of Z")
qqline(z, col="orchid", lwd=4)
\[ L\left(\mu, \sigma^{2}\right)=\prod_{i=1}^{n}\left(\frac{1}{\sqrt{2 \pi \sigma^{2}}}\right) \exp \left\{-\frac{\left(x_{i}-\mu\right)^{2}}{2 \sigma^{2}}\right\} \]
\[ l\left(\mu, \sigma^{2}\right)=-n \log (\sqrt{2 \pi})-\frac{n}{2} \log \sigma^{2}-\frac{1}{2 \sigma^{2}} \sum_{i=1}^{n}\left(x_{i}-\mu\right)^{2} \]
\[ \frac{\partial l\left(\mu, \sigma^{2}\right)}{\partial \mu}=\frac{1}{\sigma^{2}}\left(\sum_{i=1}^{n} x_{i}-n \mu\right)=0 \]
\[ \hat{\mu}=\frac{1}{n} \sum_{i=1}^{n} x_{i} \]
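As a numerical sanity check, we can maximize the log-likelihood directly with optim() and compare with the closed-form estimates. This is a minimal sketch that reuses the simulated y from above; parameterizing \(\sigma\) on the log scale is an illustrative choice to keep it positive.
negll <- function(par, x) {
  # par[1] = mu, par[2] = log(sigma); the log keeps sigma positive
  -sum(dnorm(x, mean = par[1], sd = exp(par[2]), log = TRUE))
}
fit <- optim(c(10, 1), negll, x = y)
fit$par[1] # close to mean(y)
exp(fit$par[2])^2 # close to the MLE variance, mean((y - mean(y))^2)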
\[ \begin{aligned} E(\hat{\mu}) &=E\left(\frac{1}{n} \sum_{i=1}^{n} x_{i}\right) \\ &=\frac{1}{n} \sum_{i=1}^{n} E\left(x_{i}\right) \\ &=\frac{1}{n} n \mu \\ &=\mu \end{aligned} \]
Since \(E(\hat\mu) = \mu\), the MLE, the sample mean, is an unbiased estimator of \(\mu\).
In contrast, the MLE of the variance of a normal random variable is biased, because
\[ \begin{aligned} E\left[\hat{\sigma}^{2}\right] &=E\left[\frac{1}{n} \sum_{i=1}^{n}\left(X_{i}-\hat{\mu}\right)^{2}\right] \\ &=\frac{1}{n} E\left[\sum_{i=1}^{n} X_{i}^{2}-n \hat{\mu}^{2}\right] \\ &=\frac{1}{n}\left[\sum_{i=1}^{n} E\left[X_{i}^{2}\right]-n E\left[\hat{\mu}^{2}\right]\right] \\ &=\frac{1}{n}\left[\sum_{i=1}^{n}\left(\sigma^{2}+E\left[X_{i}\right]^{2}\right)-n\left(\frac{\sigma^{2}}{n}+E[\hat{\mu}]^{2}\right)\right] \\ &=\frac{1}{n}\left[n \sigma^{2}+n \mu^{2}-\sigma^{2}-n \mu^{2}\right] \\ &=\frac{n-1}{n} \sigma^{2} \end{aligned} \]
\[ \begin{aligned} \operatorname{Var}(\hat{\mu}) &=\operatorname{Var}\left[\frac{1}{n} \sum_{i=1}^{n} X_{i}\right] \\ &=\frac{1}{n^{2}} \sum_{i=1}^{n} \operatorname{Var}\left(X_{i}\right) \\ &=\frac{1}{n^{2}}\left[n \sigma^{2}\right]=\frac{\sigma^{2}}{n} \end{aligned} \]
\[ \hat{\mu}=\frac{1}{n} \sum_{i=1}^{n} X_{i} \sim N\left(\mu, \frac{\sigma^{2}}{n}\right) \]
\[ \operatorname{Pr}(|\hat{\mu}-\mu|>\varepsilon)=\operatorname{Pr}\left(\frac{|\hat{\mu}-\mu|}{\sigma / \sqrt{n}}>\frac{\sqrt{n} \varepsilon}{\sigma}\right) \]
\[ \operatorname{Pr}(|\hat{\mu}-\mu|>\varepsilon)=2 \operatorname{Pr}\left(Z>\frac{\sqrt{n} \varepsilon}{\sigma}\right)=2\left(1-\Phi\left(\frac{\sqrt{n} \varepsilon}{\sigma}\right)\right) \rightarrow 0 \]
We can check this convergence numerically in R:
N <- seq(3, 30)
epsilon <- 1
sigma <- 1
pr <- sapply(N, function(n) {
2 * (1 - pnorm((sqrt(n) * epsilon)/sigma))
})
df <- data.frame(N = N, Prob = pr)
library(ggplot2)
ggplot(df, aes(x = N, y = Prob)) +
  geom_line(linewidth = 2) +
  geom_hline(yintercept = 0, col = 2) +
  theme_bw()
\[ \hat{\mu} \sim N\left(\mu, \frac{\sigma^{2}}{n}\right) \]
\[ f_{X}(x)=\frac{x^{\alpha-1}}{\Gamma(\alpha) \beta^{\alpha}} \exp \left\{-\frac{x}{\beta}\right\}, \quad x>0, \alpha, \beta>0 \]
\[ l(\alpha, \beta)=(\alpha-1) \sum_{i=1}^{n} \log x_{i}-n \log \Gamma(\alpha)-n \alpha \log \beta-\frac{1}{\beta} \sum_{i=1}^{n} x_{i} \]
\[ \frac{\partial l(\alpha, \beta)}{\partial \beta}=\frac{-n \alpha}{\beta}+\frac{1}{\beta^{2}} \sum_{i=1}^{n} x_{i}=0 \quad \Longrightarrow \quad \hat{\beta}=\frac{1}{n \alpha} \sum_{i=1}^{n} x_{i}=\frac{\bar{x}}{\alpha} \]
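There is no closed-form solution for \(\hat\alpha\), since the score equation for \(\alpha\) involves the digamma function, so in practice both parameters are estimated numerically. A minimal sketch with optim() follows; the sample xg, the seed, the starting values, and the log-parameterization are illustrative assumptions.
set.seed(1)
xg <- rgamma(500, shape = 2, scale = 1)
negll_gamma <- function(par, x) {
  # par = (log(alpha), log(beta)); logs keep both parameters positive
  -sum(dgamma(x, shape = exp(par[1]), scale = exp(par[2]), log = TRUE))
}
fit_gamma <- optim(c(0, 0), negll_gamma, x = xg)
exp(fit_gamma$par) # should be close to (alpha, beta) = (2, 1)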
\[ \begin{array}{c}E\left[X_{i}\right]=\mu=\alpha \beta \\ \operatorname{Var}\left(X_{i}\right)=\sigma^{2}=\alpha \beta^{2}\end{array} \]
x <- seq(0, 10, by = 0.1)
plot(x, dgamma(x, shape = 2, scale = 1), xlab = "x", ylab = "f(x)",
     main = "PDF of Gamma", type = 'l', col = 2, lwd = 4)
\[ \begin{array}{l}E[\hat{\mu}]=\mu=\alpha \beta \\ \operatorname{Var}(\hat{\mu})=\frac{\alpha \beta^{2}}{n}\end{array} \]
muhat <- replicate(1000,
{
x <- rgamma(10, shape = 2, scale = 1)
mean(x)
})
mean(muhat) # close to alpha * beta = 2
## [1] 1.991642
var(muhat) # close to alpha * beta^2 / n = 2/10 = 0.2
## [1] 0.2043863
hist(muhat, col="lemonchiffon", freq=FALSE,
     breaks=30, ylim=c(0, 1),
     main="Sampling distribution of the sample mean (Gamma)",
     xlab="sample mean", ylab="density")
lines(density(muhat), col=2, lwd=4)
We can assess the normality of the sampling distribution with the qqnorm() function.
muhat.z <- (muhat - mean(muhat))/sd(muhat)
qqnorm(muhat.z)
qqline(muhat.z, col=2, lwd=4)
muhat5 <- replicate(1000,
{
x <- rgamma(5, shape = 2, scale = 1)
mean(x)
})
muhat5.z <- (muhat5 - mean(muhat5))/sd(muhat5)
qqnorm(muhat5.z, main = "n = 5")
qqline(muhat5.z, col=2, lwd=4)
muhat30 <- replicate(1000,
{
x <- rgamma(30, shape = 2, scale = 1)
mean(x)
})
muhat30.z <- (muhat30 - mean(muhat30))/sd(muhat30)
qqnorm(muhat30.z, main = "n = 30")
qqline(muhat30.z, col=2, lwd=4)
muhat100 <- replicate(1000,
{
x <- rgamma(100, shape = 2, scale = 1)
mean(x)
})
muhat100.z <- (muhat100 - mean(muhat100))/sd(muhat100)
qqnorm(muhat100.z, main = "n = 100")
qqline(muhat100.z, col=2, lwd=4)
plot(density(muhat5), col=1, lwd=4,
     xlim=c(0,4), ylim=c(0,3),
     xlab = "sample mean", ylab="density",
     main="sampling distribution for different n")
lines(density(muhat), col=2, lty=2, lwd=5)
lines(density(muhat30), col=3, lty=3, lwd=5)
lines(density(muhat100), col=4, lty=4, lwd=5)
legend("topright", legend=c("n = 5", "n = 10", "n = 30", "n = 100"),
       col=1:4, lty=1:4, lwd=3)
\[ \lim _{n \rightarrow \infty} \operatorname{Pr}\left(\frac{\hat{\mu}-\mu}{\sigma / \sqrt{n}} \leq z\right)=\Phi(z) \]
\[ \hat{\mu} \sim N\left(\mu, \frac{\sigma^{2}}{n}\right) \]
The CLT is one of the most important results in statistics. In classical statistics there were few alternatives to the CLT for approximating the distribution of the sample mean, because closed-form solutions do not exist for most distributions. Today, we can often obtain better finite-sample approximations through simulation-based methods, such as bootstrapping.
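As an illustration, here is a minimal sketch of a nonparametric bootstrap for the sample mean of a gamma sample; the sample x, the seed, and the number of resamples are arbitrary choices.
set.seed(2)
x <- rgamma(30, shape = 2, scale = 1)
boot_means <- replicate(2000, mean(sample(x, replace = TRUE)))
quantile(boot_means, c(0.025, 0.975)) # bootstrap 95% interval for the mean
mean(x) + c(-1, 1) * qnorm(0.975) * sd(x) / sqrt(30) # CLT-based interval, for comparison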
\[ \lim _{n \rightarrow \infty} \operatorname{Pr}\left(\left|\hat{\theta}_{n}-\theta\right|>\varepsilon\right)=0 \]
\[ \hat{\theta}_{n} \rightarrow N\left(\theta, \frac{1}{n I(\theta)}\right) \]
where \(I(\theta)\) is the Fisher information of a single observation,
\[ I(\theta)=-E_{\theta}\left[\frac{\partial^{2}}{\partial \theta^{2}} \log f(X ; \theta)\right] \]
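For example, for one observation from a \(N(\mu, \sigma^{2})\) model with \(\sigma^{2}\) known, \(\frac{\partial^{2}}{\partial \mu^{2}} \log f(x ; \mu)=-1/\sigma^{2}\), so
\[ I(\mu)=\frac{1}{\sigma^{2}} \quad \text { and } \quad \frac{1}{n I(\mu)}=\frac{\sigma^{2}}{n}, \]
which matches the exact variance of \(\hat{\mu}\) derived earlier.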
\[ \hat{\sigma}^{2}=\frac{1}{n} \sum_{i=1}^{n}\left(X_{i}-\hat{\mu}\right)^{2} \]
\[ E\left[\hat{\sigma}^{2}\right]=\frac{n-1}{n} \sigma^{2} \]
We can verify by simulation in R that this estimator is asymptotically unbiased and approximately normally distributed.
mle <- function(x) {
  # MLE of the variance: divides by n rather than n - 1
  mean((x - mean(x))^2)
}
get_dist <- function(n) {
  # sampling distribution of the variance MLE for samples of size n
  sigmahat <- replicate(1000, {
x <- rnorm(n, mean=5, sd=2)
mle(x)
})
return(sigmahat)
}
ns <- seq(10, 500, by=10)
dist <- lapply(ns, get_dist)
bias <- sapply(dist, function(x) mean(x) - 4) # true variance is sd^2 = 2^2 = 4
plot(ns, bias, type = 'l', lwd=3, xlab="n", ylab="Bias")
abline(h=0, col=2, lty=2, lwd=3)
par(mfrow=c(1,2))
plot(density(dist[[1]], bw = 1), main="n = 10", xlab="sigmahat values", ylab="density", col="green", lwd=4)
qqnorm((dist[[1]] - mean(dist[[1]]))/sd(dist[[1]]), main="n = 10")
qqline((dist[[1]] - mean(dist[[1]]))/sd(dist[[1]]), col="orchid", lwd=4)
par(mfrow=c(1,2))
plot(density(dist[[50]], bw = 1), main="n = 500", xlab="sigmahat values", ylab="density", col="green", lwd=4)
qqnorm((dist[[50]] - mean(dist[[50]]))/sd(dist[[50]]), main="n = 500")
qqline((dist[[50]] - mean(dist[[50]]))/sd(dist[[50]]), col="orchid", lwd=4)
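As a final check, the Fisher information for \(\sigma^{2}\) in the normal model is \(I(\sigma^{2})=1/(2\sigma^{4})\), so the asymptotic variance of \(\hat\sigma^{2}\) is \(2\sigma^{4}/n\); with \(\sigma = 2\) and \(n = 500\) this gives \(2 \cdot 16 / 500 = 0.064\), which the simulated estimates should approximately match:
var(dist[[50]]) # empirical variance of sigmahat at n = 500
2 * 2^4 / 500 # asymptotic variance 2 * sigma^4 / n = 0.064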