Acknowledgement: This chapter is largely based on chapter 3 of “Introduction to Econometrics with R”. https://www.econometrics-with-r.org/index.html
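The goal of this chapter is to review two basic properties of estimators, unbiasedness and consistency, and to illustrate the behavior of the sample mean through simulation.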
Consider the estimator \(\hat{\mu}_N\) for the unknown parameter \(\mu\).
Unbiasedness: The expectation of the estimator equals the true parameter in the population. \[ E[\hat{\mu}_N] = \mu \]
Consistency: The estimator converges in probability to the true parameter. \[ \forall \epsilon >0, \quad \lim_{N \rightarrow \infty} \Pr(|\hat{\mu}_{N}-\mu|<\epsilon)=1 \]
## Parsed with column specification:
## cols(
## AGE = col_double(),
## INCTOT = col_double()
## )
## [1] 30165.47
## [1] 38306.17
# Density of `pop`, drawn on a log10-scaled x axis and stored in `fig2`.
# Built with ggplot() because qplot() is deprecated since ggplot2 3.4.0;
# scale_x_log10() takes over the role of qplot's `log = "x"` option.
# NOTE(review): assumes `pop` is a plain numeric vector -- confirm where it
# is created.
fig2 <- ggplot(data = data.frame(pop = pop), mapping = aes(x = pop)) +
  geom_density() +
  scale_x_log10() +
  xlab("Income") +
  ylab("Density")
# Fix the seed of the random number generator.
# This is needed to maintain the reproducibility of the results.
set.seed(123)
# One random sample of 100 observations drawn from the variable pop.
test <- sample(pop, size = 100)
# Repeat the draw 2000 times with a loop and keep the mean of every sample.
Nsamples <- 2000
result1 <- numeric(length = Nsamples)
for (i in seq_len(Nsamples)) {
  test <- sample(pop, size = 100)
  result1[i] <- mean(test)
}
# Another way to do this: replicate() evaluates the expression Nsamples times.
# Sample means are computed for sample sizes 10, 100 and 500; any earlier
# value of result1 is overwritten here.
result1 <- replicate(n = Nsamples, expr = mean(sample(pop, size = 10)))
result2 <- replicate(n = Nsamples, expr = mean(sample(pop, size = 100)))
result3 <- replicate(n = Nsamples, expr = mean(sample(pop, size = 500)))
# Create dataframe: one column of sample means per sample size.
result_data <- data.frame(
  Ybar10 = result1,
  Ybar100 = result2,
  Ybar500 = result3
)
# Use "melt" to change result_data into long format.
# The default column names (`variable`, `value`) are kept on purpose, because
# the figure code maps `colour = variable`. The former argument
# `variable.name = "Variable"` is not an argument of reshape::melt() (it was
# silently ignored there) and renames the column under reshape2::melt(), in
# which case `variable` would not exist for the plot.
# NOTE(review): which melt() is attached is not visible here -- confirm.
data_for_plot <- melt(data = result_data)
## Using as id variables
# Use "ggplot2" to create the figure: a density line of `value` for each
# level of `variable`, plus a black vertical line at pop_mean.
# The variable `fig` contains the information about the figure.
fig <- ggplot(data_for_plot) +
  geom_line(mapping = aes(x = value, colour = variable), stat = "density") +
  geom_vline(xintercept = pop_mean, colour = "black") +
  xlab("Sample mean")
# Define function for simulation: standardized sample means.
#
# Draws `Nsamples` random samples of size `samplesize` from `pop` with
# sample() and standardizes each sample mean as
#   (mean(sample) - pop_mean) / (pop_sd / sqrt(samplesize)).
#
# Arguments:
#   Nsamples    number of samples to draw; 0 gives an empty result.
#   samplesize  number of observations in each sample.
#   pop         vector the samples are drawn from.
#   pop_mean    value subtracted from each sample mean.
#   pop_sd      value that, divided by sqrt(samplesize), scales the result.
# Returns a numeric vector of length `Nsamples`.
f_simu_CLT <- function(Nsamples, samplesize, pop, pop_mean, pop_sd) {
  # The scaling factor does not depend on the draw, so compute it once.
  std_error <- pop_sd / sqrt(samplesize)
  output <- numeric(Nsamples)
  # seq_len() skips the loop when Nsamples is 0; 1:Nsamples would iterate
  # over c(1, 0) and return a vector of length 1 instead of 0.
  for (i in seq_len(Nsamples)) {
    test <- sample(x = pop, size = samplesize)
    output[i] <- (mean(test) - pop_mean) / std_error
  }
  output
}
# Fix the seed of the random number generator for this simulation.
set.seed(124)
# Run simulation: 2000 standardized sample means for each of the sample
# sizes 10, 100 and 1000.
Nsamples <- 2000
result_CLT1 <- f_simu_CLT(
  Nsamples = Nsamples, samplesize = 10,
  pop = pop, pop_mean = pop_mean, pop_sd = pop_sd
)
result_CLT2 <- f_simu_CLT(
  Nsamples = Nsamples, samplesize = 100,
  pop = pop, pop_mean = pop_mean, pop_sd = pop_sd
)
result_CLT3 <- f_simu_CLT(
  Nsamples = Nsamples, samplesize = 1000,
  pop = pop, pop_mean = pop_mean, pop_sd = pop_sd
)
# Draws from the standard normal distribution, kept as the comparison series.
result_stdnorm <- rnorm(n = Nsamples)
# Create dataframe: the three standardized series plus the standard normal
# draws, one column each.
result_CLT_data <- data.frame(
  Ybar_standardized_10 = result_CLT1,
  Ybar_standardized_100 = result_CLT2,
  Ybar_standardized_1000 = result_CLT3,
  Standard_Normal = result_stdnorm
)
# Use "melt" to change result_CLT_data into long format.
# The default column names (`variable`, `value`) are kept on purpose, because
# the figure code maps `colour = variable`. The former argument
# `variable.name = "Variable"` is not an argument of reshape::melt() (it was
# silently ignored there) and renames the column under reshape2::melt(), in
# which case `variable` would not exist for the plot.
# NOTE(review): which melt() is attached is not visible here -- confirm.
data_for_plot <- melt(data = result_CLT_data)
## Using as id variables
# Use "ggplot2" to create the figure: a density line of `value` for each
# level of `variable`, plus a black vertical line at 0.
# The x axis shows standardized sample means, so it is labelled accordingly
# (the earlier label "Sample mean" was carried over from the previous figure).
fig <-
  ggplot(data = data_for_plot) +
  xlab("Standardized sample mean") +
  geom_line(aes(x = value, colour = variable), stat = "density") +
  geom_vline(xintercept = 0, colour = "black")