# topic 18
# This will be remarkably similar to topics 15-17, so
# much so that it would be worth comparing the four scripts.
# First, set up the situation. We have a population
# with an unknown standard deviation, and for that matter,
# an unknown mean, but we are not worried about that.
# We do need to know that the population is approximately
# normally distributed.
#
source("../gnrnd5.R")
source("../gnrnd4.R")
gnrnd5( 138579399904, 432001673) # population size is 4000 items
big_pop <- L1
# We do not know the standard deviation of big_pop
# Now, someone says that they believe that the
# standard deviation in big_pop is 50.
# Thus our null hypothesis is that the standard
# deviation in big_pop is 50. We want to look at an
# alternative that says the standard deviation is
# less than 50. This is a one-tailed test.
# We will get a sample of size 83 and we will
# look at the standard deviation of the sample.
# We are willing to be wrong in telling the
# person that they are wrong 5% of the time!
# That is, even if the true standard deviation in
# big_pop is 50, we are willing to reject the null
# hypothesis for 5% of the random samples that we take.
#
########################
## The critical value approach. We know that
## we can use the fact that the ratio of (n-1) times
## the variance of the sample to the
## variance of the population will be a
## chi-squared distribution with n-1 degrees of freedom.
##
## So, if we assume the null hypothesis is true then,
## for a sample of size 83 we want to find the chi-squared
## value that has 5% of the area to its left.
chi_value <- qchisq(0.05, 82)
chi_value # this is our critical low value
##
# Get our sample
# the first time we do this let us get the
# same sample each time
gnrnd4(922138201, 400000001)
L1
# take those as the index values of our random sample
our_samp <- big_pop[ L1 ]
our_samp
# find the standard deviation of our sample
samp_sd <- sd( our_samp )
samp_sd
## so now compare (n-1)*samp_sd^2/(50^2)
## to our critical low value.
(82)*samp_sd^2/(50^2)
## In this case the ratio is not less than
## than our critical low. Therefore, at the
## 0.05 level of significance, we do not have evidence
## to reject null hypothesis that the true
## population standard deviation is 50
## in favor of the alternative hypothesis that
## the true standard deviation is less than 50.
#############
############# the attained significance approach
## how strange would it be to get a ratio of
## 82*samp_sd^2/(50^2) if the true
## standard deviation is 50?
#
pchisq(82*samp_sd^2/(50^2), 82 )
#
# That probability is not less than our 5% level
# of significance. Therefore, do not reject the
# null hypothesis in favor of the alternative.
####################### Now use the function to
####################### do the same thing
source("../hypo_sigma.R")
hypoth_test_sigma( 50, 83, samp_sd, -34, 0.05)
#######################################
#######################################
# Now we want to repeat this process
# but each time we want a different sample
# of size 80
L1 <- sample( big_pop, 83 )
samp_sd <- sd( L1 )
hypoth_test_sigma( 50, 83, samp_sd, -34, 0.05)
#### perform lines 95-97 again and again
### now, since we have the population let us peek
# at the true standard deviation
source("../pop_sd.R")
true_sd <- pop_sd( big_pop )
true_sd
####### Try our samples again, but this time test
## the null hypothesis that the true standard
## deviation is 46.50194, and do the test at
## the 0.05 level of significance.
L1 <- sample( big_pop, 83 )
samp_sd <- sd( L1 )
hypoth_test_sigma( true_sd, 83, samp_sd, -34, 0.05)
#### perform lines 109-111 again and again,
#### and we should see a Type I error about
#### 5% of the time.
### we can actually do this 1000 times and see how
### times we reject the null hypothesis even
### though it is true.
L2 <- 1:1000
for( i in 1:1000) {
L1 <- sample( big_pop, 83 )
samp_sd <- sd( L1 )
answer <- hypoth_test_sigma( true_sd, 83, samp_sd, -34, 0.05)
L2[i] <- answer[10]
}
table( L2 )