# topic 17
# This will be remarkably similar to topics 15 and 16, so
# much so that it would be worth comparing the three scripts.
# First, set up the situation. We have a population
# with an unknown proportion of some characteristic.
#
# Load the course helper scripts. They are expected to define the
# gnrnd5() and gnrnd4() functions that are called later in this script.
source("../gnrnd5.R")
source("../gnrnd4.R")
# Generate the population. Nothing is captured from the call itself;
# the data is read from the variable L1 on the next line, so gnrnd5()
# presumably creates L1 as a side effect (confirm in gnrnd5.R).
gnrnd5( 38579399907, 567685) # population size is 4000 items
# Keep the population under its own name because L1 is reassigned
# further down when samples are drawn.
big_pop <- L1
# I do not know the proportion of 1's in big_pop
# Now, someone says that they believe that the
# proportion of 1's in big_pop is 20%.
# Thus our null hypothesis is that the proportion
# of 1's in big_pop is 0.20. We want to look at an
# alternative that says the proportion of 1's is
# greater than 0.20. This is a one-tailed test.
# We will get a sample of size 80 and we will
# look at the proportion of 1's in the sample.
# We are willing to be wrong in telling the
# person that they are wrong 5% of the time!
# That is, even if the true proportion of 1's in
# big_pop is 20%, we are willing to reject the null
# hypothesis for 5% of the random samples that we take.
#
########################
## The critical value approach. We know that
## we can use a normal approximation to the distribution
## of sample proportions because 0.20*80 > 10 and
## (1-0.20)*80 > 10. Also, our sample size is way
## less than 5% of the population.
## Assuming that the null hypothesis is true, the
## distribution of sample proportions, p_hat, will
## be N( 0.20, sqrt(0.20*(1-0.20)/80))
## This is a one-tailed test because only
## a sample proportion that is too high
## would indicate that 0.20 is not the proportion.
## Therefore, find the value, in a normal distribution
## that has 0.05 as the P(X > x).
# Standard deviation of the sample proportion when the null
# hypothesis holds (p = 0.20, n = 80).
p_sd <- sqrt(0.20 * (1 - 0.20) / 80)
p_sd
# Two-step route: first the z-score that leaves 0.05 in the upper
# tail of the standard normal, then shift and scale it onto the
# distribution of sample proportions.
high_z <- qnorm(0.05, lower.tail = FALSE)
high_z
high_val <- 0.20 + high_z * p_sd
high_val
# One-step route: give qnorm() the mean and standard deviation and
# let it return the cutoff directly. It should agree with the
# two-step value printed just above.
high_val <- qnorm(0.05, mean = 0.20, sd = p_sd, lower.tail = FALSE)
high_val
##########
##########
# Get our sample.
# The first time we do this, use fixed arguments to gnrnd4() so that
# we get the same sample each time.
gnrnd4(962137901, 400000001)
L1
# Take those values as the index positions of our random sample.
our_samp <- big_pop[L1]
our_samp
# Tabulate the sample to see how many 1's it holds.
table(our_samp)
# Proportion of 1's in the sample, computed from the data itself
# rather than typed in by hand from the table, so the figure cannot
# drift out of step with the sample.
mean(our_samp == 1)
## so now compare the proportion in the sample
## to our critical value.
high_val
## In this case the sample proportion is not greater
## than our critical high. Therefore, at the
## 0.05 level of significance, we do not have evidence
## to reject null hypothesis that the true
## proportion is 0.20
## in favor of the alternative hypothesis that
## the true proportion is greater than 0.20.
#############
############# the attained significance approach
## how strange would it be to get a proportion of
## 0.2625 for a sample of size 80 if the true
## proportion is 0.200?
#
# Upper-tail probability of seeing a sample proportion of 0.2625 or
# more when sample proportions follow N(0.20, p_sd).
pnorm(0.2625, mean = 0.20, sd = p_sd, lower.tail = FALSE)
#
# That probability is not below our 5% level of significance, so we
# do not reject the null hypothesis in favor of the alternative.
####################### Now let the course function
####################### carry out the same test
source("../hypo_prop.R")
hypoth_test_prop(0.20, 21, 80, 1, 0.05)
#######################################
#######################################
# Now we want to repeat this process
# but each time we want a different sample
# of size 80
L1 <- sample(big_pop, 80)
freqs <- table(L1)
freqs
# Count the 1's by value. The first cell of the table is the count
# of 1's only when 1 is the first category present in this sample;
# if a sample held no 1's, the first cell would silently be the count
# of some other value.
num_x <- sum(L1 == 1)
hypoth_test_prop(0.20, num_x, 80, 1, 0.05)
#### perform the five statements above, from sample() through
#### hypoth_test_prop(), again and again
### now, since we have the population let us peek
# at the true proportion
freqs <- table(big_pop)
freqs
# Proportion of 1's in the whole population. It is computed from the
# values and the actual population length, not from the first table
# cell and a hard-coded 4000, so it stays correct if the population
# is regenerated with a different size or mix of values.
true_prop <- mean(big_pop == 1)
true_prop
####### Try our samples again, but this time test
## the null hypothesis that the true proportion
## is 0.24875, and do the test at the 0.05
## level of significance.
L1 <- sample(big_pop, 80)
freqs <- table(L1)
# Count the 1's by value; the first table cell is the count of 1's
# only when 1 is the first category present in this sample.
num_x <- sum(L1 == 1)
hypoth_test_prop(0.24875, num_x, 80, 1, 0.05)
#### perform the four statements above again and again,
#### and we should see a Type I error about
#### 5% of the time.
### we can actually do this 1000 times and see how
### many times we reject the null hypothesis even
### though it is true.
# Number of repeated samples to draw.
n_reps <- 1000
# Pre-fill the results with NA rather than the integers 1:1000, so no
# placeholder can be mistaken for a recorded result. NA is coerced to
# whatever type the stored element turns out to have.
L2 <- rep(NA, n_reps)
for (i in seq_len(n_reps)) {
  L1 <- sample(big_pop, 80)
  # Count the 1's by value rather than by position in a table.
  num_x <- sum(L1 == 1)
  answer <- hypoth_test_prop(0.24875, num_x, 80, 1, 0.05)
  # Element 13 of the returned value is what gets tallied below
  # (presumably the reject / do-not-reject decision; confirm in
  # hypo_prop.R).
  L2[i] <- answer[13]
}
table(L2)