# topic 15
# First, set up the situation. We have a population
# with a known standard deviation.
#
source("../gnrnd5.R")
source("../gnrnd4.R")
# Build the population data.
# NOTE(review): gnrnd5() comes from the sourced ../gnrnd5.R; it appears
# to leave its result in the global L1 (L1 is read on the next line
# without being assigned in this script) -- confirm against that file.
gnrnd5(169453199902, 873001383)
# Keep the population under its own name, because L1 is reassigned
# further down in this script.
big_pop <- L1
# I can tell you that I know the standard deviation
# of big_pop is 17.12479
# Now, someone says that they believe that the
# mean of big_pop is 150. Thus our null hypothesis
# is that the true mean is 150. We want to see if that
# could be correct. We will get a sample of
# size 35 and we will look at the sample mean.
# We are willing to accept a 2.5% chance of wrongly telling
# the person that they are wrong (a Type I error)!
########################
## The critical value approach. We know that
## samples of size 35 will have a standard error
## of the mean equal to 17.12479/sqrt(35) and that the
## sample means will be normally distributed with the
## same mean as the population, a value we assume
## to be 150. This is a two-tailed test because
## a sample mean that is either too low or too high
## would indicate that 150 is not the mean.
## Therefore, find the value that has 0.025/2 as the
## P(X<z) and the value that has 0.025/2 as the P(X>z)
# The long way: get each critical z-score from the standard normal
# first, then convert it to the scale of the sample mean.
low_z <- qnorm(p = 0.025 / 2)
low_z
# low_z is negative, so adding the product lands below 150
low_val <- 150 + low_z * 17.12479 / sqrt(35)
low_val
high_z <- qnorm(p = 0.025 / 2, lower.tail = FALSE)
high_z # redundant by symmetry: this is just -low_z
high_val <- 150 + high_z * 17.12479 / sqrt(35)
high_val
####### pause and look at a shorter way to get
####### those two values
# Shortcut: let qnorm() work directly on the sampling distribution of
# the sample mean (mean 150, standard error 17.12479/sqrt(35)) so no
# separate z-score step is needed.
# The sd must be built from the population standard deviation
# 17.12479; a mistyped 17.124479 here would make these critical values
# disagree with the ones obtained from the z-scores.
low_val <- qnorm(0.025 / 2,
                 mean = 150, sd = 17.12479 / sqrt(35))
low_val
# lower.tail = FALSE asks for the value with 0.025/2 of the area above it
high_val <- qnorm(0.025 / 2,
                  mean = 150, sd = 17.12479 / sqrt(35),
                  lower.tail = FALSE)
high_val
##########
##########
# Draw the sample.
# For this first pass use fixed arguments so that every run
# produces the same sample.
# NOTE(review): gnrnd4() comes from the sourced ../gnrnd4.R; it appears
# to overwrite the global L1 with its result -- confirm.
gnrnd4(768733401, 200000001)
L1
# use the values now in L1 as positions into big_pop
our_samp <- big_pop[L1]
our_samp
# the sample mean that will be compared to the critical values
mean(our_samp)
## so now compare that mean to our critical values.
## In this case the sample mean is greater than
## our critical high. Therefore reject the
## null hypothesis that the true mean is 150.
#############
############# the attained significance approach
## how strange would it be to get a mean of
## 160.3743 for a sample of size 35 if the true
## mean is 150?
#
# Upper-tail probability of seeing a sample mean of 160.3743 or more
# when the true mean is 150 and the standard error is
# 17.12479/sqrt(35).
p_one_tail <- pnorm(160.3743, mean = 150, sd = 17.12479 / sqrt(35),
                    lower.tail = FALSE)
p_one_tail
#
## but we would need to double that to account for
## values that extreme or more extreme on the low
## side. Double the computed value itself rather than a
## hand-copied, rounded printout of it.
p_two_tail <- 2 * p_one_tail
p_two_tail
# is that probability less than our 2.5% ?
# Yes, therefore, reject the null hypothesis
# in favor of the alternative.
####################### Now use the function to
####################### do the same thing
source("../hypo_known.R")
# Run the packaged test on the same numbers used above: 150 (the
# hypothesized mean), 17.12479 (the known population standard
# deviation), 0.025 (the significance level), 35 (the sample size) and
# the sample mean.
# NOTE(review): the argument roles and the meaning of the third
# argument (0) are inferred from the values -- confirm in hypo_known.R.
hypoth_test_known(150, 17.12479, 0, 0.025, 35, mean(our_samp))
#######################################
#######################################
# Now we want to repeat this process
# but each time we want a different sample
# of size 35
# draw a fresh random sample of 35 values from the population
L1 <- sample(big_pop, size = 35)
L1
# same test as before (null mean 150, level 0.025), new sample mean
hypoth_test_known(150, 17.12479, 0, 0.025, 35, mean(L1))
#### re-run the sample() and hypoth_test_known() statements above again and again
### Since the whole population is available, peek at
# its true mean.
mean(big_pop)
####### Sample again, but this time test the null
## hypothesis that the true mean is 160.2938, at the
## 0.05 level of significance.
L1 <- sample(big_pop, size = 35)
x_bar <- mean(L1)
# pass the sample mean computed just above
hypoth_test_known(160.2938, 17.12479, 0, 0.05, 35, x_bar)
#### re-run the three statements above again and again,
#### and we should see a Type I error about
#### 5% of the time.
### we can actually do this 1000 times and see how
### many times we reject the null hypothesis even
### though it is true.
# L2 holds one recorded result per trial; it starts out as the
# integers 1..1000 and every slot is overwritten inside the loop.
L2 <- seq_len(1000)
for (i in seq_along(L2)) {
  # new random sample of 35 for this trial
  L1 <- sample(big_pop, size = 35)
  x_bar <- mean(L1)
  # sample standard deviation; not passed to the test below
  s_x <- sd(L1)
  answer <- hypoth_test_known(160.2938, 17.12479, 0, 0.05, 35, x_bar)
  # NOTE(review): element 13 of the result is presumably the
  # reject / do-not-reject outcome -- confirm in hypo_known.R
  L2[i] <- answer[13]
}
# count how often each recorded outcome occurred across the trials
table(L2)