# Line 1: a small demonstration of hypothesis testing,
# in this case for a population with unknown standard deviation
#
# First, we will get a population
# In this case we will get a large population
source("../gnrnd5.R")
gnrnd5(177938412404, 184000676)
# let us look at the head and tail values
head(L1)
tail(L1)
min(L1)
max(L1)
# just a quick look at L1
hist(L1)
boxplot(L1, horizontal = TRUE)
source("../assess_normality.R")
assess_normality(L1)
#
# L1 sure looks like a Normal distribution.
#
##########################
## Problem: test the null hypothesis
##    that the population mean is equal to 60.0
##    against the alternative hypothesis that
##    the population mean is greater than 60.0.
##    Run the test at the 0.05 level of significance.
##########################
# The numbers that define the problem are named once so that
# every step below uses the same values.
null_mean <- 60.0   # population mean under the null hypothesis
sig_level <- 0.05   # level of significance
sample_size <- 23   # size of each simple random sample

# ---- BEGIN repeatable section (population 1) ----
# take a simple random sample of size 23
#
# Be careful: Every time we do this we get
# a different random sample
#
# sample() draws the index values without replacement, so no item
# of the population can show up twice in the sample, and using
# length(L1) keeps every index inside the population whatever
# its size happens to be.
L2 <- sample(length(L1), sample_size)
# L2 holds the index values of our simple random sample
L2
L3 <- L1[L2]
# L3 holds the simple random sample
L3
# we will get the mean of L3
xbar <- mean(L3)
xbar
# and we need to get the standard deviation of
# the sample
sx <- sd(L3)
sx
# The long way to do the test is to find the probability of
# getting this mean or higher if the true mean is 60.0
# given that the standard deviation of the population is
# unknown and that we have a sample of size 23
# from a population that we know to be normally distributed.
#
# For samples of size 23 from such a population the
# standardized value (xbar - population mean) / (sx / sqrt(n))
# is distributed as a Student's-t with 22 degrees of freedom.
# sx / sqrt(n) is the estimated standard deviation of the
# sample mean.
std_err <- sx / sqrt(sample_size)
# For the attained significance approach we just need to find
pt((xbar - null_mean) / std_err, sample_size - 1, lower.tail = FALSE)
# If that value is less than 0.05 then we reject the
# null hypothesis in favor of the alternative

# For the critical value approach we first need to find the
# value of t that has P(X>t)=0.05
# (the name t is kept from the original script; it shadows
# base R's t() function as a variable in the workspace)
t <- qt(sig_level, sample_size - 1, lower.tail = FALSE)
t
# Then transform that to our sample
t * std_err + null_mean
# Then if xbar is greater than this value we reject the
# null hypothesis in favor of the alternative
#
# Of course, we could use the function
# hypoth_test_unknown() to do both of these approaches in
# one easy step.
#
source("../hypo_unknown.R")
# NOTE(review): the second argument (1) presumably selects the
# "greater than" alternative -- confirm against hypo_unknown.R
hypoth_test_unknown(null_mean, 1, sig_level, sample_size, xbar, sx)
# ---- END repeatable section (population 1) ----
#
#################################
# go back and execute the repeatable section above (from the
# BEGIN marker to the END marker for population 1) many more times.
# Each time you get a different random sample.
# Keep track of the number of times that you reject or
# do not reject the null hypothesis.  By the way, the
# true mean of the population is 67.63552.
#################################
#
# now we will do the same thing for a different population
#
gnrnd5(146723412404, 184000600)
# let us look at the head and tail values
head(L1)
tail(L1)
min(L1)
max(L1)
# NOTE(review): pop_sd() is not defined by any source() call
# visible in this script; confirm it is loaded before this line
# runs.  sigma is not used by the test below, which treats the
# population standard deviation as unknown.
sigma <- pop_sd(L1)
sigma
# just a quick look at L1
hist(L1)
boxplot(L1, horizontal = TRUE)
assess_normality(L1)
#
# L1 sure looks like a Normal distribution.
#
##########################
## Problem: test the null hypothesis
##    that the population mean is equal to 60.0
##    against the alternative hypothesis that
##    the population mean is greater than 60.0.
##    Run the test at the 0.05 level of significance.
##########################
# Restate the problem values so that this half of the script
# can be run without the first half.
null_mean <- 60.0   # population mean under the null hypothesis
sig_level <- 0.05   # level of significance
sample_size <- 23   # size of each simple random sample

# ---- BEGIN repeatable section (population 2) ----
# take a simple random sample of size 23
#
# Be careful: Every time we do this we get
# a different random sample
#
# As before, sample() picks the index values without replacement
# from 1 through length(L1).
L2 <- sample(length(L1), sample_size)
# L2 holds the index values of our simple random sample
L2
L3 <- L1[L2]
# L3 holds the simple random sample
L3
# we will get the mean of L3
xbar <- mean(L3)
xbar
# and we need to get the standard deviation of
# the sample
sx <- sd(L3)
sx
# The long way to do the test is to find the probability of
# getting this mean or higher if the true mean is 60.0
# given that the standard deviation of the population is
# unknown and that we have a sample of size 23
# from a population that we know to be normally distributed.
#
# For samples of size 23 from such a population the
# standardized value (xbar - population mean) / (sx / sqrt(n))
# is distributed as a Student's-t with 22 degrees of freedom.
# sx / sqrt(n) is the estimated standard deviation of the
# sample mean.
std_err <- sx / sqrt(sample_size)
# For the attained significance approach we just need to find
pt((xbar - null_mean) / std_err, sample_size - 1, lower.tail = FALSE)
# If that value is less than 0.05 then we reject the
# null hypothesis in favor of the alternative

# For the critical value approach we first need to find the
# value of t that has P(X>t)=0.05
# (the name t is kept from the original script; it shadows
# base R's t() function as a variable in the workspace)
t <- qt(sig_level, sample_size - 1, lower.tail = FALSE)
t
# Then transform that to our sample
t * std_err + null_mean
# Then if xbar is greater than this value we reject the
# null hypothesis in favor of the alternative
#
# Of course, we could use the function
# hypoth_test_unknown() to do both of these approaches in
# one easy step.
#
source("../hypo_unknown.R")
# NOTE(review): the second argument (1) presumably selects the
# "greater than" alternative -- confirm against hypo_unknown.R
hypoth_test_unknown(null_mean, 1, sig_level, sample_size, xbar, sx)
# ---- END repeatable section (population 2) ----
#################################
# go back and execute the repeatable section above (from the
# BEGIN marker to the END marker for population 2) many more times.
# Each time you get a different random sample.
# Keep track of the number of times that you reject or
# do not reject the null hypothesis.  By the way, the
# true mean of the population is 60.03343.
#################################