# line 1
# Look at testing the null hypothesis that
# the mean difference of PAIRED data is zero
# against the alternative that the mean difference in
# the pairs is greater than 0,
# when the population standard deviation is unknown.
# Perform the test at the 0.025 level of significance.
#
# We will start by generating a population, but the
# data appears as paired values.
# NOTE(review): gnrnd5() is defined in the sourced file; it is
# expected to create the paired population vectors L1 and L2
# that the rest of this script reads -- confirm against gnrnd5.R.
source("../gnrnd5.R")
gnrnd5(282553499910, 23001625005076)
head(L1, 10)
head(L2, 10)
tail(L1, 10)
tail(L2, 10)
length(L1)
# The values are used as pairs (L1[i], L2[i]), so both vectors
# must have the same length before we index them together.
stopifnot(length(L1) == length(L2))
#
# Now that we have our population of pairs
# we need to take random samples of the pairs
# of values
#
# ############################################
# ## Each time we do the following steps    ##
# ## we will get a different sample and as  ##
# ## such we will perform a different       ##
# ## test of the null hypothesis.           ##
# ############################################
n_1 <- 42
# ---- RESAMPLE START ----
# get a sample of size n_1 (42) from L1 and L2.
# runif() draws from the open interval (1, length(L1) + 1) and
# as.integer() truncates, so every index 1..length(L1) is equally
# likely. Draws are independent, so an index may repeat.
# The upper bound is derived from the population size instead of
# the previously hard-coded 5001 (which assumed 5000 pairs).
index_1 <- as.integer(runif(n_1, 1, length(L1) + 1))
index_1
# the same index vector is used for both lists to keep pairs intact
samp_1 <- L1[index_1]
samp_1
samp_2 <- L2[index_1]
samp_2
#
# We really want to test the hypothesis that the
# mean difference in the pairs, computed as
# the second value - the first value, is 0
# so we will form a new data set based upon
# samp_2 - samp_1
samp_3 <- samp_2 - samp_1
samp_3
#
# then our best point estimate is the mean(samp_3)
pnt_est <- mean(samp_3)
pnt_est
#
# Our problem now resolves to the hypothesis that
# the population mean = 0 by looking at the mean of samp_3.
# But the means of samples of size n_1 are
# distributed as a Student's-t with n_1 - 1 degrees of
# freedom.
#
# find the t value with 2.5% of the area to its right
# (the name `t` is kept from the original script; R still finds
# the base function t() when it is called, because function
# lookup skips non-function objects)
t <- qt(0.025, n_1 - 1, lower.tail = FALSE)
std_dev <- sd(samp_3)
std_dev
# then the critical value will be
0 + t * std_dev / sqrt(n_1)
#
# reject H0 if the sample mean is greater than
# that critical value
#
# or to use the attained significance approach,
# we find the probability that we get a sample
# mean as strange or stranger than what we found
#
pt(pnt_est / (std_dev / sqrt(n_1)), n_1 - 1, lower.tail = FALSE)
# Reject H0 if that value is less than the level
# of significance stated, namely, 0.025
#
# rather than take the long way for this we could just
# jump to doing hypoth_test_unknown().
#
# NOTE(review): the arguments are presumably the H0 value (0),
# a code for the "greater than" alternative (1), the level of
# significance, then sample size, sample mean and sample standard
# deviation -- confirm against hypo_unknown.R.
source("../hypo_unknown.R")
hypoth_test_unknown(
  0, 1, 0.025,
  length(samp_3), mean(samp_3), sd(samp_3)
)
# ---- RESAMPLE END ----
#
# ################################################
# ## Now, highlight and rerun, over and over,   ##
# ## the steps from the RESAMPLE START marker   ##
# ## down to the RESAMPLE END marker, to get    ##
# ## repeated samples and, thus, repeated       ##
# ## tests of the null hypothesis against the   ##
# ## alternative one.                           ##
# ## While you do this be aware that the true   ##
# ## mean of the differences in the paired      ##
# ## values in the overall population of the    ##
# ## pairs is 1.626204. How often do you        ##
# ## reject the null hypothesis?                ##
# ################################################