# Two-sample test of the hypothesis that there is no difference
# between two means when the population standard deviations are
# unknown.
#
# For this example we will run the test at the 0.02 level of
# significance.  The null hypothesis is mu_1 - mu_2 = 0 and the
# alternative hypothesis is mu_1 - mu_2 != 0.
#
# We will start by generating two populations.
# NOTE(review): gnrnd5() appears to leave its values in the global
# L1 (the script copies L1 right after the call) -- so the first
# population is saved as L2 before the second call replaces L1.
source("../gnrnd5.R")
gnrnd5(213572499901, 11225010176)
L2 <- L1
head(L2, 10)
tail(L2, 10)
#
length(L2)
#
gnrnd5(258202599901, 10413010732)
head(L1, 14)
tail(L1, 14)
#
length(L1)
#
# Now that we have our two populations (whose standard
# deviations we treat as unknown) we need to take random
# samples of the two populations.
#
# ############################################
# ##  each time we do the following steps   ##
# ##  we will get different samples and as  ##
# ##  such we will get a different          ##
# ##  test of the null hypothesis.          ##
# ############################################
#
# ---- start of the section to rerun (first experiment) ----
n_1 <- 37
# Get a sample of size 37 from L1.  runif() gives real values
# from 1 up to 6001 and as.integer() truncates them, so the
# indices run from 1 to 6000 and may repeat.
# NOTE(review): 6001 assumes L1 holds 6000 values -- compare
# with the length(L1) output above.
index_1 <- as.integer(runif(n_1, 1, 6001))
index_1
samp_1 <- L1[index_1]
samp_1
#
n_2 <- 49
# Get a sample of size 49 from L2 (indices 1 to 5000).
# NOTE(review): 5001 assumes L2 holds 5000 values -- compare
# with the length(L2) output above.
index_2 <- as.integer(runif(n_2, 1, 5001))
index_2
samp_2 <- L2[index_2]
samp_2
#
# Then we need to find the mean of each sample
xbar_1 <- mean(samp_1)
xbar_1
xbar_2 <- mean(samp_2)
xbar_2
#
# And we need to find the standard deviation of
# each sample
sx1 <- sd(samp_1)
sx1
sx2 <- sd(samp_2)
sx2
#
# So our difference of the sample means is
samp_diff <- xbar_1 - xbar_2
samp_diff
#
# The distribution of the difference of the means will
# be Student's-t with standard deviation equal to
# sqrt( sx1^2/n_1 + sx2^2/n_2 )
sd_difference <- sqrt(sx1^2 / n_1 + sx2^2 / n_2)
sd_difference
#
# But we still need to determine the degrees of freedom
# to use.  One way will be to use one less than the smaller
# of n_1 and n_2.  We could improve our test by taking
# the much more complex computed number of degrees of
# freedom.
d_f_simple <- min(n_1, n_2) - 1
d_f_simple
#
# or we do the computation for the complex version
d1 <- sx1^2 / n_1
d2 <- sx2^2 / n_2
d_f_complex <- (d1 + d2)^2 / (d1^2 / (n_1 - 1) + d2^2 / (n_2 - 1))
d_f_complex
#
# First the critical value approach:
#
# To run the test at the 0.02 level of significance
# have the 2% we are missing split evenly on both
# tails.  But, since this is a Student's-t distribution
# the two t-values will just be opposites.  We will
# just find the upper value.
#
t_simple <- qt(0.02 / 2, d_f_simple, lower.tail = FALSE)
t_simple
# then our low critical value will be -t_simple*sd_difference
# and our high critical value will be t_simple*sd_difference
#
# or to use the complex degrees of freedom
# (this must use d_f_complex; with d_f_simple it would just
# repeat t_simple)
t_complex <- qt(0.02 / 2, d_f_complex, lower.tail = FALSE)
t_complex
# then our low critical value will be -t_complex*sd_difference
# and our high critical value will be t_complex*sd_difference
# REJECT H0 if samp_diff is less than the low
# critical value or greater than the high value
#
# Second, the attained significance approach
#
# Because this is a two-tailed test we need to
# find how strange it is to get the difference of
# the means to be this strange, the value of samp_diff,
# or stranger.
#
# Because this is a dynamic section of the script
# samp_diff could be positive or negative.  Here
# we will use the fact that the Student's-t distribution
# is symmetric so we can just use the absolute
# value of the difference of the sample means.
#
abs_diff <- abs(samp_diff)
#
# Then the question is how strange would it be to
# get that value or higher from a population that
# is Student's-t with mean=0 and standard deviation
# equal to sd_difference and for our choice of
# simple or complex degrees of freedom
#
# First, using the simple degrees of freedom
attained <- pt(abs_diff / sd_difference, d_f_simple, lower.tail = FALSE)
attained
#
# Then, double that value, because this is a two-tailed
# test, and if the doubled value is less than our
# level of significance we reject H0.
attained * 2
# Then using the complex degrees of freedom
attained <- pt(abs_diff / sd_difference, d_f_complex, lower.tail = FALSE)
attained
#
# Then, double that value, because this is a two-tailed
# test, and if the doubled value is less than our
# level of significance we reject H0.
attained * 2
# We could have taken the shortcut and used the
# function that is provided.
#
source("../hypo_2unknown.R")
hypoth_2test_unknown(sx1, n_1, xbar_1, sx2, n_2, xbar_2, 0, 0.02)
# ---- end of the section to rerun (first experiment) ----
#
# #################################################
# ##  Now, highlight and rerun, over and over,   ##
# ##  the section between the "start" and "end"  ##
# ##  markers above (from n_1 <- 37 through the  ##
# ##  hypoth_2test_unknown call) to get repeated ##
# ##  samples and thus, repeated 2% tests of the ##
# ##  null hypothesis that the means are the     ##
# ##  same.  While you do this be aware that the ##
# ##  true difference between the population     ##
# ##  means is 1.30584.  How often do your tests ##
# ##  reject the null hypothesis?
##
# #################################################
# -----------------------------------------------------
#
# We can do this all over again, but this time we will
# create two populations that have means that differ by
# a significant amount.
#
# We will start by generating two populations.
# gnrnd5() is loaded by the source("../gnrnd5.R") call at the
# top of this script; run that line first if starting here.
# NOTE(review): gnrnd5() appears to leave its values in the global
# L1 (the script copies L1 right after the call) -- so the first
# population is saved as L2 before the second call replaces L1.
#
gnrnd5(273502499901, 11225010176)
L2 <- L1
head(L2, 10)
tail(L2, 10)
#
# The population standard deviations are displayed here but
# are not used by the test below, which works from the
# sample standard deviations only.
# NOTE(review): pop_sd() is not defined or sourced anywhere in
# this script; presumably it is already loaded in the session
# -- confirm.
sigma_2 <- pop_sd(L2)
sigma_2
length(L2)
#
gnrnd5(214825599901, 13413010732)
head(L1, 14)
tail(L1, 14)
sigma_1 <- pop_sd(L1)
sigma_1
length(L1)
#
# Now that we have our two populations we need to take
# random samples of the two populations.  The test itself
# treats the population standard deviations as unknown.
#
# ############################################
# ##  each time we do the following steps   ##
# ##  we will get different samples and as  ##
# ##  such we will get a different          ##
# ##  test of the null hypothesis.          ##
# ############################################
#
# ---- start of the section to rerun (second experiment) ----
n_1 <- 37
# Get a sample of size 37 from L1.  runif() gives real values
# from 1 up to 6001 and as.integer() truncates them, so the
# indices run from 1 to 6000 and may repeat.
# NOTE(review): 6001 assumes L1 holds 6000 values -- compare
# with the length(L1) output above.
index_1 <- as.integer(runif(n_1, 1, 6001))
index_1
samp_1 <- L1[index_1]
samp_1
#
n_2 <- 49
# Get a sample of size 49 from L2 (indices 1 to 5000).  The
# number of indices drawn must be n_2, the same size that is
# used below for the standard error and degrees of freedom.
# NOTE(review): 5001 assumes L2 holds 5000 values -- compare
# with the length(L2) output above.
index_2 <- as.integer(runif(n_2, 1, 5001))
index_2
samp_2 <- L2[index_2]
samp_2
#
# Then we need to find the mean of each sample
xbar_1 <- mean(samp_1)
xbar_1
xbar_2 <- mean(samp_2)
xbar_2
#
# And we need to find the standard deviation of
# each sample
sx1 <- sd(samp_1)
sx1
sx2 <- sd(samp_2)
sx2
#
# So our difference of the sample means is
samp_diff <- xbar_1 - xbar_2
samp_diff
#
# The distribution of the difference of the means will
# be Student's-t with standard deviation equal to
# sqrt( sx1^2/n_1 + sx2^2/n_2 )
sd_difference <- sqrt(sx1^2 / n_1 + sx2^2 / n_2)
sd_difference
#
# But we still need to determine the degrees of freedom
# to use.  One way will be to use one less than the smaller
# of n_1 and n_2.  We could improve our test by taking
# the much more complex computed number of degrees of
# freedom.
d_f_simple <- min(n_1, n_2) - 1
d_f_simple
#
# or we do the computation for the complex version
d1 <- sx1^2 / n_1
d2 <- sx2^2 / n_2
d_f_complex <- (d1 + d2)^2 / (d1^2 / (n_1 - 1) + d2^2 / (n_2 - 1))
d_f_complex
#
# First the critical value approach:
#
# To run the test at the 0.02 level of significance
# have the 2% we are missing split evenly on both
# tails.  But, since this is a Student's-t distribution
# the two t-values will just be opposites.  We will
# just find the upper value.
#
t_simple <- qt(0.02 / 2, d_f_simple, lower.tail = FALSE)
t_simple
# then our low critical value will be -t_simple*sd_difference
# and our high critical value will be t_simple*sd_difference
#
# or to use the complex degrees of freedom
# (this must use d_f_complex; with d_f_simple it would just
# repeat t_simple)
t_complex <- qt(0.02 / 2, d_f_complex, lower.tail = FALSE)
t_complex
# then our low critical value will be -t_complex*sd_difference
# and our high critical value will be t_complex*sd_difference
# REJECT H0 if samp_diff is less than the low
# critical value or greater than the high value
#
# Second, the attained significance approach
#
# Because this is a two-tailed test we need to
# find how strange it is to get the difference of
# the means to be this strange, the value of samp_diff,
# or stranger.
#
# Because this is a dynamic section of the script
# samp_diff could be positive or negative.  Here
# we will use the fact that the Student's-t distribution
# is symmetric so we can just use the absolute
# value of the difference of the sample means.
#
abs_diff <- abs(samp_diff)
#
# Then the question is how strange would it be to
# get that value or higher from a population that
# is Student's-t with mean=0 and standard deviation
# equal to sd_difference and for our choice of
# simple or complex degrees of freedom
#
# First, using the simple degrees of freedom
attained <- pt(abs_diff / sd_difference, d_f_simple, lower.tail = FALSE)
attained
#
# Then, double that value, because this is a two-tailed
# test, and if the doubled value is less than our
# level of significance we reject H0.
attained * 2
# Then using the complex degrees of freedom
attained <- pt(abs_diff / sd_difference, d_f_complex, lower.tail = FALSE)
attained
#
# Then, double that value, because this is a two-tailed
# test, and if the doubled value is less than our
# level of significance we reject H0.
attained * 2
# We could have taken the shortcut and used the
# function that is provided.
#
source("../hypo_2unknown.R")
hypoth_2test_unknown(sx1, n_1, xbar_1, sx2, n_2, xbar_2, 0, 0.02)
# ---- end of the section to rerun (second experiment) ----
#
# #################################################
# ##  Now, highlight and rerun, over and over,   ##
# ##  the section between the "start" and "end"  ##
# ##  markers above (from n_1 <- 37 through the  ##
# ##  hypoth_2test_unknown call) to get repeated ##
# ##  samples and thus, repeated 2% tests of the ##
# ##  null hypothesis that the means are the     ##
# ##  same.  While you do this be aware that the ##
# ##  true difference between the population     ##
# ##  means is 16.57508.  How often do your      ##
# ##  tests reject the null hypothesis?          ##
# #################################################