# Look at finding a confidence interval for
# the mean difference of PAIRED data
# when the population standard deviation is unknown.
#
# For this example get a 95% confidence interval for
# the mean of the differences of the pairs.
#
# We will start by generating a population, but the
# data appears as paired values.
# NOTE(review): gnrnd5() is expected to create the paired
# vectors L1 and L2 in the workspace -- confirm in gnrnd5.R.
source("../gnrnd5.R")
gnrnd5(282091499910, 41001525010176)
head(L1, 10)
head(L2, 10)
tail(L1, 10)
tail(L2, 10)
length(L1)
#
# Now that we have our population of pairs
# we need to take random samples of the pairs
# of values
#
# ############################################
# ## Each time we do the following steps    ##
# ## we will get a different sample and as  ##
# ## such we will get a different           ##
# ## confidence interval.                   ##
# ############################################
n_1 <- 38
# get a sample of size n_1 from L1 and L2, using the SAME
# indices for both so that the pairs stay together.
# The upper bound comes from the population size rather than
# a hard-coded constant; truncating a uniform draw on
# [1, length(L1) + 1) yields an index in 1..length(L1).
# Note that indices may repeat (sampling with replacement).
index_1 <- as.integer(runif(n_1, 1, length(L1) + 1))
index_1
samp_1 <- L1[index_1]
samp_1
samp_2 <- L2[index_1]
samp_2
#
# We really want to look at a confidence interval for L2-L1
# so we will form a new data set based upon samp_2 - samp_1
samp_3 <- samp_2 - samp_1
samp_3
#
# then our best point estimate is the mean(samp_3)
pnt_est <- mean(samp_3)
pnt_est
#
# Our problem now resolves to finding the 95% confidence
# interval for the mean of samp_3. Because the population
# standard deviation is unknown, the standardized sample
# mean follows a Student's t distribution with n_1 - 1
# degrees of freedom.
#
# find the t value with half of 5% to its right
# (named t_crit so that it does not mask base::t)
t_crit <- qt(0.025, n_1 - 1, lower.tail = FALSE)
std_dev <- sd(samp_3)
std_dev
# margin of error, computed once and used for both ends
margin <- t_crit * std_dev / sqrt(n_1)
# then the confidence interval will have a low value of
pnt_est - margin
# and the high value of
pnt_est + margin
#
# rather than take the long way for this we could just
# jump to doing ci_unknown.
#
source("../ci_unknown.R")
ci_unknown(sd(samp_3), length(samp_3), mean(samp_3), cl = 0.95)
#
# ################################################
# ## Now, highlight and rerun, over and over,   ##
# ## the sampling steps (from n_1 <- 38 through ##
# ## the ci_unknown() call above) to get        ##
# ## repeated samples and thus, repeated 95%    ##
# ## confidence intervals. While you do this be ##
# ## aware that the true mean of the            ##
# ## differences between the paired values in   ##
# ## the two populations is 4.6501. How often   ##
# ## do your intervals contain the true mean?   ##
# ################################################

# Finally, we could get a picture of the change between
# samp_1 and samp_2 via the following commands.
# This will be for the last repetition of the sampling steps.
#
# Use the actual sample length for the x positions instead of
# a hard-coded 1:38, so the plot still works if the sample
# size is changed.
pair_idx <- seq_along(samp_1)
plot(pair_idx, samp_1,
  col = "darkgreen",
  xlim = c(0, 45), xaxp = c(0, 45, 9),
  xlab = "index values",
  ylim = c(40, 160), yaxp = c(40, 160, 12),
  ylab = "item values", pch = 22,
  las = 1, cex.axis = 0.7,
  main = "Pairs of Values from last sample"
)
points(pair_idx, samp_2, col = "darkred", pch = 20)
# one vertical connector per pair, drawn in a single
# vectorized call rather than a loop over lines()
segments(pair_idx, samp_1, pair_idx, samp_2)
abline(h = seq(40, 160, 10), lty = 3, col = "darkgray")
abline(v = seq(0, 40, 5), lty = 3, col = "darkgray")
legend("topright",
  legend = c("samp_1", "samp_2"),
  pch = c(22, 20), col = c("darkgreen", "darkred"),
  inset = 0.03
)