# topic 23 Paired values
#
# Create population
source("../gnrnd5.R")
source("../gnrnd4.R")
gnrnd5(57214499910, 31000011000075) # Generates L1 and L2
# These values represent paired measures on a single
# population. Generally, this is a "before" and "after"
# measure of some value, i.e., a measure before some
# treatment and a measure after the treatment.
# For example, perhaps we want to look at how eating
# flaxseed affects a person's HDL measure. [HDL is
# High Density Lipoprotien, the "good cholesterol] We could
# take HDL measures on people, then have them eat
# 3 tablespoons of flaxseed each day for a month, then
# take a new measure of their HDL. Then we have a pair
# of measures on those people, a "before" and an "after"
# measure.
# What we have done here is to create such a population
# of measures.
HDL_before <- L1
HDL_after <- L2
#########################################
## It is somewhat silly to do the ###
## following if we have all the data,###
## but here we want to see how we ###
## can get a confidence interval for ###
## the change in HDL level and/or do ###
## a hypothesis test on H0: no change###
## versus H1: HDL went up. ###
#########################################
# To generate a confidence interval for the change in
# HDL levels we want to take a sample of the population.
# generate the index values for a sample of size 31
gnrnd4( 885033001, 500000001)
L1
pre_HDL <- HDL_before[ L1 ]
post_HDL <- HDL_after[ L1 ]
# look at the scores
pre_HDL
post_HDL
# but these are paired values. We can find how much of
# an increase each of these 31 participants showed
HDL_increase <- post_HDL - pre_HDL
HDL_increase
# Now we want to get a 95% confidence interval for that
# increase. But the increase is just a single value
# for each participant. We are really back in the
# case of getting a confidence interval for the
# population mean based on a sample where we do not
# know the population standard deviation.
source( "../ci_unknown.R")
ci_unknown( sd( HDL_increase), length(HDL_increase),
mean(HDL_increase), 0.95 )
###################################
## Now, since we actually have the measures for the
## entire population, let us get the true mean of
## the increase.
###################################
pop_increase <- HDL_after - HDL_before
mu <- mean( pop_increase )
mu
# our sample generated a 95% confidence interval that
# did not contain the true mean! What went wrong?
# Nothing, it just turned out that this was one
# of the 5% generated confidence intervals that do
# not contain the true mean.
# To show this, let us generate 10000 such
# samples and get from them the 10000 confidence
# intervals. Then, see how many of those
# contain the true mean increase
L1 <- 1:10000
for ( i in 1:10000 ) {
this_samp <- sample( pop_increase, 31 )
this_interval <- ci_unknown( sd( this_samp), 31,
mean(this_samp), 0.95)
if( as.numeric(this_interval[1]) < mu &
mu < as.numeric( this_interval[2]) ) {
L1[i] <- "contains" }
else {
L1[i] <- "misses" }
}
table( L1 ) # should give around 5% misses
#########################################
## Hypotheses test ##
#########################################
# In this class the only null hypothesis that
# we will use for this type of problem is that
# the mean difference in the values is 0. But,
# for us, in this situation, that is H0: mu=0.
# The alternative could be any of the usual
# three suspects: >, < !=
# We will use H1: mu>0
# But, again, this is just a hypothesis test for the
# mean based on a sample where we do not know the
# population standard deviations. That was the case
# when we would use hypoth_test_unknown().
# We will do this at the 0.05 level of significance.
# Recall that our sample is still in HDL_increase
HDL_increase
source("../hypo_unknown.R")
hypoth_test_unknown( 0, 1, 0.05, 31,
mean( HDL_increase),
sd(HDL_increase))
# this resounding rejection is not a shock since it
# was this sample that gave us the confidence interval
# that did not contain the true mean.
# We could run this same test with 10000 samples to see
# how often we would reject H0 based on those samples
L1 <- 1:10000
for( i in 1:10000 ) {
this_samp <- sample( pop_increase, 31 )
this_test <- hypoth_test_unknown( 0, 1, 0.05,
31, mean(this_samp),
sd( this_samp))
L1[i] <- this_test[14]
}
table( L1 )
# And, again, we should not expect to have a 85% rejection
# because the true mean difference is 2.0422, not 0.
# Let us start with a new population, but this one where
# there is little change, still a change, but not much of one
gnrnd5(77344499910, 3000011000075) # Generates L1 and L2
HDL_before <- L1
HDL_after <- L2
pop_increase <- HDL_after - HDL_before
mean( pop_increase )
# now try the demo of 10000 samples
L1 <- 1:10000
for( i in 1:10000 ) {
this_samp <- sample( pop_increase, 31 )
this_test <- hypoth_test_unknown( 0, 1, 0.05,
31, mean(this_samp),
sd( this_samp))
L1[i] <- this_test[14]
}
table( L1 )