# looking at topic 9
################################################
## Important Note: Students are not expected ##
## to be able to come up with the ideas and ##
## programming examples demonstrated on this ##
## script. ##
################################################
# ---- Simple random sample from a small population ----
# Bring in the course-provided gnrnd4() generator.
source("../gnrnd4.R")
# Generate some data to work with; the script reads the
# generated values from the variable L1 right after the call.
gnrnd4(key1 = 2765929704, key2 = 0342313872)
L1
# Keep the values under their own name, since L1 is used
# again below for the randomly generated index values.
population <- L1
summary(population)
# The values sit in positions 1 through 98, so a simple
# random sample of size 10 can be taken by generating 10
# random values between 1 and 98 and then using those values
# as an index into the population.
# Note: to get a different random sample, change the first
# 5 digits of the first key in the gnrnd4() call just below.
gnrnd4(920450901, 9700001)
L1
our_sample <- population[L1]
our_sample
# Change the seed value (those 5 digits) and try again.
#
# That approach was a bit complicated. R has a built-in
# function, sample(), that does the same job, either without
# or with replacement. Feel free to run each of the next two
# lines many times.
sort(sample(population, size = 10, replace = FALSE))
sort(sample(population, size = 10, replace = TRUE))
# ---- Topic 9: sampling methods on a larger population ----
# The methods from here on take a few more steps.
# Start by generating a much bigger population.
source("../gnrnd5.R")
gnrnd5(180056234404, 156000784)
n <- length(L1)
n
#
# Different ways to get a sample of 15
#
# -- simple random sample --
# The most direct way is the sample() function.
samp_1a <- sample(L1, size = 15)
samp_1a
# sample() does not show which index values were selected,
# so do the same thing in two steps instead.
# Step 1: draw the index values.
index_1 <- sample(1:n, size = 15)
index_1 # these are the index values of our sample
sort(index_1)
# Step 2: use the index values to take the sample.
samp_1 <- L1[index_1]
samp_1
# -- sample of convenience: just the first 15 values --
samp_2 <- head(L1, 15)
samp_2
# ---- Stratified sample ----
#################################################
## Random sampling again, but this time we     ##
## guarantee 5 items from each of the first,   ##
## second, and third portions of the           ##
## population. The portions are taken from the ##
## original order of the values, not from a    ##
## sorted copy of the population.              ##
#################################################
# Draw 5 index values at random from each third of the
# index range 1..n.
index_1 <- sample(1:as.integer(n / 3), size = 5)
index_2 <- sample(as.integer(n / 3 + 1):as.integer(2 * n / 3), size = 5)
index_3 <- sample(as.integer(2 * n / 3 + 1):n, size = 5)
# Show the three sets of index values as drawn...
index_1
index_2
index_3
# ...and again sorted, which makes them easier to read.
sort(index_1)
sort(index_2)
sort(index_3)
# Combine the three sets to take the 15 item sample.
samp_3 <- L1[c(index_1, index_2, index_3)]
samp_3
# ---- Cluster sample ----
# Pick 3 separate markers (cluster starting positions).
# The markers stop 30 short of n so that every cluster
# window stays inside the valid index range.
index_s <- sample(1:(n - 30), size = 3)
index_s
# For each marker, choose 5 index values from the window
# that runs from the marker to 30 more than the marker.
# Note: the markers are drawn independently, so two windows
# can overlap when two markers are within 30 of each other.
index_1 <- sample(index_s[1]:(index_s[1] + 30), size = 5)
index_1
index_2 <- sample(index_s[2]:(index_s[2] + 30), size = 5)
index_2
index_3 <- sample(index_s[3]:(index_s[3] + 30), size = 5)
index_3
# Use the 15 index values to take the sample.
samp_4 <- L1[c(index_1, index_2, index_3)]
samp_4
# ---- Systematic sample ----
# Since we want 15 items, divide the range of possible
# index values by 15 to get the distance between the
# selected items.
step_size <- as.integer(n / 15)
step_size
# A population with fewer than 15 values gives a step of 0,
# which cannot produce a sample, so stop here in that case.
stopifnot(step_size >= 1)
# Step through the index values in that size step, taking
# exactly 15 of them. Asking for length.out = 15 matters:
# stepping all the way up to n can return more than 15
# values (for example, n = 98 gives a step of 6 and 16
# index values). The largest index used is 15 * step_size,
# which never exceeds n.
index_s <- seq(step_size, by = step_size, length.out = 15)
index_s
# now take our sample
samp_5 <- L1[index_s]
samp_5
# ---- Voluntary sample ----
# You supply the index values yourself: change each of the
# values in this list to a value of your choice between
# 1 and 2345, inclusive.
index_v <- c(
  97, 2146, 188, 434, 565,
  624, 807, 899, 1079, 287,
  1164, 1235, 1305, 1427, 1526
)
index_v
# Use the chosen index values to take the sample.
samp_6 <- L1[index_v]
samp_6
# ---- How representative are the samples? ----
# Look at the mean of each sample...
mean(samp_1)
mean(samp_1a)
mean(samp_2)
mean(samp_3)
mean(samp_4)
mean(samp_5)
mean(samp_6)
# ...and at the mean of the population.
mean(L1)
# Or, look at more descriptive measures for the same samples
# (the voluntary sample, samp_6, is included here so that
# this list matches the list of means above).
summary(samp_1)
summary(samp_1a)
summary(samp_2)
summary(samp_3)
summary(samp_4)
summary(samp_5)
summary(samp_6)
# Compare to the population.
summary(L1)
hist(L1)
# Take a quick look at larger simple random samples
# (sizes 45 and 95).
samp_1b <- sample(L1, 45)
summary(samp_1b)
samp_1c <- sample(L1, 95)
summary(samp_1c)