# Topic 9: a look at sampling methods
################################################
## Important Note: Students are not expected  ##
## to be able to come up with the ideas and   ##
## programming examples demonstrated in this  ##
## script.                                    ##
################################################

# ---- Part 1: a simple random sample from a small population ----

# Load the course data generator. The code below reads the global
# variable L1 right after each gnrnd4()/gnrnd5() call, so those
# functions are expected to leave their generated values there.
source("../gnrnd4.R")

# First, get some data to use as our population.
gnrnd4(key1 = 2765929704, key2 = 0342313872)
L1
population <- L1
summary(population)

# The values sit in positions 1 through 98, so we can get a simple
# random sample of size 10 by choosing 10 random values between
# 1 and 98 and then using those values as index positions into
# the population.
# Note: in the gnrnd4() call just below, change the first 5 digits
# of the first key to get a different random sample.
gnrnd4(920450901, 9700001)
L1  # the randomly generated index positions
our_sample <- population[L1]
our_sample
# Change the seed value (those 5 digits) and try again.

# That was a bit complicated. R has a built-in function, sample(),
# that does this directly, either without or with replacement.
sort(sample(population, size = 10, replace = FALSE))
# feel free to do this many times
sort(sample(population, size = 10, replace = TRUE))
# feel free to do this many times

# ---- Part 2: sampling methods on a much bigger population ----

# For topic 9, here is a method that is just a bit longer.
# First let us get a much bigger population.
source("../gnrnd5.R")
gnrnd5(180056234404, 156000784)
n <- length(L1)  # population size, used by every method below
n

#
#  Look at different ways to get a sample of 15
#

# simple random
#   One way to get a sample is to use the sample() function.
samp_1a <- sample(L1, size = 15)
samp_1a
#   However, sample() hides the index values that are being
#   selected. Let us do this in two steps. First get the
#   index values, then get the sample.
# Step 1: draw 15 distinct index positions from 1..n.
index_1 <- sample(seq_len(n), 15)
index_1  # these are the index values of our sample
sort(index_1)
# Step 2: take the sample
samp_1 <- L1[index_1]
samp_1

# here is a sample of convenience: simply the first 15 values
samp_2 <- head(L1, 15)
samp_2

# stratified sample
###############################################
## This time we will do some random sampling ##
## but we will make sure that we have 5 items##
## from each of the first, second and third  ##
## portion of our population.  Note that     ##
## this is from the original list of values  ##
## not from a sorted list of the population. ##
###############################################
#  Choose 5 randomly from each third of the values.
#  Integer division gives the last index of the first and second
#  thirds; the final third runs through n.
end_first_third <- n %/% 3L
end_second_third <- (2L * n) %/% 3L
index_1 <- sample(1L:end_first_third, 5)
index_2 <- sample((end_first_third + 1L):end_second_third, 5)
index_3 <- sample((end_second_third + 1L):n, 5)
# look at the three sets of index values
index_1
index_2
index_3
# look at them again but this time sorted so that
# it is easier to read the index values
sort(index_1)
sort(index_2)
sort(index_3)
# now take our 15 item sample
samp_3 <- L1[c(index_1, index_2, index_3)]
samp_3

# cluster
#   We will start by finding 3 separate markers
#   in most of the range of values. Stopping at n - 30
#   keeps every marker + 30 inside 1..n.
index_s <- sample(seq_len(n - 30), 3)
index_s
#   Then for each marker choose 5 index values
#   from the marker to 30 more than the marker.
#   The markers are drawn independently, so two windows can
#   overlap and the combined index list may repeat a position.
index_1 <- sample(index_s[1]:(index_s[1] + 30), 5)
index_1
index_2 <- sample(index_s[2]:(index_s[2] + 30), 5)
index_2
index_3 <- sample(index_s[3]:(index_s[3] + 30), 5)
index_3
# now use our index values to get a sample
samp_4 <- L1[c(index_1, index_2, index_3)]
samp_4

# systematic
#   Since we want 15 items, we will divide the range of
#   possible index values by 15
step_size <- n %/% 15L
step_size
#   Then step through the index values in that size step.
#   length.out pins the sample at exactly 15 items; stepping
#   "up to n" can yield more than 15 when n is small relative
#   to the step. The largest index is 15 * step_size <= n.
index_s <- seq(from = step_size, by = step_size, length.out = 15)
index_s
# now take our sample
samp_5 <- L1[index_s]
samp_5

# voluntary
#   You change each of the values in this list to a value
#   between 1 and 2345, inclusive (stay within 1..n, since an
#   index beyond the population gives NA).
index_v <- c(
  97, 2146, 188, 434, 565, 624,
  807, 899, 1079, 287, 1164, 1235,
  1305, 1427, 1526
)
index_v
samp_6 <- L1[index_v]
samp_6

# Now, just to see how representative these samples might
# be, look at the mean of each sample
mean(samp_1)
mean(samp_1a)
mean(samp_2)
mean(samp_3)
mean(samp_4)
mean(samp_5)
mean(samp_6)
# and at the mean of the population.
mean(L1)

# Or, looking at more descriptive measures
summary(samp_1)
summary(samp_1a)
summary(samp_2)
summary(samp_3)
summary(samp_4)
summary(samp_5)
summary(samp_6)  # included so every sample with a mean above has a summary
# compare to the population
summary(L1)
hist(L1)

# do a quick look at larger samples
samp_1b <- sample(L1, 45)
summary(samp_1b)
samp_1c <- sample(L1, 95)
summary(samp_1c)