# script for video on goodness of fit
# First look at a test of a single proportion.
# This is from Topic 17
#
# Load the course helper file. It presumably defines hypoth_test_prop(),
# which is not defined anywhere in this script -- TODO confirm.
source("../hypo_prop.R")
# Run the single-proportion hypothesis test.
# NOTE(review): the argument meanings are not visible from this script. They
# look like a null proportion of 0.1, 13 successes in a sample of 225, a
# test-type code of 0, and a 0.02 level of significance -- confirm against
# ../hypo_prop.R.
hypoth_test_prop( 0.1, 13, 225, 0, 0.02)
# for goodness of fit we want the proportions for all
# of the possible outcomes. These are the null
# hypothesis proportions:
#
null_props <- c(
  0.15, 0.09, 0.1, 0.15,
  0.135, 0.08, 0.14, 0.155
)
# The null proportions have to account for every possible outcome, so
# they must add up to 1. Compare with a tolerance because these are
# floating-point values.
stopifnot(isTRUE(all.equal(sum(null_props), 1)))
# Then, knowing that we will take or even have taken a sample
# of size 225, find the expected values for each outcome.
# The sample size is named so that it is stated in exactly one place.
sample_size <- 225
expected <- null_props * sample_size
expected
# Load the course data generator. It presumably defines gnrnd5(), which is
# not defined anywhere in this script -- TODO confirm.
source("../gnrnd5.R")
# Generate the sample from the two numeric keys.
# NOTE(review): gnrnd5() appears to create the vector L1 as a side effect,
# because L1 is used below without ever being assigned in this script --
# confirm against ../gnrnd5.R.
gnrnd5(95632022407,985785588)
# Show the raw sample values.
L1
# Tally how many times each distinct value occurs in the sample.
table(L1)
# so here are the observed values
observed <- c(27, 19, 14, 39, 30, 18, 39, 39)
observed
# Before doing any arithmetic, make sure the observed counts line up with
# the expected counts. R silently recycles the shorter vector when the
# lengths differ, which would produce a wrong statistic instead of an
# error. The observed counts must also total the same sample size that
# the expected counts were built from.
stopifnot(
  length(observed) == length(expected),
  isTRUE(all.equal(sum(observed), sum(expected)))
)
# then we want to find the observed - expected values
# (this variable shares its name with base R's diff() function; calls to
# diff() still work because R looks for a function when a name is called)
diff <- observed - expected
diff
# and we move on from there to get the squares of those
# differences
diff_sqr <- diff^2
diff_sqr
# That magnified the values that were big differences and
# it made everything positive. Now divide each of those
# by the respective "expected" value so that the same
# difference from a larger expected value carries less
# weight than does a similar difference from a lower expected
# value.
quotients <- diff_sqr / expected
quotients
#
# Now to find the overall "strangeness" of our observed
# values from the expected values we get the sum of
# all of those quotients.
how_strange <- sum(quotients)
how_strange
#
# Even if our true population had exactly the proportions
# given in the null hypothesis we would not expect a sample
# of 225 items to have those same proportions. Each such
# sample would have differences between the observed values
# and the expected values. Each such sample would therefore
# have a value for "how_strange". The distribution of
# those "how_strange" values will be a chi-squared distribution
# with the degrees of freedom equal to one less than the
# number of different outcomes. We have 8 possible outcomes
# so there are 7 degrees of freedom. Therefore, we can say,
# if the null hypothesis is true then how strange is it to
# get a "how_strange" value of about 7.733185 or higher?
#
# Use the statistic computed above and derive the degrees of freedom
# from the number of outcomes, rather than retyping rounded copies, so
# that these lines stay correct if the data are changed.
deg_free <- length(observed) - 1
pchisq(how_strange, deg_free, lower.tail = FALSE)
# That is not strange at all.
# Or we could find the critical value for 7 degrees of freedom
# and for a level of significance of 0.02.
qchisq(0.02, deg_free, lower.tail = FALSE)
### we can do all of this in one step with the goodfit()
### function
# Load the course helper file. It presumably defines goodfit(), which is
# not defined anywhere in this script -- TODO confirm.
source( "../goodfit.R")
# NOTE(review): the arguments look like the outcome labels (1 through 8),
# the null-hypothesis proportions, the observed counts, and the 0.02 level
# of significance -- confirm against ../goodfit.R.
goodfit( 1:8, null_props, observed, 0.02)