RunExamples/rXB.html

<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">source</span>(<span class="st">&#39;runDir.R&#39;</span>)</code></pre></div>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">runDir</span>(<span class="st">&#39;../CodeExamples/x0B_Important_statistical_concepts&#39;</span>,
       <span class="st">&#39;../bioavailability&#39;</span>)</code></pre></div>
<pre><code>[1] &quot;############################### start  246 Fri Jun 17 10:30:31 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00246_example_B.1_of_section_B.1.1.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.1 of section B.1.1 
&gt; # (example B.1 of section B.1.1)  : Important statistical concepts : Distributions : Normal distribution 
&gt; # Title: Plotting the theoretical normal density 
&gt; 
&gt; library(ggplot2)

&gt; x &lt;- seq(from=-5, to=5, length.out=100) # the interval [-5 5]

&gt; f &lt;- dnorm(x)                           # normal with mean 0 and sd 1

&gt; ggplot(data.frame(x=x,y=f), aes(x=x,y=y)) + geom_line()</code></pre>
<div class="figure">
<img src="rXB_files/figure-markdown_github/xA1-1.png" alt="" />

</div>
<pre><code>[1] &quot;############################### end  246 Fri Jun 17 10:30:31 2016&quot;
[1] &quot;############################### start  247 Fri Jun 17 10:30:31 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00247_example_B.2_of_section_B.1.1.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.2 of section B.1.1 
&gt; # (example B.2 of section B.1.1)  : Important statistical concepts : Distributions : Normal distribution 
&gt; # Title: Plotting an empirical normal density 
&gt; 
&gt; library(ggplot2)

&gt; # draw 1000 points from a normal with mean 0, sd 1
&gt; u &lt;- rnorm(1000)

&gt; # plot the distribution of points,
&gt; # compared to normal curve as computed by dnorm() (dashed line)
&gt; ggplot(data.frame(x=u), aes(x=x)) + geom_density() +
    geom_line(data=data.frame(x=x,y=f), aes(x=x,y=y), linetype=2)</code></pre>
<div class="figure">
<img src="rXB_files/figure-markdown_github/xA1-2.png" alt="" />

</div>
<pre><code>[1] &quot;############################### end  247 Fri Jun 17 10:30:31 2016&quot;
[1] &quot;############################### start  248 Fri Jun 17 10:30:31 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00248_example_B.3_of_section_B.1.1.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.3 of section B.1.1 
&gt; # (example B.3 of section B.1.1)  : Important statistical concepts : Distributions : Normal distribution 
&gt; # Title: Working with the normal cdf 
&gt; 
&gt; # --- estimate probabilities (areas) under the curve ---
&gt; 
&gt; # 50% of the observations will be less than the mean
&gt; pnorm(0)
[1] 0.5

&gt; # [1] 0.5
&gt; 
&gt; # about 2.3% of all observations are more than 2 standard
&gt; # deviations below the mean
&gt; pnorm(-2)
[1] 0.02275013

&gt; # [1] 0.02275013
&gt; 
&gt; # about 95.4% of all observations are within 2 standard deviations
&gt; # from the mean
&gt; pnorm(2) - pnorm(-2)
[1] 0.9544997

&gt; # [1] 0.9544997
&gt; 
[1] &quot;############################### end  248 Fri Jun 17 10:30:31 2016&quot;
[1] &quot;############################### start  249 Fri Jun 17 10:30:31 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00249_example_B.4_of_section_B.1.1.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.4 of section B.1.1 
&gt; # (example B.4 of section B.1.1)  : Important statistical concepts : Distributions : Normal distribution 
&gt; # Title: Plotting x &lt; qnorm(0.75) 
&gt; 
&gt; # --- return the quantiles corresponding to specific probabilities ---
&gt; 
&gt; # the median (50th percentile) of a normal is also the mean
&gt; qnorm(0.5)
[1] 0

&gt; # [1] 0
&gt; 
&gt; # calculate the 75th percentile
&gt; qnorm(0.75)
[1] 0.6744898

&gt; # [1] 0.6744898
&gt; pnorm(0.6744898)
[1] 0.75

&gt; # [1] 0.75
&gt; 
&gt; # --- Illustrate the 75th percentile ---
&gt; 
&gt; # create a graph of the normal distribution with mean 0, sd 1
&gt; x &lt;- seq(from=-5, to=5, length.out=100)

&gt; f &lt;- dnorm(x)

&gt; nframe &lt;- data.frame(x=x,y=f) 

&gt; # calculate the 75th percentile
&gt; line &lt;- qnorm(0.75)

&gt; xstr &lt;- sprintf(&quot;qnorm(0.75) = %1.3f&quot;, line)

&gt; # the part of the normal distribution to the left
&gt; # of the 75th percentile
&gt; nframe75 &lt;- subset(nframe, nframe$x &lt; line)

&gt; # Plot it. 
&gt; # The shaded area is 75% of the area under the normal curve
&gt; ggplot(nframe, aes(x=x,y=y)) + geom_line() +
   geom_area(data=nframe75, aes(x=x,y=y), fill=&quot;gray&quot;) + 
   geom_vline(aes(xintercept=line), linetype=2) +
   geom_text(x=line, y=0, label=xstr, vjust=1)</code></pre>
<div class="figure">
<img src="rXB_files/figure-markdown_github/xA1-3.png" alt="" />

</div>
<pre><code>[1] &quot;############################### end  249 Fri Jun 17 10:30:31 2016&quot;
[1] &quot;############################### start  250 Fri Jun 17 10:30:31 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00250_example_B.5_of_section_B.1.3.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.5 of section B.1.3 
&gt; # (example B.5 of section B.1.3)  : Important statistical concepts : Distributions : Lognormal distribution 
&gt; # Title: Demonstrating some properties of the lognormal distribution 
&gt; 
&gt; # draw 1001 samples from a lognormal with meanlog 0, sdlog 1
&gt; u &lt;- rlnorm(1001)

&gt; # the mean of u is higher than the median
&gt; mean(u)
[1] 1.729891

&gt; # [1] 1.638628
&gt; median(u)
[1] 1.036753

&gt; # [1] 1.001051
&gt; 
&gt; # the mean of log(u) is approx meanlog=0
&gt; mean(log(u))
[1] 0.0326221

&gt; # [1] -0.002942916
&gt; 
&gt; # the sd of log(u) is approx sdlog=1
&gt; sd(log(u))
[1] 0.9978703

&gt; # [1] 0.9820357
&gt; 
&gt; # generate the lognormal with meanlog=0, sdlog=1
&gt; x &lt;- seq(from=0, to=25, length.out=500)

&gt; f &lt;- dlnorm(x)

&gt; # generate a normal with mean=0, sd=1
&gt; x2 &lt;- seq(from=-5,to=5, length.out=500)

&gt; f2 &lt;- dnorm(x2)

&gt; # make data frames
&gt; lnormframe &lt;- data.frame(x=x,y=f)

&gt; normframe &lt;- data.frame(x=x2, y=f2)

&gt; dframe &lt;- data.frame(u=u)

&gt; # plot densityplots with theoretical curves superimposed
&gt; p1 &lt;- ggplot(dframe, aes(x=u)) + geom_density() +
   geom_line(data=lnormframe, aes(x=x,y=y), linetype=2)

&gt; p2 &lt;- ggplot(dframe, aes(x=log(u))) + geom_density() +
   geom_line(data=normframe, aes(x=x,y=y), linetype=2)

&gt; # functions to plot multiple plots on one page
&gt; library(grid)

&gt; nplot &lt;- function(plist) {
   n &lt;- length(plist)
   grid.newpage()
   pushViewport(viewport(layout=grid.layout(n,1)))
   vplayout&lt;-function(x,y) {viewport(layout.pos.row=x, layout.pos.col=y)}
   for(i in 1:n) {
     print(plist[[i]], vp=vplayout(i,1))
   }
 }

&gt; # this is the plot that leads this section.
&gt; nplot(list(p1, p2))</code></pre>
<div class="figure">
<img src="rXB_files/figure-markdown_github/xA1-4.png" alt="" />

</div>
<pre><code>[1] &quot;############################### end  250 Fri Jun 17 10:30:32 2016&quot;
[1] &quot;############################### start  251 Fri Jun 17 10:30:32 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00251_example_B.6_of_section_B.1.3.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.6 of section B.1.3 
&gt; # (example B.6 of section B.1.3)  : Important statistical concepts : Distributions : Lognormal distribution 
&gt; # Title: Plotting the lognormal distribution 
&gt; 
&gt; # the 50th percentile (or median) of the lognormal with
&gt; # meanlog=0 and sdlog=1
&gt; qlnorm(0.5)
[1] 1

&gt; # [1] 1
&gt; # the probability of seeing a value x less than 1
&gt; plnorm(1)
[1] 0.5

&gt; # [1] 0.5
&gt; 
&gt; # the probability of observing a value x less than 10:
&gt; plnorm(10)
[1] 0.9893489

&gt; # [1] 0.9893489
&gt; 
&gt; # -- show the 75th percentile of the lognormal 
&gt; 
&gt; # use lnormframe from previous example: the 
&gt; # theoretical lognormal curve
&gt; 
&gt; line &lt;- qlnorm(0.75)

&gt; xstr &lt;- sprintf(&quot;qlnorm(0.75) = %1.3f&quot;, line)

&gt; lnormframe75 &lt;- subset(lnormframe, lnormframe$x &lt; line)

&gt; # Plot it 
&gt; # The shaded area is 75% of the area under the lognormal curve
&gt; ggplot(lnormframe, aes(x=x,y=y)) + geom_line() +
   geom_area(data=lnormframe75, aes(x=x,y=y), fill=&quot;gray&quot;) + 
   geom_vline(aes(xintercept=line), linetype=2) +
   geom_text(x=line, y=0, label=xstr, hjust= 0, vjust=1)</code></pre>
<div class="figure">
<img src="rXB_files/figure-markdown_github/xA1-5.png" alt="" />

</div>
<pre><code>[1] &quot;############################### end  251 Fri Jun 17 10:30:32 2016&quot;
[1] &quot;############################### start  252 Fri Jun 17 10:30:32 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00252_example_B.7_of_section_B.1.4.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.7 of section B.1.4 
&gt; # (example B.7 of section B.1.4)  : Important statistical concepts : Distributions : Binomial distribution 
&gt; # Title: Plotting the binomial distribution 
&gt; 
&gt; library(ggplot2)

&gt; #
&gt; # use dbinom to produce the theoretical curves
&gt; #
&gt; 
&gt; numflips &lt;- 50

&gt; # x is the number of heads that we see
&gt; x &lt;- 0:numflips

&gt; # probability of heads for several different coins
&gt; p &lt;- c(0.05, 0.15, 0.5, 0.75)

&gt; plabels &lt;- paste(&quot;p =&quot;, p)

&gt; # calculate the probability of seeing x heads in numflips flips
&gt; # for all the coins. This probably isn&#39;t the most elegant
&gt; # way to do this, but at least it&#39;s easy to read
&gt; 
&gt; flips &lt;- NULL

&gt; for(i in 1:length(p)) {
   coin &lt;- p[i]
   label &lt;- plabels[i]
   tmp &lt;- data.frame(number.of.heads=x,
                    probability = dbinom(x, numflips, coin),
                    coin.type = label)
   flips &lt;- rbind(flips, tmp)
 }

&gt; # plot it
&gt; # this is the plot that leads this section
&gt; ggplot(flips, aes(x=number.of.heads, y=probability)) +
   geom_point(aes(color=coin.type, shape=coin.type)) +
   geom_line(aes(color=coin.type))</code></pre>
<div class="figure">
<img src="rXB_files/figure-markdown_github/xA1-6.png" alt="" />

</div>
<pre><code>[1] &quot;############################### end  252 Fri Jun 17 10:30:32 2016&quot;
[1] &quot;############################### start  253 Fri Jun 17 10:30:32 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00253_example_B.8_of_section_B.1.4.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.8 of section B.1.4 
&gt; # (example B.8 of section B.1.4)  : Important statistical concepts : Distributions : Binomial distribution 
&gt; # Title: Working with the theoretical binomial distribution 
&gt; 
&gt; p = 0.5 # the percentage of females in this student population

&gt; class.size &lt;- 20 # size of a classroom

&gt; numclasses &lt;- 100 # how many classrooms we observe

&gt; # what might a typical outcome look like?
&gt; numFemales &lt;- rbinom(numclasses, class.size, p)   # Note: 1 

&gt; # the theoretical counts (not necessarily integral)
&gt; probs &lt;- dbinom(0:class.size, class.size, p)

&gt; tcount &lt;- numclasses*probs

&gt; # the obvious way to plot this is with histogram or geom_bar
&gt; # but this might just look better
&gt; 
&gt; zero &lt;- function(x) {0} # a dummy function that returns only 0

&gt; ggplot(data.frame(number.of.girls=numFemales, dummy=1),
   aes(x=number.of.girls, y=dummy)) + 
   # count the number of classrooms in which you see x girls
   stat_summary(fun.y=&quot;sum&quot;, geom=&quot;point&quot;, size=2) +    # Note: 2 
   stat_summary(fun.ymax=&quot;sum&quot;, fun.ymin=&quot;zero&quot;, geom=&quot;linerange&quot;) + 
   # superimpose the theoretical number of classrooms with x girls
   geom_line(data=data.frame(x=0:class.size, y=probs),
             aes(x=x, y=tcount), linetype=2) +
   scale_x_continuous(breaks=0:class.size, labels=0:class.size) +
   scale_y_continuous(&quot;number of classrooms&quot;)</code></pre>
<div class="figure">
<img src="rXB_files/figure-markdown_github/xA1-7.png" alt="" />

</div>
<pre><code>&gt; # Note 1: 
&gt; #   Because we didn’t call set.seed, we 
&gt; #   expect different results each time we run this line. 
&gt; 
&gt; # Note 2: 
&gt; #   stat_summary is one of the ways to 
&gt; #   control data aggregation during plotting. In this case, we’re using it to 
&gt; #   place the dot and bar measured from the empirical data in with the 
&gt; #   theoretical density curve. 
&gt; 
[1] &quot;############################### end  253 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  254 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00254_example_B.9_of_section_B.1.4.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.9 of section B.1.4 
&gt; # (example B.9 of section B.1.4)  : Important statistical concepts : Distributions : Binomial distribution 
&gt; # Title: Simulating a binomial distribution 
&gt; 
&gt; # use rbinom to simulate flipping a coin of probability p N times
&gt; 
&gt; p75 &lt;- 0.75 # a very unfair coin (mostly heads)

&gt; N &lt;- 1000  # flip it several times

&gt; flips_v1 &lt;- rbinom(N, 1, p75)

&gt; # Another way to generate unfair flips is to use runif:
&gt; # the probability that a uniform random number from [0 1)
&gt; # is less than p is exactly p. So &quot;less than p&quot; is &quot;heads&quot;.
&gt; flips_v2 &lt;- as.numeric(runif(N) &lt; p75) 

&gt; prettyprint_flips &lt;- function(flips) {
   outcome &lt;- ifelse(flips==1, &quot;heads&quot;, &quot;tails&quot;)
   table(outcome)
 }

&gt; prettyprint_flips(flips_v1)
outcome
heads tails 
  737   263 

&gt; # outcome
&gt; # heads tails 
&gt; # 756   244 
&gt; prettyprint_flips(flips_v2)
outcome
heads tails 
  765   235 

&gt; # outcome
&gt; # heads tails 
&gt; # 743   257
&gt; 
[1] &quot;############################### end  254 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  255 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00255_example_B.10_of_section_B.1.4.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.10 of section B.1.4 
&gt; # (example B.10 of section B.1.4)  : Important statistical concepts : Distributions : Binomial distribution 
&gt; # Title: Working with the binomial distribution 
&gt; 
&gt; # pbinom example
&gt; 
&gt; nflips &lt;- 100

&gt; nheads &lt;- c(25, 45, 50, 60)  # number of heads

&gt; # what are the probabilities of observing at most that 
&gt; # number of heads on a fair coin?
&gt; left.tail &lt;- pbinom(nheads, nflips, 0.5)

&gt; sprintf(&quot;%2.2f&quot;, left.tail)
[1] &quot;0.00&quot; &quot;0.18&quot; &quot;0.54&quot; &quot;0.98&quot;

&gt; # [1] &quot;0.00&quot; &quot;0.18&quot; &quot;0.54&quot; &quot;0.98&quot;
&gt; 
&gt; # the probabilities of observing more than that
&gt; # number of heads on a fair coin?
&gt; right.tail &lt;- pbinom(nheads, nflips, 0.5, lower.tail=F)

&gt; sprintf(&quot;%2.2f&quot;, right.tail)
[1] &quot;1.00&quot; &quot;0.82&quot; &quot;0.46&quot; &quot;0.02&quot;

&gt; # [1] &quot;1.00&quot; &quot;0.82&quot; &quot;0.46&quot; &quot;0.02&quot;
&gt; 
&gt; # as expected:
&gt; left.tail+right.tail
[1] 1 1 1 1

&gt; #  [1] 1 1 1 1 
&gt; 
&gt; # so if you flip a fair coin 100 times,
&gt; # you are virtually guaranteed to see more than 25 heads, 
&gt; # almost guaranteed to see fewer than 60, and
&gt; # probably more than 45.
&gt; 
&gt; # qbinom example
&gt; 
&gt; nflips &lt;- 100

&gt; # what&#39;s the 95% &quot;central&quot; interval of heads that you
&gt; # would expect to observe on 100 flips of a fair coin?
&gt; 
&gt; left.edge &lt;- qbinom(0.025, nflips, 0.5)

&gt; right.edge &lt;- qbinom(0.025, nflips, 0.5, lower.tail=F)

&gt; c(left.edge, right.edge)
[1] 40 60

&gt; # [1] 40 60
&gt; 
&gt; # so with 95% probability you should see between 40 and 60 heads
&gt; 
[1] &quot;############################### end  255 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  256 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00256_example_B.11_of_section_B.1.4.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.11 of section B.1.4 
&gt; # (example B.11 of section B.1.4)  : Important statistical concepts : Distributions : Binomial distribution 
&gt; # Title: Working with the binomial cdf 
&gt; 
&gt; # because this is a discrete probability distribution, 
&gt; # pbinom and qbinom are not exact inverses of each other
&gt; 
&gt; # this direction works
&gt; pbinom(45, nflips, 0.5)
[1] 0.1841008

&gt; # [1] 0.1841008
&gt; qbinom(0.1841008, nflips, 0.5)
[1] 45

&gt; # [1] 45
&gt; 
&gt; # this direction won&#39;t be exact
&gt; qbinom(0.75, nflips, 0.5)
[1] 53

&gt; # [1] 53
&gt; pbinom(53, nflips, 0.5)
[1] 0.7579408

&gt; # [1] 0.7579408
&gt; 
[1] &quot;############################### end  256 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  258 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00258_example_B.12_of_section_B.2.2.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.12 of section B.2.2 
&gt; # (example B.12 of section B.2.2)  : Important statistical concepts : Statistical theory : A/B tests 
&gt; # Title: Building simulated A/B test data 
&gt; 
&gt; set.seed(123515)

&gt; d &lt;- rbind(   # Note: 1 
    data.frame(group=&#39;A&#39;,converted=rbinom(100000,size=1,p=0.05)),   # Note: 2 
    data.frame(group=&#39;B&#39;,converted=rbinom(10000,size=1,p=0.055))    # Note: 3 
 )

&gt; # Note 1: 
&gt; #   Build a data frame to store simulated 
&gt; #   examples. 
&gt; 
&gt; # Note 2: 
&gt; #   Add 100,000 examples from the A group 
&gt; #   simulating a conversion rate of 5%. 
&gt; 
&gt; # Note 3: 
&gt; #   Add 10,000 examples from the B group 
&gt; #   simulating a conversion rate of 5.5%. 
&gt; 
[1] &quot;############################### end  258 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  259 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00259_example_B.13_of_section_B.2.2.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.13 of section B.2.2 
&gt; # (example B.13 of section B.2.2)  : Important statistical concepts : Statistical theory : A/B tests 
&gt; # Title: Summarizing the A/B test into a contingency table 
&gt; 
&gt; tab &lt;- table(d)

&gt; print(tab)
     converted
group     0     1
    A 94979  5021
    B  9398   602

&gt; ##      converted
&gt; ## group     0     1
&gt; ##     A 94979  5021
&gt; ##     B  9398   602
&gt; 
[1] &quot;############################### end  259 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  260 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00260_example_B.14_of_section_B.2.2.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.14 of section B.2.2 
&gt; # (example B.14 of section B.2.2)  : Important statistical concepts : Statistical theory : A/B tests 
&gt; # Title: Calculating the observed A and B rates 
&gt; 
&gt; aConversionRate &lt;- tab[&#39;A&#39;,&#39;1&#39;]/sum(tab[&#39;A&#39;,])

&gt; print(aConversionRate)
[1] 0.05021

&gt; ## [1] 0.05021
&gt; bConversionRate &lt;- tab[&#39;B&#39;,&#39;1&#39;]/sum(tab[&#39;B&#39;,])

&gt; print(bConversionRate)
[1] 0.0602

&gt; ## [1] 0.0602
&gt; commonRate &lt;- sum(tab[,&#39;1&#39;])/sum(tab)

&gt; print(commonRate)
[1] 0.05111818

&gt; ## [1] 0.05111818
&gt; 
[1] &quot;############################### end  260 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  261 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00261_example_B.15_of_section_B.2.2.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.15 of section B.2.2 
&gt; # (example B.15 of section B.2.2)  : Important statistical concepts : Statistical theory : A/B tests 
&gt; # Title: Calculating the significance of the observed difference in rates 
&gt; 
&gt; fisher.test(tab)

    Fisher&#39;s Exact Test for Count Data

data:  tab
p-value = 2.469e-05
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 1.108716 1.322464
sample estimates:
odds ratio 
  1.211706 


&gt; ##    Fisher&#39;s Exact Test for Count Data
&gt; ##
&gt; ## data:  tab
&gt; ## p-value = 2.469e-05
&gt; ## alternative hypothesis: true odds ratio is not equal to 1
&gt; ## 95 percent confidence interval:
&gt; ##  1.108716 1.322464
&gt; ## sample estimates:
&gt; ## odds ratio 
&gt; ##   1.211706
&gt; 
[1] &quot;############################### end  261 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  262 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00262_example_B.16_of_section_B.2.2.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.16 of section B.2.2 
&gt; # (example B.16 of section B.2.2)  : Important statistical concepts : Statistical theory : A/B tests 
&gt; # Title: Computing frequentist significance 
&gt; 
&gt; print(pbinom(     # Note: 1 
    lower.tail=F,   # Note: 2 
    q=tab[&#39;B&#39;,&#39;1&#39;]-1,   # Note: 3 
    size=sum(tab[&#39;B&#39;,]),    # Note: 4 
    prob=commonRate     # Note: 5 
    )) 
[1] 3.153319e-05

&gt; ## [1] 3.153319e-05
&gt; 
&gt; # Note 1: 
&gt; #   Use the pbinom() call to calculate how 
&gt; #   likely different observed counts are. 
&gt; 
&gt; # Note 2: 
&gt; #   Signal we want the probability of being 
&gt; #   greater than a given q. 
&gt; 
&gt; # Note 3: 
&gt; #   Ask for the probability of seeing at least as many conversions as our observed B group 
&gt; #   did. 
&gt; 
&gt; # Note 4: 
&gt; #   Specify the total number of trials as 
&gt; #   equal to what we saw in our B group. 
&gt; 
&gt; # Note 5: 
&gt; #   Specify the conversion probability at the 
&gt; #   estimated common rate. 
&gt; 
[1] &quot;############################### end  262 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  263 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00263_example_B.17_of_section_B.2.2.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.17 of section B.2.2 
&gt; # (example B.17 of section B.2.2)  : Important statistical concepts : Statistical theory : A/B tests 
&gt; # Title: Bayesian estimate of the posterior tail mass 
&gt; 
&gt; print(pbeta(  # Note: 1 
    aConversionRate,    # Note: 2 
    shape1=commonRate+tab[&#39;B&#39;,&#39;1&#39;],     # Note: 3 
    shape2=(1-commonRate)+tab[&#39;B&#39;,&#39;0&#39;]))    # Note: 4 
[1] 4.731817e-06

&gt; ## [1] 4.731817e-06
&gt; 
&gt; # Note 1: 
&gt; #   Use pbeta() to estimate how likely 
&gt; #   different observed conversion rates are. 
&gt; 
&gt; # Note 2: 
&gt; #   Ask for the probability of seeing a 
&gt; #   conversion rate no larger than aConversionRate. 
&gt; 
&gt; # Note 3: 
&gt; #   Estimate conversion count as prior 
&gt; #   commonRate plus the B observations. 
&gt; 
&gt; # Note 4: 
&gt; #   Estimate nonconversion count as prior 
&gt; #   1-commonRate plus the B observations. 
&gt; 
[1] &quot;############################### end  263 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  264 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00264_example_B.18_of_section_B.2.2.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.18 of section B.2.2 
&gt; # (example B.18 of section B.2.2)  : Important statistical concepts : Statistical theory : A/B tests 
&gt; # Title: Plotting the posterior distribution of the B group 
&gt; 
&gt; library(&#39;ggplot2&#39;)

&gt; plt &lt;- data.frame(x=seq(from=0.04,to=0.07,length.out=301))

&gt; plt$density &lt;- dbeta(plt$x,
    shape1=commonRate+tab[&#39;B&#39;,&#39;1&#39;],
    shape2=(1-commonRate)+tab[&#39;B&#39;,&#39;0&#39;])

&gt; ggplot(dat=plt) + 
    geom_line(aes(x=x,y=density)) + 
    geom_vline(aes(xintercept=bConversionRate)) +
    geom_vline(aes(xintercept=aConversionRate),linetype=2)</code></pre>
<div class="figure">
<img src="rXB_files/figure-markdown_github/xA1-8.png" alt="" />

</div>
<pre><code>[1] &quot;############################### end  264 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  265 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00265_example_B.19_of_section_B.2.3.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.19 of section B.2.3 
&gt; # (example B.19 of section B.2.3)  : Important statistical concepts : Statistical theory : Power of tests 
&gt; # Title: Sample size estimate 
&gt; 
&gt; estimate &lt;- function(targetRate,difference,errorProb) {
     ceiling(-log(errorProb)*targetRate/(difference^2))
 }

&gt; est &lt;- estimate(0.045,0.004,0.05)

&gt; print(est)
[1] 8426

&gt; ## [1] 8426
&gt; 
[1] &quot;############################### end  265 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  266 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00266_example_B.20_of_section_B.2.3.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.20 of section B.2.3 
&gt; # (example B.20 of section B.2.3)  : Important statistical concepts : Statistical theory : Power of tests 
&gt; # Title: Exact binomial sample size calculation 
&gt; 
&gt; errorProb &lt;- function(targetRate,difference,size) {   # Note: 1 
    pbinom(ceiling((targetRate-difference)*size),
       size=size,prob=targetRate) 
 }

&gt; print(errorProb(0.045,0.004,est))     # Note: 2 
[1] 0.04153646

&gt; ## [1] 0.04153646
&gt; 
&gt; binSearchNonPositive &lt;- function(fEventuallyNegative) {   # Note: 3 
   low &lt;- 1
   high &lt;- low+1
   while(fEventuallyNegative(high)&gt;0) {
     high &lt;- 2*high
   }
   while(high&gt;low+1) {
     m &lt;- low + (high-low) %/% 2
     if(fEventuallyNegative(m)&gt;0) {
        low &lt;- m
     } else {
        high &lt;- m
     }
   }
   high
 }

&gt; actualSize &lt;- function(targetRate,difference,errorProb) {
    binSearchNonPositive(function(n) {
        errorProb(targetRate,difference,n) - errorProb })
 }

&gt; size &lt;- actualSize(0.045,0.004,0.05)  # Note: 4 

&gt; print(size) 
[1] 7623

&gt; ## [1] 7623
&gt; print(errorProb(0.045,0.004,size))
[1] 0.04983659

&gt; ## [1] 0.04983659
&gt; 
&gt; # Note 1: 
&gt; #   Define a function that calculates the 
&gt; #   probability of seeing a low number of conversions, assuming the actual 
&gt; #   conversion rate is targetRate and the size of the experiment is size. Low is 
&gt; #   considered to be a count that’s at least difference*size below the expected value 
&gt; #   targetRate*size. 
&gt; 
&gt; # Note 2: 
&gt; #   Calculate probability of a bad experiment using 
&gt; #   estimated experiment size. The failure odds are around 4% (under the 5% we’re 
&gt; #   designing for), which means the estimate size was slightly high. 
&gt; 
&gt; # Note 3: 
&gt; #   Define a binary search that finds a non-positive 
&gt; #   value of a function that’s guaranteed to be eventually negative. This search 
&gt; #   works around the minor non-monotonicity in errorProb() (due to rounding 
&gt; #   issues). 
&gt; 
&gt; # Note 4: 
&gt; #   Calculate the required sample size for our B 
&gt; #   experiment. 
&gt; 
[1] &quot;############################### end  266 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  267 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00267_example_B.21_of_section_B.2.4.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.21 of section B.2.4 
&gt; # (example B.21 of section B.2.4)  : Important statistical concepts : Statistical theory : Specialized statistical tests 
&gt; # Title: Building synthetic uncorrelated income example 
&gt; 
&gt; set.seed(235236)  # Note: 1 

&gt; d &lt;- data.frame(EarnedIncome=100000*rlnorm(100),
                  CapitalGains=100000*rlnorm(100))      # Note: 2 

&gt; print(with(d,cor(EarnedIncome,CapitalGains)))
[1] -0.01066116

&gt; # [1] -0.01066116     # Note: 3
&gt; 
&gt; # Note 1: 
&gt; #   Set the pseudo-random seed to a known 
&gt; #   value so the demonstration is repeatable. 
&gt; 
&gt; # Note 2: 
&gt; #   Generate our synthetic data. 
&gt; 
&gt; # Note 3: 
&gt; #   The correlation is -0.01, which is very near 0—indicating (as designed) no relation. 
&gt; 
[1] &quot;############################### end  267 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  268 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00268_example_B.22_of_section_B.2.4.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.22 of section B.2.4 
&gt; # (example B.22 of section B.2.4)  : Important statistical concepts : Statistical theory : Specialized statistical tests 
&gt; # Title: Calculating the (non)significance of the observed correlation 
&gt; 
&gt; with(d,cor(EarnedIncome,CapitalGains,method=&#39;spearman&#39;))
[1] 0.03083108

&gt; # [1] 0.03083108
&gt; with(d,cor.test(EarnedIncome,CapitalGains,method=&#39;spearman&#39;))

    Spearman&#39;s rank correlation rho

data:  EarnedIncome and CapitalGains
S = 161510, p-value = 0.7604
alternative hypothesis: true rho is not equal to 0
sample estimates:
       rho 
0.03083108 


&gt; #
&gt; #       Spearman&#39;s rank correlation rho
&gt; #
&gt; #data:  EarnedIncome and CapitalGains
&gt; #S = 161512, p-value = 0.7604
&gt; #alternative hypothesis: true rho is not equal to 0
&gt; #sample estimates:
&gt; #       rho
&gt; #0.03083108
&gt; 
[1] &quot;############################### end  268 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  269 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00269_example_B.23_of_section_B.3.1.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.23 of section B.3.1 
&gt; # (example B.23 of section B.3.1)  : Important statistical concepts : Examples of the statistical view of data : Sampling bias 
&gt; # Title: Misleading significance result from biased observations 
&gt; 
&gt; veryHighIncome &lt;- subset(d, EarnedIncome+CapitalGains&gt;=500000)

&gt; print(with(veryHighIncome,cor.test(EarnedIncome,CapitalGains,
     method=&#39;spearman&#39;)))

    Spearman&#39;s rank correlation rho

data:  EarnedIncome and CapitalGains
S = 1046, p-value &lt; 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
       rho 
-0.8678571 


&gt; #
&gt; #       Spearman&#39;s rank correlation rho
&gt; #
&gt; #data:  EarnedIncome and CapitalGains
&gt; #S = 1046, p-value &lt; 2.2e-16
&gt; #alternative hypothesis: true rho is not equal to 0
&gt; #sample estimates:
&gt; #       rho
&gt; #-0.8678571
&gt; 
[1] &quot;############################### end  269 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  270 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00270_example_B.24_of_section_B.3.1.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.24 of section B.3.1 
&gt; # (example B.24 of section B.3.1)  : Important statistical concepts : Examples of the statistical view of data : Sampling bias 
&gt; # Title: Plotting biased view of income and capital gains 
&gt; 
&gt; library(ggplot2)

&gt; ggplot(data=d,aes(x=EarnedIncome,y=CapitalGains)) +
    geom_point() + geom_smooth(method=&#39;lm&#39;) +
    coord_cartesian(xlim=c(0,max(d)),ylim=c(0,max(d)))  # Note: 1 </code></pre>
<div class="figure">
<img src="rXB_files/figure-markdown_github/xA1-9.png" alt="" />

</div>
<pre><code>&gt; ggplot(data=veryHighIncome,aes(x=EarnedIncome,y=CapitalGains)) +
    geom_point() + geom_smooth(method=&#39;lm&#39;) +
    geom_point(data=subset(d,EarnedIncome+CapitalGains&lt;500000),
          aes(x=EarnedIncome,y=CapitalGains),
       shape=4,alpha=0.5,color=&#39;red&#39;) +
    geom_segment(x=0,xend=500000,y=500000,yend=0,
       linetype=2,alpha=0.5,color=&#39;red&#39;) +
    coord_cartesian(xlim=c(0,max(d)),ylim=c(0,max(d)))  # Note: 2 


&gt; print(with(subset(d,EarnedIncome+CapitalGains&lt;500000),
     cor.test(EarnedIncome,CapitalGains,method=&#39;spearman&#39;)))    # Note: 3 

    Spearman&#39;s rank correlation rho

data:  EarnedIncome and CapitalGains
S = 107660, p-value = 0.6357
alternative hypothesis: true rho is not equal to 0
sample estimates:
        rho 
-0.05202267 


&gt; #
&gt; #        Spearman&#39;s rank correlation rho
&gt; #
&gt; #data:  EarnedIncome and CapitalGains
&gt; #S = 107664, p-value = 0.6357
&gt; #alternative hypothesis: true rho is not equal to 0
&gt; #sample estimates:
&gt; #        rho
&gt; #-0.05202267
&gt; 
&gt; # Note 1: 
&gt; #   Plot all of the income data with linear 
&gt; #   trend line (and uncertainty band). 
&gt; 
&gt; # Note 2: 
&gt; #   Plot the very high income data and its linear 
&gt; #   trend line (also include the cut-off line and a portrayal of the suppressed data). 
&gt; 
&gt; # Note 3: 
&gt; #   Compute correlation of suppressed 
&gt; #   data. 
&gt; 
[1] &quot;############################### end  270 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  271 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00271_example_B.25_of_section_B.3.2.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.25 of section B.3.2 
&gt; # (example B.25 of section B.3.2)  : Important statistical concepts : Examples of the statistical view of data : Omitted variable bias 
&gt; # Title: Summarizing our synthetic biological data 
&gt; 
&gt; load(&#39;synth.RData&#39;)

&gt; print(summary(s))
      week         Caco2A2BPapp       FractionHumanAbsorption
 Min.   :  1.00   Min.   :6.994e-08   Min.   :0.09347        
 1st Qu.: 25.75   1st Qu.:7.312e-07   1st Qu.:0.50343        
 Median : 50.50   Median :1.378e-05   Median :0.86937        
 Mean   : 50.50   Mean   :2.006e-05   Mean   :0.71492        
 3rd Qu.: 75.25   3rd Qu.:4.238e-05   3rd Qu.:0.93908        
 Max.   :100.00   Max.   :6.062e-05   Max.   :0.99170        

&gt; ##       week         Caco2A2BPapp       FractionHumanAbsorption
&gt; ##  Min.   :  1.00   Min.   :6.994e-08   Min.   :0.09347        
&gt; ##  1st Qu.: 25.75   1st Qu.:7.312e-07   1st Qu.:0.50343        
&gt; ##  Median : 50.50   Median :1.378e-05   Median :0.86937        
&gt; ##  Mean   : 50.50   Mean   :2.006e-05   Mean   :0.71492        
&gt; ##  3rd Qu.: 75.25   3rd Qu.:4.238e-05   3rd Qu.:0.93908        
&gt; ##  Max.   :100.00   Max.   :6.062e-05   Max.   :0.99170
&gt; head(s)
  week Caco2A2BPapp FractionHumanAbsorption
1    1 6.061924e-05              0.11568186
2    2 6.061924e-05              0.11732401
3    3 6.061924e-05              0.09347046
4    4 6.061924e-05              0.12893540
5    5 5.461941e-05              0.19021858
6    6 5.370623e-05              0.14892154

&gt; ##   week Caco2A2BPapp FractionHumanAbsorption
&gt; ## 1    1 6.061924e-05              0.11568186
&gt; ## 2    2 6.061924e-05              0.11732401
&gt; ## 3    3 6.061924e-05              0.09347046
&gt; ## 4    4 6.061924e-05              0.12893540
&gt; ## 5    5 5.461941e-05              0.19021858
&gt; ## 6    6 5.370623e-05              0.14892154
&gt; # View(s)     # Note: 1
&gt; 
&gt; # Note 1: 
&gt; #   Display the data in a spreadsheet-like 
&gt; #   window. View is one of the commands that has a much better implementation in 
&gt; #   RStudio than in basic R. 
&gt; 
[1] &quot;############################### end  271 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  272 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00272_example_B.26_of_section_B.3.2.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.26 of section B.3.2 
&gt; # (example B.26 of section B.3.2)  : Important statistical concepts : Examples of the statistical view of data : Omitted variable bias 
&gt; # Title: Building data that improves over time 
&gt; 
&gt; set.seed(2535251)

&gt; s &lt;- data.frame(week=1:100)

&gt; s$Caco2A2BPapp &lt;- sort(sample(d$Caco2A2BPapp,100,replace=T),
    decreasing=T)

&gt; sigmoid &lt;- function(x) {1/(1+exp(-x))}

&gt; s$FractionHumanAbsorption &lt;-  # Note: 1 
  sigmoid(
    7.5 + 0.5*log(s$Caco2A2BPapp) +     # Note: 2 
    s$week/10 - mean(s$week/10) +   # Note: 3 
    rnorm(100)/3    # Note: 4 
    )

&gt; write.table(s,&#39;synth.csv&#39;,sep=&#39;,&#39;,
    quote=F,row.names=F)

&gt; # Note 1: 
&gt; #   Build synthetic examples. 
&gt; 
&gt; # Note 2: 
&gt; #   Add in the Caco2-to-absorption relation learned from the original dataset. Note the relation is 
&gt; #   positive: better Caco2 always drives better absorption in our 
&gt; #   synthetic dataset. We’re log-transforming Caco2, as its values span over 3 
&gt; #   decades (orders of magnitude). 
&gt; 
&gt; # Note 3: 
&gt; #   Add in a mean-0 term that depends on time to simulate the effects of improvements as the 
&gt; #   project moves forward. 
&gt; 
&gt; # Note 4: 
&gt; #   Add in a mean-0 noise term. 
&gt; 
[1] &quot;############################### end  272 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  273 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00273_example_B.27_of_section_B.3.2.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.27 of section B.3.2 
&gt; # (example B.27 of section B.3.2)  : Important statistical concepts : Examples of the statistical view of data : Omitted variable bias 
&gt; # Title: A bad model (due to omitted variable bias) 
&gt; 
&gt; print(summary(glm(data=s,
    FractionHumanAbsorption~log(Caco2A2BPapp),
    family=binomial(link=&#39;logit&#39;))))

Warning: non-integer #successes in a binomial glm!


Call:
glm(formula = FractionHumanAbsorption ~ log(Caco2A2BPapp), family = binomial(link = &quot;logit&quot;), 
    data = s)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-0.6097  -0.2462  -0.1181   0.2022   0.5567  

Coefficients:
                  Estimate Std. Error z value Pr(&gt;|z|)    
(Intercept)        -9.9893     2.7494  -3.633 0.000280 ***
log(Caco2A2BPapp)  -0.9681     0.2568  -3.770 0.000163 ***
---
Signif. codes:  0 &#39;***&#39; 0.001 &#39;**&#39; 0.01 &#39;*&#39; 0.05 &#39;.&#39; 0.1 &#39; &#39; 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 43.7328  on 99  degrees of freedom
Residual deviance:  9.4679  on 98  degrees of freedom
AIC: 64.715

Number of Fisher Scoring iterations: 6


&gt; ## Warning: non-integer #successes in a binomial glm!
&gt; ## 
&gt; ## Call:
&gt; ## glm(formula = FractionHumanAbsorption ~ log(Caco2A2BPapp), 
&gt; ##    family = binomial(link = &quot;logit&quot;), 
&gt; ##     data = s)
&gt; ## 
&gt; ## Deviance Residuals: 
&gt; ##    Min      1Q  Median      3Q     Max  
&gt; ## -0.609  -0.246  -0.118   0.202   0.557  
&gt; ## 
&gt; ## Coefficients:
&gt; ##                   Estimate Std. Error z value Pr(&gt;|z|)    
&gt; ## (Intercept)        -10.003      2.752   -3.64  0.00028 ***
&gt; ## log(Caco2A2BPapp)   -0.969      0.257   -3.77  0.00016 ***
&gt; ## ---
&gt; ## Signif. codes:  0 &#39;***&#39; 0.001 &#39;**&#39; 0.01 &#39;*&#39; 0.05 &#39;.&#39; 0.1 &#39; &#39; 1
&gt; ## 
&gt; ## (Dispersion parameter for binomial family taken to be 1)
&gt; ## 
&gt; ##     Null deviance: 43.7821  on 99  degrees of freedom
&gt; ## Residual deviance:  9.4621  on 98  degrees of freedom
&gt; ## AIC: 64.7
&gt; ## 
&gt; ## Number of Fisher Scoring iterations: 6
&gt; 
[1] &quot;############################### end  273 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;############################### start  274 Fri Jun 17 10:30:33 2016&quot;
[1] &quot;#####  running  ../CodeExamples/x0B_Important_statistical_concepts/00274_example_B.28_of_section_B.3.2.R&quot;
[1] &quot;#####   in directory ../bioavailability&quot;

&gt; # example B.28 of section B.3.2 
&gt; # (example B.28 of section B.3.2)  : Important statistical concepts : Examples of the statistical view of data : Omitted variable bias 
&gt; # Title: A better model 
&gt; 
&gt; print(summary(glm(data=s,
    FractionHumanAbsorption~week+log(Caco2A2BPapp),
    family=binomial(link=&#39;logit&#39;))))

Warning: non-integer #successes in a binomial glm!</code></pre>
<div class="figure">
<img src="rXB_files/figure-markdown_github/xA1-10.png" alt="" />

</div>
<pre><code>Call:
glm(formula = FractionHumanAbsorption ~ week + log(Caco2A2BPapp), 
    family = binomial(link = &quot;logit&quot;), data = s)

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-0.34737  -0.05685  -0.00104   0.07092   0.30367  

Coefficients:
                  Estimate Std. Error z value Pr(&gt;|z|)   
(Intercept)        3.15105    4.68158   0.673  0.50090   
week               0.10328    0.03857   2.678  0.00741 **
log(Caco2A2BPapp)  0.56961    0.54162   1.052  0.29295   
---
Signif. codes:  0 &#39;***&#39; 0.001 &#39;**&#39; 0.01 &#39;*&#39; 0.05 &#39;.&#39; 0.1 &#39; &#39; 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 43.7328  on 99  degrees of freedom
Residual deviance:  1.2596  on 97  degrees of freedom
AIC: 47.829

Number of Fisher Scoring iterations: 6


&gt; ## Warning: non-integer #successes in a binomial glm!
&gt; ## 
&gt; ## Call:
&gt; ## glm(formula = FractionHumanAbsorption ~ week + log(Caco2A2BPapp), 
&gt; ##     family = binomial(link = &quot;logit&quot;), data = s)
&gt; ## 
&gt; ## Deviance Residuals: 
&gt; ##     Min       1Q   Median       3Q      Max  
&gt; ## -0.3474  -0.0568  -0.0010   0.0709   0.3038  
&gt; ## 
&gt; ## Coefficients:
&gt; ##                   Estimate Std. Error z value Pr(&gt;|z|)   
&gt; ## (Intercept)         3.1413     4.6837    0.67   0.5024   
&gt; ## week                0.1033     0.0386    2.68   0.0074 **
&gt; ## log(Caco2A2BPapp)   0.5689     0.5419    1.05   0.2938   
&gt; ## ---
&gt; ## Signif. codes:  0 &#39;***&#39; 0.001 &#39;**&#39; 0.01 &#39;*&#39; 0.05 &#39;.&#39; 0.1 &#39; &#39; 1
&gt; ## 
&gt; ## (Dispersion parameter for binomial family taken to be 1)
&gt; ## 
&gt; ##     Null deviance: 43.7821  on 99  degrees of freedom
&gt; ## Residual deviance:  1.2595  on 97  degrees of freedom
&gt; ## AIC: 47.82
&gt; ## 
&gt; ## Number of Fisher Scoring iterations: 6
&gt; 
[1] &quot;############################### end  274 Fri Jun 17 10:30:33 2016&quot;</code></pre>