16. Instrumental variables estimation

Program 16.1

  • Estimating the average causal using the standard IV estimator via the calculation of sample averages
  • Data from NHEFS
library(here)
#install.packages("readxl") # install package if required
library("readxl")
nhefs <- read_excel(here("data", "NHEFS.xls"))

# some preprocessing of the data
nhefs$cens <- ifelse(is.na(nhefs$wt82), 1, 0)
summary(nhefs$price82)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   1.452   1.740   1.815   1.806   1.868   2.103      92

# for simplicity, ignore subjects with missing outcome or missing instrument
nhefs.iv <- nhefs[which(!is.na(nhefs$wt82) & !is.na(nhefs$price82)),]
nhefs.iv$highprice <- ifelse(nhefs.iv$price82>=1.5, 1, 0)

table(nhefs.iv$highprice, nhefs.iv$qsmk)
#>    
#>        0    1
#>   0   33    8
#>   1 1065  370

t.test(wt82_71 ~ highprice, data=nhefs.iv)
#> 
#>  Welch Two Sample t-test
#> 
#> data:  wt82_71 by highprice
#> t = -0.10179, df = 41.644, p-value = 0.9194
#> alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
#> 95 percent confidence interval:
#>  -3.130588  2.830010
#> sample estimates:
#> mean in group 0 mean in group 1 
#>        2.535729        2.686018

Program 16.2

  • Estimating the average causal effect using the standard IV estimator via two-stage-least-squares regression
  • Data from NHEFS
#install.packages ("sem") # install package if required
library(sem)

model1 <- tsls(wt82_71 ~ qsmk, ~ highprice, data = nhefs.iv)
summary(model1)
#> 
#>  2SLS Estimates
#> 
#> Model Formula: wt82_71 ~ qsmk
#> 
#> Instruments: ~highprice
#> 
#> Residuals:
#>      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
#> -43.34863  -4.00206  -0.02712   0.00000   4.17040  46.47022 
#> 
#>              Estimate Std. Error t value Pr(>|t|)
#> (Intercept)  2.068164   5.085098 0.40671  0.68428
#> qsmk         2.396270  19.840037 0.12078  0.90388
#> 
#> Residual standard error: 7.8561141 on 1474 degrees of freedom
confint(model1)  # note the wide confidence intervals
#>                  2.5 %   97.5 %
#> (Intercept)  -7.898445 12.03477
#> qsmk        -36.489487 41.28203

Program 16.3

  • Estimating the average causal using the standard IV estimator via additive marginal structural models
  • Data from NHEFS
  • G-estimation: Checking one possible value of psi
  • See Chapter 14 for program that checks several values and computes 95% confidence intervals
nhefs.iv$psi <- 2.396
nhefs.iv$Hpsi <- nhefs.iv$wt82_71-nhefs.iv$psi*nhefs.iv$qsmk

#install.packages("geepack") # install package if required
library("geepack")
g.est <- geeglm(highprice ~ Hpsi, data=nhefs.iv, id=seqn, family=binomial(),
                corstr="independence")
summary(g.est)
#> 
#> Call:
#> geeglm(formula = highprice ~ Hpsi, family = binomial(), data = nhefs.iv, 
#>     id = seqn, corstr = "independence")
#> 
#>  Coefficients:
#>              Estimate   Std.err  Wald Pr(>|W|)    
#> (Intercept) 3.555e+00 1.652e-01 463.1   <2e-16 ***
#> Hpsi        2.748e-07 2.273e-02   0.0        1    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Correlation structure = independence 
#> Estimated Scale Parameters:
#> 
#>             Estimate Std.err
#> (Intercept)        1  0.7607
#> Number of clusters:   1476  Maximum cluster size: 1

beta <- coef(g.est)
SE <- coef(summary(g.est))[,2]
lcl <- beta-qnorm(0.975)*SE
ucl <- beta+qnorm(0.975)*SE
cbind(beta, lcl, ucl)
#>                  beta      lcl     ucl
#> (Intercept) 3.555e+00  3.23152 3.87917
#> Hpsi        2.748e-07 -0.04456 0.04456

Program 16.4

  • Estimating the average causal using the standard IV estimator with altnerative proposed instruments
  • Data from NHEFS
summary(tsls(wt82_71 ~ qsmk, ~ ifelse(price82 >= 1.6, 1, 0), data = nhefs.iv))
#> 
#>  2SLS Estimates
#> 
#> Model Formula: wt82_71 ~ qsmk
#> 
#> Instruments: ~ifelse(price82 >= 1.6, 1, 0)
#> 
#> Residuals:
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>   -55.6   -13.5     7.6     0.0    12.5    56.4 
#> 
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)    -7.89      42.25  -0.187    0.852
#> qsmk           41.28     164.95   0.250    0.802
#> 
#> Residual standard error: 18.6055 on 1474 degrees of freedom
summary(tsls(wt82_71 ~ qsmk, ~ ifelse(price82 >= 1.7, 1, 0), data = nhefs.iv))
#> 
#>  2SLS Estimates
#> 
#> Model Formula: wt82_71 ~ qsmk
#> 
#> Instruments: ~ifelse(price82 >= 1.7, 1, 0)
#> 
#> Residuals:
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>   -54.4   -13.4    -8.4     0.0    18.1    75.3 
#> 
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)    13.16      48.08   0.274    0.784
#> qsmk          -40.91     187.74  -0.218    0.828
#> 
#> Residual standard error: 20.591 on 1474 degrees of freedom
summary(tsls(wt82_71 ~ qsmk, ~ ifelse(price82 >= 1.8, 1, 0), data = nhefs.iv))
#> 
#>  2SLS Estimates
#> 
#> Model Formula: wt82_71 ~ qsmk
#> 
#> Instruments: ~ifelse(price82 >= 1.8, 1, 0)
#> 
#> Residuals:
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  -49.37   -8.31   -3.44    0.00    7.27   60.53 
#> 
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)    8.086      7.288   1.110    0.267
#> qsmk         -21.103     28.428  -0.742    0.458
#> 
#> Residual standard error: 13.0188 on 1474 degrees of freedom
summary(tsls(wt82_71 ~ qsmk, ~ ifelse(price82 >= 1.9, 1, 0), data = nhefs.iv))
#> 
#>  2SLS Estimates
#> 
#> Model Formula: wt82_71 ~ qsmk
#> 
#> Instruments: ~ifelse(price82 >= 1.9, 1, 0)
#> 
#> Residuals:
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  -47.24   -6.33   -1.43    0.00    5.52   54.36 
#> 
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)    5.963      6.067   0.983    0.326
#> qsmk         -12.811     23.667  -0.541    0.588
#> 
#> Residual standard error: 10.3637 on 1474 degrees of freedom

Program 16.5

  • Estimating the average causal using the standard IV estimator
  • Conditional on baseline covariates
  • Data from NHEFS
model2 <- tsls(wt82_71 ~ qsmk + sex + race + age + smokeintensity + smokeyrs +
                      as.factor(exercise) + as.factor(active) + wt71,
             ~ highprice + sex + race + age + smokeintensity + smokeyrs + as.factor(exercise) +
               as.factor(active) + wt71, data = nhefs.iv)
summary(model2)
#> 
#>  2SLS Estimates
#> 
#> Model Formula: wt82_71 ~ qsmk + sex + race + age + smokeintensity + smokeyrs + 
#>     as.factor(exercise) + as.factor(active) + wt71
#> 
#> Instruments: ~highprice + sex + race + age + smokeintensity + smokeyrs + as.factor(exercise) + 
#>     as.factor(active) + wt71
#> 
#> Residuals:
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>  -42.23   -4.29   -0.62    0.00    3.87   46.74 
#> 
#>                       Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)          17.280330   2.335402   7.399  2.3e-13 ***
#> qsmk                 -1.042295  29.987369  -0.035   0.9723    
#> sex                  -1.644393   2.630831  -0.625   0.5320    
#> race                 -0.183255   4.650386  -0.039   0.9686    
#> age                  -0.163640   0.240548  -0.680   0.4964    
#> smokeintensity        0.005767   0.145504   0.040   0.9684    
#> smokeyrs              0.025836   0.161421   0.160   0.8729    
#> as.factor(exercise)1  0.498748   2.171239   0.230   0.8184    
#> as.factor(exercise)2  0.581834   2.183148   0.267   0.7899    
#> as.factor(active)1   -1.170145   0.607466  -1.926   0.0543 .  
#> as.factor(active)2   -0.512284   1.308451  -0.392   0.6955    
#> wt71                 -0.097949   0.036271  -2.701   0.0070 ** 
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 7.7162 on 1464 degrees of freedom