<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="generator" content="pandoc">
  <meta name="author" content="Fill In Your Name" />
  <meta name="dcterms.date" content="2022-03-01" />
  <title>Estimating Estimands with Estimators</title>
  <meta name="apple-mobile-web-app-capable" content="yes">
  <meta name="apple-mobile-web-app-status-bar-style" content="black-translucent">
  <!-- Zooming is deliberately left enabled (no maximum-scale / user-scalable=no) so readers can pinch-zoom the slides. -->
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <link rel="stylesheet" href="estimation-slides_files/reveal.js-3.3.0.1/css/reveal.css"/>


<style type="text/css">
/* Layout for highlighted code blocks (elements carrying the .sourceCode class).
   Each direct child <span> of the <code> element is laid out as an
   inline-block with a fixed line-height; presumably there is one span per
   source line - confirm against the generated markup. */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
/* Empty spans get an explicit height (presumably so blank lines do not collapse). */
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
/* On screen, the wrapping div handles overflow (scrolls when needed). */
@media screen {
div.sourceCode { overflow: auto; }
}
/* In print, long lines wrap; the negative text-indent paired with an equal
   left padding produces a hanging indent for the wrapped continuation. */
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
/* Line numbers for pre.numberSource blocks: a CSS counter is reset on the
   <code> element, incremented once per child span, and rendered through the
   ::before pseudo-element of the first anchor inside each span. */
pre.numberSource code
  { counter-reset: source-line 0; }
pre.numberSource code > span
  { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
  { content: counter(source-line);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; display: inline-block;
    /* The rendered numbers are excluded from text selection. */
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
    color: #aaaaaa;
  }
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa;  padding-left: 4px; }
/* Intentionally empty rule as emitted by the generator. */
div.sourceCode
  {   }
/* On screen, the ::before content of the first anchor in each span is underlined. */
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
code span.al { color: #ff0000; font-weight: bold; } /* Alert */
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code span.at { color: #7d9029; } /* Attribute */
code span.bn { color: #40a070; } /* BaseN */
code span.bu { } /* BuiltIn */
code span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code span.ch { color: #4070a0; } /* Char */
code span.cn { color: #880000; } /* Constant */
code span.co { color: #60a0b0; font-style: italic; } /* Comment */
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code span.do { color: #ba2121; font-style: italic; } /* Documentation */
code span.dt { color: #902000; } /* DataType */
code span.dv { color: #40a070; } /* DecVal */
code span.er { color: #ff0000; font-weight: bold; } /* Error */
code span.ex { } /* Extension */
code span.fl { color: #40a070; } /* Float */
code span.fu { color: #06287e; } /* Function */
code span.im { } /* Import */
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
code span.kw { color: #007020; font-weight: bold; } /* Keyword */
code span.op { color: #666666; } /* Operator */
code span.ot { color: #007020; } /* Other */
code span.pp { color: #bc7a00; } /* Preprocessor */
code span.sc { color: #4070a0; } /* SpecialChar */
code span.ss { color: #bb6688; } /* SpecialString */
code span.st { color: #4070a0; } /* String */
code span.va { color: #19177c; } /* Variable */
code span.vs { color: #4070a0; } /* VerbatimString */
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
</style>

<link rel="stylesheet" href="estimation-slides_files/reveal.js-3.3.0.1/css/theme/simple.css" id="theme">


  <!-- some tweaks to reveal css -->
  <style type="text/css">
    /* Heading sizes, stepping down from h1 to h4. */
    .reveal h1 {
      font-size: 2.0em;
    }
    .reveal h2 {
      font-size: 1.5em;
    }
    .reveal h3 {
      font-size: 1.25em;
    }
    .reveal h4 {
      font-size: 1em;
    }

    /* No padding on top-level slides or on slides nested one level down. */
    .reveal .slides > section,
    .reveal .slides > section > section {
      padding: 0;
    }

    /* Tables: smaller text with thin dotted gray borders. */
    .reveal table {
      font-size: 0.7em;
      border-collapse: collapse;
      border-spacing: 2px;
      border-color: gray;
      border-style: dotted;
      border-width: 1px;
    }

    /* Header and body cells share the same border and horizontal padding. */
    .reveal table th,
    .reveal table td {
      padding-left: 10px;
      padding-right: 25px;
      border-color: gray;
      border-style: dotted;
      border-width: 1px;
    }

    /* Header cells are additionally bold. */
    .reveal table th {
      font-weight: bold;
    }
  </style>

    <style type="text/css">code{white-space: pre;}</style>


<!-- Printing and PDF exports -->
<script id="paper-css" type="application/dynamic-css">

/* Default Print Stylesheet Template
   by Rob Glazebrook of CSSnewbie.com
   Last Updated: June 4, 2008

   Feel free (nay, compelled) to edit, append, and
   manipulate this file as you see fit. */


@media print {

	/* SECTION 1: Set default width, margin, float, and
	   background. This prevents elements from extending
	   beyond the edge of the printed page, and prevents
	   unnecessary background images from printing */
	html {
		background: #fff;
		width: auto;
		height: auto;
		overflow: visible;
	}
	body {
		background: #fff;
		font-size: 20pt;
		width: auto;
		height: auto;
		border: 0;
		margin: 0 5%;
		padding: 0;
		overflow: visible;
		float: none !important;
	}

	/* SECTION 2: Remove any elements not needed in print.
	   This would include navigation, ads, sidebars, etc. */
	.nestedarrow,
	.controls,
	.fork-reveal,
	.share-reveal,
	.state-background,
	.reveal .progress,
	.reveal .backgrounds {
		display: none !important;
	}

	/* SECTION 3: Set body font face, size, and color.
	   Consider using a serif font for readability. */
	body, p, td, li, div {
		font-size: 20pt!important;
		font-family: Georgia, "Times New Roman", Times, serif !important;
		color: #000;
	}

	/* SECTION 4: Set heading font face, sizes, and color.
	   Differentiate your headings from your body text.
	   Perhaps use a large sans-serif for distinction. */
	h1,h2,h3,h4,h5,h6 {
		color: #000!important;
		height: auto;
		line-height: normal;
		font-family: Georgia, "Times New Roman", Times, serif !important;
		text-shadow: 0 0 0 #000 !important;
		text-align: left;
		letter-spacing: normal;
	}
	/* Need to reduce the size of the fonts for printing */
	h1 { font-size: 28pt !important;  }
	h2 { font-size: 24pt !important; }
	h3 { font-size: 22pt !important; }
	h4 { font-size: 22pt !important; font-variant: small-caps; }
	h5 { font-size: 21pt !important; }
	h6 { font-size: 20pt !important; font-style: italic; }

	/* SECTION 5: Make hyperlinks more usable.
	   Ensure links are underlined, and consider appending
	   the URL to the end of the link for usability. */
	a:link,
	a:visited {
		color: #000 !important;
		font-weight: bold;
		text-decoration: underline;
	}
	/*
	.reveal a:link:after,
	.reveal a:visited:after {
		content: " (" attr(href) ") ";
		color: #222 !important;
		font-size: 90%;
	}
	*/


	/* SECTION 6: more reveal.js specific additions by @skypanther */
	ul, ol, div, p {
		visibility: visible;
		position: static;
		width: auto;
		height: auto;
		display: block;
		overflow: visible;
		margin: 0;
		text-align: left !important;
	}
	.reveal pre,
	.reveal table {
		margin-left: 0;
		margin-right: 0;
	}
	.reveal pre code {
		padding: 20px;
		border: 1px solid #ddd;
	}
	.reveal blockquote {
		margin: 20px 0;
	}
	.reveal .slides {
		position: static !important;
		width: auto !important;
		height: auto !important;

		left: 0 !important;
		top: 0 !important;
		margin-left: 0 !important;
		margin-top: 0 !important;
		padding: 0 !important;
		zoom: 1 !important;

		overflow: visible !important;
		display: block !important;

		text-align: left !important;
		-webkit-perspective: none;
		   -moz-perspective: none;
		    -ms-perspective: none;
		        perspective: none;

		-webkit-perspective-origin: 50% 50%;
		   -moz-perspective-origin: 50% 50%;
		    -ms-perspective-origin: 50% 50%;
		        perspective-origin: 50% 50%;
	}
	.reveal .slides section {
		visibility: visible !important;
		position: static !important;
		width: auto !important;
		height: auto !important;
		display: block !important;
		overflow: visible !important;

		left: 0 !important;
		top: 0 !important;
		margin-left: 0 !important;
		margin-top: 0 !important;
		padding: 60px 20px !important;
		z-index: auto !important;

		opacity: 1 !important;

		page-break-after: always !important;

		-webkit-transform-style: flat !important;
		   -moz-transform-style: flat !important;
		    -ms-transform-style: flat !important;
		        transform-style: flat !important;

		-webkit-transform: none !important;
		   -moz-transform: none !important;
		    -ms-transform: none !important;
		        transform: none !important;

		-webkit-transition: none !important;
		   -moz-transition: none !important;
		    -ms-transition: none !important;
		        transition: none !important;
	}
	.reveal .slides section.stack {
		padding: 0 !important;
	}
	.reveal section:last-of-type {
		page-break-after: avoid !important;
	}
	.reveal section .fragment {
		opacity: 1 !important;
		visibility: visible !important;

		-webkit-transform: none !important;
		   -moz-transform: none !important;
		    -ms-transform: none !important;
		        transform: none !important;
	}
	.reveal section img {
		display: block;
		margin: 15px 0px;
		background: rgba(255,255,255,1);
		border: 1px solid #666;
		box-shadow: none;
	}

	.reveal section small {
		font-size: 0.8em;
	}

}  
</script>


<script id="pdf-css" type="application/dynamic-css">
    
/**
 * This stylesheet is used to print reveal.js
 * presentations to PDF.
 *
 * https://github.com/hakimel/reveal.js#pdf-export
 */

/* Ask the browser to print colors and backgrounds as specified rather than
   optimizing them away. The unprefixed property is the standardized form;
   the -webkit- form is kept for older WebKit/Blink engines. */
* {
	-webkit-print-color-adjust: exact;
	        print-color-adjust: exact;
}

body {
	margin: 0 auto !important;
	border: 0;
	padding: 0;
	float: none !important;
	overflow: visible;
}

html {
	width: 100%;
	height: 100%;
	overflow: visible;
}

/* Remove any elements not needed in print. */
.nestedarrow,
.reveal .controls,
.reveal .progress,
.reveal .playback,
.reveal.overview,
.fork-reveal,
.share-reveal,
.state-background {
	display: none !important;
}

h1, h2, h3, h4, h5, h6 {
	text-shadow: 0 0 0 #000 !important;
}

.reveal pre code {
	overflow: hidden !important;
	font-family: Courier, 'Courier New', monospace !important;
}

ul, ol, div, p {
	visibility: visible;
	position: static;
	width: auto;
	height: auto;
	display: block;
	overflow: visible;
	margin: auto;
}
.reveal {
	width: auto !important;
	height: auto !important;
	overflow: hidden !important;
}
.reveal .slides {
	position: static;
	width: 100%;
	height: auto;

	left: auto;
	top: auto;
	margin: 0 !important;
	padding: 0 !important;

	overflow: visible;
	display: block;

	-webkit-perspective: none;
	   -moz-perspective: none;
	    -ms-perspective: none;
	        perspective: none;

	-webkit-perspective-origin: 50% 50%; /* there isn't a none/auto value but 50-50 is the default */
	   -moz-perspective-origin: 50% 50%;
	    -ms-perspective-origin: 50% 50%;
	        perspective-origin: 50% 50%;
}

/* PDF export: force every slide to be visible and relatively positioned,
   break the page after each one, and switch off the 3D transforms that
   reveal.js applies on screen. */
.reveal .slides section {
	page-break-after: always !important;

	visibility: visible !important;
	position: relative !important;
	display: block !important;

	margin: 0 !important;
	padding: 0 !important;
	box-sizing: border-box !important;
	min-height: 1px;

	opacity: 1 !important;

	-webkit-transform-style: flat !important;
	   -moz-transform-style: flat !important;
	    -ms-transform-style: flat !important;
	        transform-style: flat !important;

	-webkit-transform: none !important;
	   -moz-transform: none !important;
	    -ms-transform: none !important;
	        transform: none !important;
}

.reveal section.stack {
	margin: 0 !important;
	padding: 0 !important;
	page-break-after: avoid !important;
	height: auto !important;
	min-height: auto !important;
}

.reveal img {
	box-shadow: none;
}

.reveal .roll {
	overflow: visible;
	line-height: 1em;
}

/* Slide backgrounds are placed inside of their slide when exporting to PDF */
.reveal section .slide-background {
	display: block !important;
	position: absolute;
	top: 0;
	left: 0;
	width: 100%;
	z-index: -1;
}

/* All elements should be above the slide-background */
.reveal section>* {
	position: relative;
	z-index: 1;
}

/* Display slide speaker notes when 'showNotes' is enabled */
.reveal .speaker-notes-pdf {
	display: block;
	width: 100%;
	max-height: none;
	left: auto;
	top: auto;
	z-index: 100;
}

/* Display slide numbers when 'slideNumber' is enabled */
.reveal .slide-number-pdf {
	display: block;
	position: absolute;
	font-size: 14px;
}

</script>


<script>
var style = document.createElement( 'style' );
style.type = 'text/css';
var style_script_id = window.location.search.match( /print-pdf/gi ) ? 'pdf-css' : 'paper-css';
var style_script = document.getElementById(style_script_id).text;
style.innerHTML = style_script;
document.getElementsByTagName('head')[0].appendChild(style);
</script>

    <script src="estimation-slides_files/header-attrs-2.10/header-attrs.js"></script>
    <link href="estimation-slides_files/font-awesome-5.1.0/css/all.css" rel="stylesheet" />
    <link href="estimation-slides_files/font-awesome-5.1.0/css/v4-shims.css" rel="stylesheet" />
</head>
<body>
  <div class="reveal">
    <div class="slides">

<section>
    <h1 class="title">Estimating Estimands with Estimators</h1>
    <h2 class="author">Fill In Your Name</h2>
    <h3 class="date">01 March 2022</h3>
</section>
<section id="TOC">
<ul>
<li><a href="#/key-points">Key points</a>
<ul>
<li><a href="#/key-points-about-estimation-i">Key points about
estimation I</a></li>
<li><a href="#/key-points-about-estimation-ii">Key points about
estimation II</a></li>
<li><a href="#/key-points-about-estimation-iii">Key points about
estimation III</a></li>
<li><a href="#/key-points-about-estimation-iv">Key points about
estimation IV</a></li>
<li><a href="#/key-points-about-estimation-v">Key points about
estimation V</a></li>
</ul></li>
<li><a href="#/review">Review</a>
<ul>
<li><a href="#/review-causal-effects">Review: Causal effects</a></li>
</ul></li>
<li><a href="#/estimands-and-estimators-and-averages">Estimands and
estimators and averages</a>
<ul>
<li><a
href="#/how-can-we-learn-about-causal-effects-from-observed-data">How
can we learn about causal effects from observed data?</a></li>
<li><a
href="#/a-common-estimand-and-estimator-the-average-treatment-effect-and-the-difference-of-means">A
common estimand and estimator: The average treatment effect and the
difference of means</a></li>
<li><a
href="#/simulation-step-1-create-some-data-with-a-known-ate">Simulation
Step 1: create some data with a known ATE</a></li>
<li><a href="#/first-make-fake-data">First make fake data</a></li>
<li><a href="#/using-declaredesign">Using DeclareDesign</a></li>
<li><a href="#/using-declaredesign-make-fake-data">Using DeclareDesign:
make fake data</a></li>
<li><a href="#/using-declaredesign-define-estimand-and-estimators">Using
DeclareDesign: define estimand and estimators</a></li>
<li><a
href="#/using-declaredesign-define-estimand-and-estimators-1">Using
DeclareDesign: define estimand and estimators</a></li>
<li><a
href="#/using-declaredesign-define-estimand-and-estimators-2">Using
DeclareDesign: define estimand and estimators</a></li>
<li><a href="#/then-simulate-with-one-randomization">Then simulate with
one randomization</a></li>
<li><a href="#/then-simulate-with-one-randomization-1">Then simulate
with one randomization</a></li>
<li><a
href="#/then-simulate-a-different-randomization-and-estimate-the-ate-with-the-same-estimators">Then
simulate a different randomization and estimate the ATE with the same
estimators</a></li>
<li><a
href="#/how-do-our-estimators-behave-in-general-for-this-design">How do
our estimators behave in general for this design?</a></li>
<li><a
href="#/how-do-our-estimators-behave-in-general-for-this-design-1">How
do our estimators behave in general for this design?</a></li>
<li><a href="#/which-estimator-is-closer-to-the-truth">Which estimator
is closer to the truth?</a></li>
<li><a href="#/unbiased-and-biased-estimators">Unbiased and biased
estimators</a></li>
</ul></li>
<li><a href="#/block-randomization">Block randomization</a>
<ul>
<li><a
href="#/block-randomized-experiments-are-a-collection-of-mini-experiments">Block-randomized
experiments are a collection of mini-experiments</a></li>
<li><a
href="#/block-randomized-experiments-are-a-collection-of-mini-experiments-1">Block-randomized
experiments are a collection of mini-experiments</a></li>
<li><a
href="#/estimating-the-ate-in-block-randomized-experiments">Estimating
the ATE in block-randomized experiments</a></li>
<li><a
href="#/estimating-the-ate-in-block-randomized-experiments-1">Estimating
the ATE in block-randomized experiments</a></li>
<li><a
href="#/estimating-the-ate-in-block-randomized-experiments-2">Estimating
the ATE in block-randomized experiments</a></li>
<li><a
href="#/estimating-the-ate-in-block-randomized-experiments-3">Estimating
the ATE in block-randomized experiments</a></li>
<li><a href="#/which-estimator-should-we-use">Which estimator should we
use?</a></li>
<li><a href="#/which-estimator-should-we-use-1">Which estimator should
we use?</a></li>
<li><a href="#/which-estimator-should-we-use-2">Which estimator should
we use?</a></li>
<li><a href="#/which-estimator-should-we-use-3">Which estimator should
we use?</a></li>
<li><a href="#/which-estimator-is-closer-to-the-truth-1">Which estimator
is closer to the truth?</a></li>
</ul></li>
<li><a href="#/cluster-randomization">Cluster randomization</a>
<ul>
<li><a
href="#/in-cluster-randomized-experiments-units-are-randomized-as-a-group-cluster-to-treatment">In
cluster-randomized experiments, units are randomized as a group
(cluster) to treatment</a></li>
<li><a
href="#/estimating-the-ate-in-cluster-randomized-experiments">Estimating
the ATE in cluster-randomized experiments</a></li>
<li><a
href="#/estimating-the-se-for-the-ate-in-cluster-randomized-experiments">Estimating
the SE for the ATE in cluster-randomized experiments</a></li>
<li><a href="#/an-example-of-estimation">An example of
estimation</a></li>
<li><a href="#/an-example-of-estimation-1">An example of
estimation</a></li>
<li><a href="#/use-simulation-to-assess-estimators-and-tests">Use
simulation to assess estimators and tests</a></li>
<li><a href="#/use-simulation-to-assess-estimators-and-tests-1">Use
simulation to assess estimators and tests</a></li>
<li><a href="#/use-simulation-to-assess-estimators-and-tests-2">Use
simulation to assess estimators and tests</a></li>
<li><a
href="#/summary-of-estimation-and-testing-in-cluster-randomized-trials">Summary
of estimation and testing in cluster-randomized trials</a></li>
</ul></li>
<li><a href="#/binary-outcomes">Binary outcomes</a>
<ul>
<li><a
href="#/binary-outcomes-set-up-our-data-for-simulation-in-declaredesign">Binary
outcomes: Set up our data for simulation in DeclareDesign</a></li>
<li><a
href="#/binary-outcomes-set-up-our-data-for-simulation-in-declaredesign-1">Binary
outcomes: Set up our data for simulation in DeclareDesign</a></li>
<li><a href="#/binary-outcomes-estimands-i">Binary outcomes: Estimands
I</a></li>
<li><a href="#/binary-outcomes-estimands-ii">Binary outcomes: Estimands
II</a></li>
<li><a href="#/binary-outcomes-estimands-iii">Binary outcomes: Estimands
III</a></li>
<li><a href="#/an-example-of-estimation-i">An example of estimation
I</a></li>
<li><a href="#/an-example-of-estimation-ii">An example of estimation
II</a></li>
<li><a href="#/an-example-of-estimation-iii">An example of estimation
III</a></li>
<li><a
href="#/an-example-of-estimation-the-freedman-plugin-estimators-i">An
example of estimation: The Freedman plugin estimators I</a></li>
<li><a
href="#/an-example-of-estimation-the-freedman-plugin-estimators-ii">An
example of estimation: The Freedman plugin estimators II</a></li>
<li><a
href="#/an-example-of-using-declaredesign-to-assess-our-estimators-i">An
example of using DeclareDesign to assess our estimators I</a></li>
<li><a
href="#/an-example-of-using-declaredesign-to-assess-our-estimators-ii">An
example of using DeclareDesign to assess our estimators II</a></li>
<li><a href="#/using-simulation-to-assess-our-estimators">Using
simulation to assess our estimators</a></li>
<li><a href="#/which-estimator-is-closer-to-the-truth-2">Which estimator
is closer to the truth?</a></li>
</ul></li>
<li><a href="#/other-topics-in-estimation">Other topics in
estimation</a>
<ul>
<li><a href="#/covariance-adjustment-estimands">Covariance adjustment:
Estimands</a></li>
</ul></li>
<li><a href="#/conclusion">Conclusion</a>
<ul>
<li><a href="#/final-thoughts-on-basics-of-estimation">Final thoughts on
basics of estimation</a></li>
</ul></li>
<li><a
href="#/causal-effects-that-differ-by-groups-or-covariates">Causal
effects that differ by groups or covariates</a>
<ul>
<li><a href="#/effects-that-differ-by-groups-i">Effects that differ by
groups I</a></li>
<li><a href="#/effects-that-differ-by-groups-ii">Effects that differ by
groups II</a></li>
</ul></li>
<li><a href="#/causal-effects-when-we-do-not-control-the-dose">Causal
effects when we do not control the dose</a>
<ul>
<li><a href="#/defining-causal-effects-i">Defining causal effects
I</a></li>
<li><a href="#/defining-causal-effects-ii">Defining causal effects
II</a></li>
<li><a href="#/defining-causal-effects-iii">Defining causal effects
III</a></li>
<li><a href="#/defining-causal-effects-iv">Defining causal effects
IV</a></li>
<li><a href="#/defining-causal-effects-v">Defining causal effects
V</a></li>
<li><a href="#/defining-causal-effects-vi">Defining causal effects
VI</a></li>
<li><a href="#/defining-causal-effects-vii">Defining causal effects
VII</a></li>
<li><a href="#/learning-about-the-itt-i">Learning about the ITT
I</a></li>
<li><a href="#/learning-about-the-itt-ii">Learning about the ITT
II</a></li>
<li><a href="#/learning-about-the-itt-iii">Learning about the ITT
III</a></li>
<li><a href="#/learning-about-the-itt-iv">Learning about the ITT
IV</a></li>
<li><a href="#/the-complier-average-causal-effect-i">The complier
average causal effect I</a></li>
<li><a href="#/the-complier-average-causal-effect-ii">The complier
average causal effect II</a></li>
<li><a href="#/how-to-calculate-the-itt-and-cacelate-i">How to calculate
the ITT and CACE/LATE I</a></li>
<li><a href="#/how-to-calculate-the-itt-and-cacelate-ii">How to
calculate the ITT and CACE/LATE II</a></li>
<li><a href="#/how-to-calculate-the-itt-and-cacelate-iii">How to
calculate the ITT and CACE/LATE III</a></li>
<li><a
href="#/summary-of-encouragementcomplierdose-oriented-designs">Summary
of Encouragement/Complier/Dose oriented designs:</a></li>
<li><a href="#/references">References</a></li>
</ul></li>
</ul>
</section>

<section>
<section id="key-points" class="title-slide slide level1">
<h1>Key points</h1>

</section>
<section id="key-points-about-estimation-i" class="slide level2">
<h2>Key points about estimation I</h2>
<ul>
<li><p>A causal effect, <span class="math inline">\(\tau_i\)</span>, is
a comparison of unobserved potential outcomes for each unit <span
class="math inline">\(i\)</span>: examples <span
class="math inline">\(\tau_{i} = Y_{i}(T_{i}=1) -
Y_{i}(T_{i}=0)\)</span> or <span class="math inline">\(\tau_{i} =
\frac{Y_{i}(T_{i}=1)}{ Y_{i}(T_{i}=0)}\)</span>.</p></li>
<li><p>To learn about <span class="math inline">\(\tau_{i}\)</span>, we
can treat <span class="math inline">\(\tau_{i}\)</span> as an
<strong>estimand</strong> or target quantity to be estimated (discussed
here) or as a target quantity to be hypothesized about (session on
hypothesis testing).</p></li>
<li><p>Many focus on the <strong>average treatment effect
(ATE)</strong>, <span
class="math inline">\(\bar{\tau}=\frac{1}{n}\sum_{i=1}^n\tau_{i}\)</span>, in part,
because it allows for easy <strong>estimation</strong>.</p></li>
</ul>
</section>
<section id="key-points-about-estimation-ii" class="slide level2">
<h2>Key points about estimation II</h2>
<ul>
<li><p>The key to estimation for causal inference is to choose an
estimand that helps you learn about your theoretical or policy question.
So, one could use the ATE but other common estimands include the ITT,
LATE/CACE, ATT, or ATE for some subgroup (or even a difference of causal
effects between groups).</p></li>
<li><p>An <strong>estimator</strong> is a recipe for calculating a guess
about the value of an estimand. For example, the difference of observed
means between the <span class="math inline">\(m\)</span> treated units and
the <span class="math inline">\(n-m\)</span> control units is one
estimator of <span class="math inline">\(\bar{\tau}\)</span>: <span
class="math inline">\(\hat{\bar{\tau}} = \frac{\sum_{i=1}^n (T_i
Y_i)}{m} - \frac{\sum_{i=1}^n ( ( 1 -
T_i)Y_i)}{(n-m)}\)</span>.</p></li>
</ul>
</section>
<section id="key-points-about-estimation-iii" class="slide level2">
<h2>Key points about estimation III</h2>
<ul>
<li><p>The <strong>standard error</strong> of an estimator in a
randomized experiment summarizes how the estimates would vary if the
experiment were repeated.</p></li>
<li><p>We use the <strong>standard error</strong> to produce
<strong>confidence intervals</strong> and <strong>p-values</strong>, so
that we can begin with an estimator and end at a hypothesis
test.</p></li>
<li><p>Different randomizations will produce different values of the
same estimator targeting the same estimand. A <strong>standard
error</strong> summarizes this variability in an estimator.</p></li>
<li><p>A <span class="math inline">\(100(1-\alpha)\)</span>%
<strong>confidence interval</strong> is a collection of hypotheses that
cannot be rejected at the <span class="math inline">\(\alpha\)</span>
level. We tend to report confidence intervals containing hypotheses
about values of our estimand and use our estimator as a test
statistic.</p></li>
</ul>
</section>
<section id="key-points-about-estimation-iv" class="slide level2">
<h2>Key points about estimation IV</h2>
<ul>
<li><p>Estimators should:</p>
<ul>
<li><p>avoid systematic error in their guessing of the estimand (be
unbiased);</p></li>
<li><p>vary little in their guesses from experiment to experiment (be
precise or efficient); and</p></li>
<li><p>perhaps ideally converge to the estimand as they use more and
more information (be consistent).</p></li>
</ul></li>
</ul>
</section>
<section id="key-points-about-estimation-v" class="slide level2">
<h2>Key points about estimation V</h2>
<ul>
<li><p><strong>Analyze as you randomize</strong> in the context of
estimation means that (1) our standard errors should measure variability
from randomization and (2) our estimators should target estimands
defined in terms of potential outcomes.</p></li>
<li><p>We do not <strong>control for</strong> background covariates when
we analyze data from randomized experiments. But covariates can make our
estimation more <strong>precise</strong>. This is called
<strong>covariance adjustment</strong> (or covariate adjustment).
<strong>Covariance adjustment</strong> in randomized experiments differs
from controlling for covariates in observational studies.</p></li>
</ul>
</section></section>
<section>
<section id="review" class="title-slide slide level1">
<h1>Review</h1>

</section>
<section id="review-causal-effects" class="slide level2">
<h2>Review: Causal effects</h2>
<p>Review: Causal inference refers to a comparison of unobserved, fixed,
potential outcomes.</p>
<p>For example:</p>
<ul>
<li>the potential, or possible, outcome for unit <span
class="math inline">\(i\)</span> when assigned to treatment, <span
class="math inline">\(T_i=1\)</span> is <span
class="math inline">\(Y_{i}(T_{i}=1)\)</span>.</li>
<li>the potential, or possible, outcome for unit <span
class="math inline">\(i\)</span> when assigned to control, <span
class="math inline">\(T_i=0\)</span> is <span
class="math inline">\(Y_{i}(T_{i}=0)\)</span>.</li>
</ul>
<p>Treatment assignment, <span class="math inline">\(T_i\)</span>, has a
causal effect on unit <span class="math inline">\(i\)</span>, that we
call <span class="math inline">\(\tau_i\)</span>, if <span
class="math inline">\(Y_{i}(T_{i}=1) - Y_{i}(T_{i}=0) \ne 0\)</span> or
<span class="math inline">\(Y_{i}(T_{i}=1) \ne
Y_{i}(T_{i}=0)\)</span>.</p>
</section></section>
<section>
<section id="estimands-and-estimators-and-averages"
class="title-slide slide level1">
<h1>Estimands and estimators and averages</h1>

</section>
<section id="how-can-we-learn-about-causal-effects-from-observed-data"
class="slide level2">
<h2>How can we learn about causal effects from observed data?</h2>
<ol type="1">
<li><p>Recall: we can <strong>test hypotheses</strong> about the pair of
potential outcomes <span class="math inline">\(\{ Y_{i}(T_{i}=1),
Y_{i}(T_{i}=0) \}\)</span>.</p></li>
<li><p>We can <strong>define estimands</strong> in terms of <span
class="math inline">\(\{ Y_{i}(T_{i}=1), Y_{i}(T_{i}=0) \}\)</span> or
<span class="math inline">\(\tau_i\)</span>, <strong>develop
estimators</strong> for those estimands, and then calculate values and
standard errors for those estimators.</p></li>
</ol>
</section>
<section
id="a-common-estimand-and-estimator-the-average-treatment-effect-and-the-difference-of-means"
class="slide level2">
<h2>A common estimand and estimator: The average treatment effect and
the difference of means</h2>
<p>Say we are interested in the ATE, or <span
class="math inline">\(\bar{\tau}=\frac{1}{n}\sum_{i=1}^n \tau_{i}\)</span>. What is
a good estimator?</p>
<p>Two candidates:</p>
<ol type="1">
<li><p>The difference of means: <span
class="math inline">\(\hat{\bar{\tau}} = \frac{\sum_{i=1}^n (T_i
Y_i)}{m} - \frac{\sum_{i=1}^n ( ( 1 - T_i) Y_i)}{n-m}\)</span>.</p></li>
<li><p>A difference of means after top-coding the highest <span
class="math inline">\(Y_i\)</span> observation (a kind of “winsorized”
mean to prevent extreme values from exerting too much influence over our
estimator — to increase <em>precision</em>).</p></li>
</ol>
<p>How would we know which estimator is best for our particular research
design?</p>
<p>Let’s simulate!</p>
</section>
<section id="simulation-step-1-create-some-data-with-a-known-ate"
class="slide level2">
<h2>Simulation Step 1: create some data with a known ATE</h2>
<p>Notice that we need to <em>know</em> the potential outcomes and the
treatment assignment in order to learn whether our proposed estimator
does a good job.</p>
<pre><code>The true ATE is 54</code></pre>
<p>In reality, we would observe only one of the potential outcomes.</p>
<p>Note that each unit has its own treatment effect.</p>
</section>
<section id="first-make-fake-data" class="slide level2">
<h2>First make fake data</h2>
<p>The table in the previous slide was generated in R with:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="co"># We have ten units</span></span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a>N <span class="ot">&lt;-</span> <span class="dv">10</span></span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="co">#  y0 is potential outcome to control</span></span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a>y0 <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">1</span>, <span class="dv">1</span>, <span class="dv">3</span>, <span class="dv">4</span>, <span class="dv">5</span>, <span class="dv">190</span>, <span class="dv">200</span>)</span>
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Each unit has its own treatment effect</span></span>
<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a>tau <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">10</span>, <span class="dv">30</span>, <span class="dv">200</span>, <span class="dv">90</span>, <span class="dv">10</span>, <span class="dv">20</span>, <span class="dv">30</span>, <span class="dv">40</span>, <span class="dv">90</span>, <span class="dv">20</span>)</span>
<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a><span class="co"># y1 is potential outcome to treatment</span></span>
<span id="cb2-8"><a href="#cb2-8" aria-hidden="true" tabindex="-1"></a>y1 <span class="ot">&lt;-</span> y0 <span class="sc">+</span> tau</span>
<span id="cb2-9"><a href="#cb2-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Two blocks, a and b</span></span>
<span id="cb2-10"><a href="#cb2-10" aria-hidden="true" tabindex="-1"></a>block <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="st">&quot;a&quot;</span>, <span class="st">&quot;a&quot;</span>, <span class="st">&quot;a&quot;</span>, <span class="st">&quot;a&quot;</span>, <span class="st">&quot;a&quot;</span>, <span class="st">&quot;a&quot;</span>, <span class="st">&quot;b&quot;</span>, <span class="st">&quot;b&quot;</span>, <span class="st">&quot;b&quot;</span>, <span class="st">&quot;b&quot;</span>)</span>
<span id="cb2-11"><a href="#cb2-11" aria-hidden="true" tabindex="-1"></a><span class="co"># Z is treatment assignment (Z instead of T in the code)</span></span>
<span id="cb2-12"><a href="#cb2-12" aria-hidden="true" tabindex="-1"></a>Z <span class="ot">&lt;-</span> <span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">1</span>, <span class="dv">1</span>, <span class="dv">0</span>, <span class="dv">0</span>, <span class="dv">1</span>, <span class="dv">1</span>)</span>
<span id="cb2-13"><a href="#cb2-13" aria-hidden="true" tabindex="-1"></a><span class="co"># Y is observed outcomes</span></span>
<span id="cb2-14"><a href="#cb2-14" aria-hidden="true" tabindex="-1"></a>Y <span class="ot">&lt;-</span> Z <span class="sc">*</span> y1 <span class="sc">+</span> (<span class="dv">1</span> <span class="sc">-</span> Z) <span class="sc">*</span> y0</span>
<span id="cb2-15"><a href="#cb2-15" aria-hidden="true" tabindex="-1"></a><span class="co"># The data</span></span>
<span id="cb2-16"><a href="#cb2-16" aria-hidden="true" tabindex="-1"></a>dat <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(<span class="at">Z =</span> Z, <span class="at">y0 =</span> y0, <span class="at">y1 =</span> y1, <span class="at">tau =</span> tau, <span class="at">b =</span> block, <span class="at">Y =</span> Y)</span>
<span id="cb2-17"><a href="#cb2-17" aria-hidden="true" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">12345</span>)</span></code></pre></div>
</section>
<section id="using-declaredesign" class="slide level2">
<h2>Using DeclareDesign</h2>
<p>DeclareDesign represents research designs in a few steps shown
below:</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># take just the potential outcomes under treatment and control from our</span></span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a><span class="co"># fake data</span></span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>small_dat <span class="ot">&lt;-</span> dat[, <span class="fu">c</span>(<span class="st">&quot;y0&quot;</span>, <span class="st">&quot;y1&quot;</span>)]</span>
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a><span class="co"># DeclareDesign first asks you to declare your population</span></span>
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a>pop <span class="ot">&lt;-</span> <span class="fu">declare_population</span>(small_dat)</span>
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a>N <span class="ot">&lt;-</span> <span class="fu">nrow</span>(small_dat)</span>
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a><span class="co"># 2 units assigned to treatment; default is simple random assignment with</span></span>
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a><span class="co"># probability 0.5</span></span>
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a>trt_assign <span class="ot">&lt;-</span> <span class="fu">declare_assignment</span>(<span class="at">Z =</span> <span class="fu">conduct_ra</span>(<span class="at">N =</span> N, <span class="at">m =</span> <span class="dv">2</span>), <span class="at">legacy =</span> <span class="cn">FALSE</span>)</span>
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a><span class="co"># observed Y is y1 if Z=1 and y0 if Z=0</span></span>
<span id="cb3-14"><a href="#cb3-14" aria-hidden="true" tabindex="-1"></a>pot_out <span class="ot">&lt;-</span> <span class="fu">declare_potential_outcomes</span>(Y <span class="sc">~</span> Z <span class="sc">*</span> y1 <span class="sc">+</span> (<span class="dv">1</span> <span class="sc">-</span> Z) <span class="sc">*</span> y0)</span>
<span id="cb3-15"><a href="#cb3-15" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-16"><a href="#cb3-16" aria-hidden="true" tabindex="-1"></a><span class="co"># specify outcome and assignment variables</span></span>
<span id="cb3-17"><a href="#cb3-17" aria-hidden="true" tabindex="-1"></a>reveal <span class="ot">&lt;-</span> <span class="fu">declare_reveal</span>(Y, Z)</span>
<span id="cb3-18"><a href="#cb3-18" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb3-19"><a href="#cb3-19" aria-hidden="true" tabindex="-1"></a><span class="co"># the basic research design object includes these four objects</span></span>
<span id="cb3-20"><a href="#cb3-20" aria-hidden="true" tabindex="-1"></a>base_design <span class="ot">&lt;-</span> pop <span class="sc">+</span> trt_assign <span class="sc">+</span> pot_out <span class="sc">+</span> reveal</span></code></pre></div>
</section>
<section id="using-declaredesign-make-fake-data" class="slide level2">
<h2>Using DeclareDesign: make fake data</h2>
<p>DeclareDesign renames <code>y0</code> and <code>y1</code> by default
to <code>Y_Z_0</code> and <code>Y_Z_1</code>:</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="do">## A simulation is one random assignment of treatment</span></span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a>sim_dat1 <span class="ot">&lt;-</span> <span class="fu">draw_data</span>(base_design)</span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a><span class="do">## Simulated data (just the first 6 lines)</span></span>
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(sim_dat1)</span></code></pre></div>
<pre><code>  y0  y1 Z Y_Z_0 Y_Z_1  Y
1  0  10 0     0    10  0
2  0  30 1     0    30 30
3  0 200 0     0   200  0
4  1  91 0     1    91  1
5  1  11 0     1    11  1
6  3  23 1     3    23 23</code></pre>
</section>
<section id="using-declaredesign-define-estimand-and-estimators"
class="slide level2">
<h2>Using DeclareDesign: define estimand and estimators</h2>
<p>No output here. Just define functions and estimators and one
estimand.</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="do">## The estimand</span></span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a>estimandATE <span class="ot">&lt;-</span> <span class="fu">declare_inquiry</span>(<span class="at">ATE =</span> <span class="fu">mean</span>(Y_Z_1 <span class="sc">-</span> Y_Z_0))</span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a><span class="do">## The first estimator is difference-in-means</span></span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a>diff_means <span class="ot">&lt;-</span> <span class="fu">declare_estimator</span>(Y <span class="sc">~</span> Z,</span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a>  <span class="at">inquiry =</span> estimandATE,</span>
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a>  <span class="at">model =</span> lm_robust, <span class="at">se_type =</span> <span class="st">&quot;classical&quot;</span>, <span class="at">label =</span> <span class="st">&quot;Diff-Means/OLS&quot;</span></span>
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
</section>
<section id="using-declaredesign-define-estimand-and-estimators-1"
class="slide level2">
<h2>Using DeclareDesign: define estimand and estimators</h2>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="do">## The second estimator is top-coded difference-in-means</span></span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a>diff_means_topcoded_fn <span class="ot">&lt;-</span> <span class="cf">function</span>(data) {</span>
<span id="cb7-3"><a href="#cb7-3" aria-hidden="true" tabindex="-1"></a>  data<span class="sc">$</span>rankY <span class="ot">&lt;-</span> <span class="fu">rank</span>(data<span class="sc">$</span>Y)</span>
<span id="cb7-4"><a href="#cb7-4" aria-hidden="true" tabindex="-1"></a>  <span class="do">## Code the maximum value of Y as the second to maximum value of Y</span></span>
<span id="cb7-5"><a href="#cb7-5" aria-hidden="true" tabindex="-1"></a>  data<span class="sc">$</span>newY <span class="ot">&lt;-</span> <span class="fu">with</span>(</span>
<span id="cb7-6"><a href="#cb7-6" aria-hidden="true" tabindex="-1"></a>    data,</span>
<span id="cb7-7"><a href="#cb7-7" aria-hidden="true" tabindex="-1"></a>    <span class="fu">ifelse</span>(rankY <span class="sc">==</span> <span class="fu">max</span>(rankY), Y[rankY <span class="sc">==</span> (<span class="fu">max</span>(rankY) <span class="sc">-</span> <span class="dv">1</span>)], Y)</span>
<span id="cb7-8"><a href="#cb7-8" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb7-9"><a href="#cb7-9" aria-hidden="true" tabindex="-1"></a>  obj <span class="ot">&lt;-</span> <span class="fu">lm_robust</span>(newY <span class="sc">~</span> Z, <span class="at">data =</span> data, <span class="at">se_type =</span> <span class="st">&quot;classical&quot;</span>)</span>
<span id="cb7-10"><a href="#cb7-10" aria-hidden="true" tabindex="-1"></a>  res <span class="ot">&lt;-</span> <span class="fu">tidy</span>(obj) <span class="sc">%&gt;%</span> <span class="fu">filter</span>(term <span class="sc">==</span> <span class="st">&quot;Z&quot;</span>)</span>
<span id="cb7-11"><a href="#cb7-11" aria-hidden="true" tabindex="-1"></a>  <span class="fu">return</span>(res)</span>
<span id="cb7-12"><a href="#cb7-12" aria-hidden="true" tabindex="-1"></a>}</span>
<span id="cb7-13"><a href="#cb7-13" aria-hidden="true" tabindex="-1"></a>diff_means_topcoded <span class="ot">&lt;-</span> <span class="fu">declare_estimator</span>(</span>
<span id="cb7-14"><a href="#cb7-14" aria-hidden="true" tabindex="-1"></a>  <span class="at">handler =</span> <span class="fu">label_estimator</span>(diff_means_topcoded_fn),</span>
<span id="cb7-15"><a href="#cb7-15" aria-hidden="true" tabindex="-1"></a>  <span class="at">inquiry =</span> estimandATE, <span class="at">label =</span> <span class="st">&quot;Top-coded Diff Means&quot;</span></span>
<span id="cb7-16"><a href="#cb7-16" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
</section>
<section id="using-declaredesign-define-estimand-and-estimators-2"
class="slide level2">
<h2>Using DeclareDesign: define estimand and estimators</h2>
<p>Here we show how the DD estimators work using our simulated data.</p>
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Demonstrate that the estimand works:</span></span>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="fu">estimandATE</span>(sim_dat1)</span></code></pre></div>
<pre><code>  inquiry estimand
1     ATE       54</code></pre>
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Demonstrate that the estimators estimate</span></span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a><span class="do">## Estimator 1 (difference in means)</span></span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a><span class="fu">diff_means</span>(sim_dat1)[<span class="sc">-</span><span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">10</span>, <span class="dv">11</span>)]</span></code></pre></div>
<pre><code>  estimate std.error statistic p.value conf.low conf.high df
1   -23.62     66.18    -0.357  0.7304   -176.2       129  8</code></pre>
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Estimator 2 (top-coded difference in means)</span></span>
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="fu">diff_means_topcoded</span>(sim_dat1)[<span class="sc">-</span><span class="fu">c</span>(<span class="dv">1</span>, <span class="dv">2</span>, <span class="dv">10</span>, <span class="dv">11</span>)]</span></code></pre></div>
<pre><code>  estimate std.error statistic p.value conf.low conf.high df
1   -22.37     64.44   -0.3472  0.7374     -171     126.2  8</code></pre>
</section>
<section id="then-simulate-with-one-randomization" class="slide level2">
<h2>Then simulate with one randomization</h2>
<p>Recall the true ATE:</p>
<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a>trueATE <span class="ot">&lt;-</span> <span class="fu">with</span>(sim_dat1, <span class="fu">mean</span>(y1 <span class="sc">-</span> y0))</span>
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a><span class="fu">with</span>(sim_dat1, <span class="fu">mean</span>(Y_Z_1 <span class="sc">-</span> Y_Z_0))</span></code></pre></div>
<pre><code>[1] 54</code></pre>
<p>In one experiment (one simulation of the data) here are the simple
estimates:</p>
<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Two ways to calculate the difference of means estimator</span></span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>est_diff_means_1 <span class="ot">&lt;-</span> <span class="fu">with</span>(sim_dat1, <span class="fu">mean</span>(Y[Z <span class="sc">==</span> <span class="dv">1</span>]) <span class="sc">-</span> <span class="fu">mean</span>(Y[Z <span class="sc">==</span> <span class="dv">0</span>]))</span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a>est_diff_means_2 <span class="ot">&lt;-</span> <span class="fu">coef</span>(<span class="fu">lm_robust</span>(Y <span class="sc">~</span> Z,</span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a>  <span class="at">data =</span> sim_dat1,</span>
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a>  <span class="at">se =</span> <span class="st">&quot;classical&quot;</span></span>
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a>))[[<span class="st">&quot;Z&quot;</span>]]</span>
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a><span class="fu">c</span>(est_diff_means_1, est_diff_means_2)</span></code></pre></div>
<pre><code>[1] -23.62 -23.62</code></pre>
</section>
<section id="then-simulate-with-one-randomization-1"
class="slide level2">
<h2>Then simulate with one randomization</h2>
<p>In one experiment (one simulation of the data) here are the estimates
after top-coding:</p>
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Two ways to calculate the topcoded difference of means estimator</span></span>
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a>sim_dat1<span class="sc">$</span>rankY <span class="ot">&lt;-</span> <span class="fu">rank</span>(sim_dat1<span class="sc">$</span>Y)</span>
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a>sim_dat1<span class="sc">$</span>Y_tc <span class="ot">&lt;-</span> <span class="fu">with</span>(sim_dat1, <span class="fu">ifelse</span>(rankY <span class="sc">==</span> <span class="fu">max</span>(rankY),</span>
<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a>  Y[rankY <span class="sc">==</span> (<span class="fu">max</span>(rankY) <span class="sc">-</span> <span class="dv">1</span>)], Y</span>
<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a>))</span>
<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a>est_topcoded_1 <span class="ot">&lt;-</span> <span class="fu">with</span>(sim_dat1, <span class="fu">mean</span>(Y_tc[Z <span class="sc">==</span> <span class="dv">1</span>]) <span class="sc">-</span> <span class="fu">mean</span>(Y_tc[Z <span class="sc">==</span> <span class="dv">0</span>]))</span>
<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a>est_topcoded_2 <span class="ot">&lt;-</span> <span class="fu">coef</span>(<span class="fu">lm_robust</span>(Y_tc <span class="sc">~</span> Z,</span>
<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a>  <span class="at">data =</span> sim_dat1,</span>
<span id="cb18-9"><a href="#cb18-9" aria-hidden="true" tabindex="-1"></a>  <span class="at">se =</span> <span class="st">&quot;classical&quot;</span></span>
<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a>))[[<span class="st">&quot;Z&quot;</span>]]</span>
<span id="cb18-11"><a href="#cb18-11" aria-hidden="true" tabindex="-1"></a><span class="fu">c</span>(est_topcoded_1, est_topcoded_2)</span></code></pre></div>
<pre><code>[1] -22.38 -22.37</code></pre>
</section>
<section
id="then-simulate-a-different-randomization-and-estimate-the-ate-with-the-same-estimators"
class="slide level2">
<h2>Then simulate a different randomization and estimate the ATE with
the same estimators</h2>
<p>Now calculate your estimate with the same estimators using a
<strong>different</strong> randomization. Notice that the answers
differ. The estimators are estimating the <em>same estimand</em> but now
they have a different randomization to work with.</p>
<div class="sourceCode" id="cb20"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="co"># do another random assignment of the treatment in DeclareDesign</span></span>
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a><span class="co"># this produces a new simulated dataset with a different random assignment</span></span>
<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a>sim_dat2 <span class="ot">&lt;-</span> <span class="fu">draw_data</span>(base_design)</span>
<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a><span class="co"># the first estimator (difference in means)</span></span>
<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a><span class="fu">coef</span>(<span class="fu">lm_robust</span>(Y <span class="sc">~</span> Z, <span class="at">data =</span> sim_dat2, <span class="at">se =</span> <span class="st">&quot;classical&quot;</span>))[[<span class="st">&quot;Z&quot;</span>]]</span></code></pre></div>
<pre><code>[1] -18</code></pre>
<div class="sourceCode" id="cb22"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="co"># the second estimator (top-coded difference in means)</span></span>
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a>sim_dat2<span class="sc">$</span>rankY <span class="ot">&lt;-</span> <span class="fu">rank</span>(sim_dat2<span class="sc">$</span>Y)</span>
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a>sim_dat2<span class="sc">$</span>Y_tc <span class="ot">&lt;-</span> <span class="fu">with</span>(sim_dat2, <span class="fu">ifelse</span>(rankY <span class="sc">==</span> <span class="fu">max</span>(rankY),</span>
<span id="cb22-4"><a href="#cb22-4" aria-hidden="true" tabindex="-1"></a>  Y[rankY <span class="sc">==</span> (<span class="fu">max</span>(rankY) <span class="sc">-</span> <span class="dv">1</span>)], Y</span>
<span id="cb22-5"><a href="#cb22-5" aria-hidden="true" tabindex="-1"></a>))</span>
<span id="cb22-6"><a href="#cb22-6" aria-hidden="true" tabindex="-1"></a><span class="fu">coef</span>(<span class="fu">lm_robust</span>(Y_tc <span class="sc">~</span> Z, <span class="at">data =</span> sim_dat2, <span class="at">se =</span> <span class="st">&quot;classical&quot;</span>))[[<span class="st">&quot;Z&quot;</span>]]</span></code></pre></div>
<pre><code>[1] -16.75</code></pre>
</section>
<section id="how-do-our-estimators-behave-in-general-for-this-design"
class="slide level2">
<h2>How do our estimators behave in general for this design?</h2>
<p>Our estimates vary across randomizations. Do our two estimators vary
in the same ways?</p>
<div class="sourceCode" id="cb24"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Combine into one DeclareDesign design object</span></span>
<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a><span class="do">## This has the base design, estimand, then our two estimators</span></span>
<span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a>design_plus_ests <span class="ot">&lt;-</span> base_design <span class="sc">+</span> estimandATE <span class="sc">+</span> diff_means <span class="sc">+</span></span>
<span id="cb24-4"><a href="#cb24-4" aria-hidden="true" tabindex="-1"></a>  diff_means_topcoded</span>
<span id="cb24-5"><a href="#cb24-5" aria-hidden="true" tabindex="-1"></a><span class="do">## Run 100 simulations (reassignments of treatment) and</span></span>
<span id="cb24-6"><a href="#cb24-6" aria-hidden="true" tabindex="-1"></a><span class="do">## apply the two estimators (diff_means and diff_means_topcoded)</span></span>
<span id="cb24-7"><a href="#cb24-7" aria-hidden="true" tabindex="-1"></a>diagnosis1 <span class="ot">&lt;-</span> <span class="fu">diagnose_design</span>(design_plus_ests,</span>
<span id="cb24-8"><a href="#cb24-8" aria-hidden="true" tabindex="-1"></a>  <span class="at">bootstrap_sims =</span> <span class="dv">0</span>, <span class="at">sims =</span> <span class="dv">100</span></span>
<span id="cb24-9"><a href="#cb24-9" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb24-10"><a href="#cb24-10" aria-hidden="true" tabindex="-1"></a>sims1 <span class="ot">&lt;-</span> <span class="fu">get_simulations</span>(diagnosis1)</span>
<span id="cb24-11"><a href="#cb24-11" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(sims1[, <span class="sc">-</span><span class="fu">c</span>(<span class="dv">1</span><span class="sc">:</span><span class="dv">6</span>)])</span></code></pre></div>
<pre><code>  estimate std.error statistic p.value conf.low conf.high df outcome
1   -18.00     66.23  -0.27179  0.7927   -170.7     134.7  8       Y
2   -16.75     64.49  -0.25974  0.8016   -165.5     132.0  8    newY
3     0.75     67.98   0.01103  0.9915   -156.0     157.5  8       Y
4     2.00     66.29   0.03017  0.9767   -150.9     154.9  8    newY
5    54.50     75.93   0.71775  0.4933   -120.6     229.6  8       Y
6   -19.14     75.85  -0.25237  0.8092   -204.7     166.5  6    newY</code></pre>
</section>
<section id="how-do-our-estimators-behave-in-general-for-this-design-1"
class="slide level2">
<h2>How do our estimators behave in general for this design?</h2>
<p>Our estimates vary across randomizations. Do our two estimators vary
in the same ways? How should we interpret this plot?</p>
<p><img src="figs/figsim_plot-1.png" style="width: 80%;" alt="Plot of the estimates produced by the two estimators across the simulated randomizations"></p>
</section>
<section id="which-estimator-is-closer-to-the-truth"
class="slide level2">
<h2>Which estimator is closer to the truth?</h2>
<p>One way to choose among estimators is to choose the one that is
<strong>close to the truth</strong> whenever we use it — regardless of
the specific randomization.</p>
<p>An “unbiased” estimator is one for which the <strong>average of the
estimates across repeated designs</strong> is the same as the truth (or
<span class="math inline">\(E_R(\hat{\bar{\tau}})=\bar{\tau}\)</span>).
An unbiased estimator has “no systematic error” but doesn’t guarantee
closeness to the truth.</p>
<p>Another measure of closeness is <strong>root mean squared
error</strong> (RMSE), which is the square root of the average squared
distance between the truth and the individual estimates.</p>
<p>Which estimator is better? (One is closer to the truth on average
(RMSE) and is more precise. The other has no systematic error — is
unbiased.)</p>
<table>
<thead>
<tr class="header">
<th style="text-align: left;">Estimator</th>
<th style="text-align: left;">Bias</th>
<th style="text-align: left;">RMSE</th>
<th style="text-align: left;">SD Estimate</th>
<th style="text-align: left;">Mean SE</th>
<th style="text-align: left;">Power</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td style="text-align: left;">Diff-Means/OLS</td>
<td style="text-align: left;">-5.70</td>
<td style="text-align: left;">71.23</td>
<td style="text-align: left;">71.36</td>
<td style="text-align: left;">66.54</td>
<td style="text-align: left;">0.09</td>
</tr>
<tr class="even">
<td style="text-align: left;">Top-coded Diff Means</td>
<td style="text-align: left;">-25.50</td>
<td style="text-align: left;">64.86</td>
<td style="text-align: left;">59.94</td>
<td style="text-align: left;">64.53</td>
<td style="text-align: left;">0.03</td>
</tr>
</tbody>
</table>
</section>
<section id="unbiased-and-biased-estimators" class="slide level2">
<h2>Unbiased and biased estimators</h2>
<p>Summary:</p>
<ul>
<li><p>We have a <em>choice</em> of both estimands and
estimators</p></li>
<li><p>A good estimator performs well regardless of the particular
randomization of a given design. And <em>performs well</em> can mean
“unbiased” and/or “low MSE” (or “consistent” — which means increasingly
close to the truth as the sample size increases).</p></li>
<li><p>We can learn about how a given estimator performs in a given
study using simulation.</p></li>
</ul>
</section></section>
<section>
<section id="block-randomization" class="title-slide slide level1">
<h1>Block randomization</h1>

</section>
<section
id="block-randomized-experiments-are-a-collection-of-mini-experiments"
class="slide level2">
<h2>Block-randomized experiments are a collection of
mini-experiments</h2>
<p>What is the <strong>ATE</strong> estimand in a block-randomized
experiment?</p>
<p>If we think of the unit-level ATE as: <span
class="math inline">\((1/N) \sum_{i=1}^N (y_{i,1} - y_{i,0})\)</span> then
we could re-express this equivalently using the ATE in block <span
class="math inline">\(j\)</span>, written <span
class="math inline">\(ATE_j\)</span>, as follows:</p>
<p><span class="math display">\[
ATE = \frac{1}{N}\sum^J_{j=1} \sum^{N_j}_{i=1} (y_{i,1} - y_{i,0})
= \sum^J_{j=1} \frac{N_j}{N} \sum^{N_j}_{i=1} \frac{y_{i,1} -
y_{i,0}}{N_j}  = \sum^J_{j=1} \frac{N_j}{N} ATE_j
\]</span></p>
<p>And it would be natural to <em>estimate</em> this quantity by
plugging in what we can calculate: <span
class="math inline">\(\widehat{ATE} = \displaystyle\sum^J_{j=1}
\frac{N_j}{N} \widehat{ATE}_j\)</span></p>
</section>
<section
id="block-randomized-experiments-are-a-collection-of-mini-experiments-1"
class="slide level2">
<h2>Block-randomized experiments are a collection of
mini-experiments</h2>
<p>And we could <em>define</em> the standard error of the estimator by
similarly combining the within-block standard errors using the
block-size weights (if our blocks are large enough):</p>
<p><span class="math inline">\(SE(\widehat{ATE}) = \sqrt{\sum^J_{j=1}
(\frac{N_{j}}{N})^2SE^2(\widehat{ATE}_j)}\)</span></p>
</section>
<section id="estimating-the-ate-in-block-randomized-experiments"
class="slide level2">
<h2>Estimating the ATE in block-randomized experiments</h2>
<p>One approach to estimation simply replaces <span
class="math inline">\(ATE_j\)</span> with <span
class="math inline">\(\widehat{ATE}_j\)</span> above:</p>
<div class="sourceCode" id="cb26"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="fu">with</span>(dat, <span class="fu">table</span>(b, Z))</span></code></pre></div>
<pre><code>   Z
b   0 1
  a 4 2
  b 2 2</code></pre>
<p>We have 6 units in block <code>a</code>, 2 of which are assigned to
treatment, and 4 units in block <code>b</code>, 2 of which are
assigned to treatment.</p>
</section>
<section id="estimating-the-ate-in-block-randomized-experiments-1"
class="slide level2">
<h2>Estimating the ATE in block-randomized experiments</h2>
<p>One approach to estimation simply replaces <span
class="math inline">\(ATE_j\)</span> with <span
class="math inline">\(\widehat{ATE}_j\)</span> above:</p>
<div class="sourceCode" id="cb28"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb28-1"><a href="#cb28-1" aria-hidden="true" tabindex="-1"></a>datb <span class="ot">&lt;-</span> dat <span class="sc">%&gt;%</span></span>
<span id="cb28-2"><a href="#cb28-2" aria-hidden="true" tabindex="-1"></a>  <span class="fu">group_by</span>(b) <span class="sc">%&gt;%</span></span>
<span id="cb28-3"><a href="#cb28-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">summarize</span>(</span>
<span id="cb28-4"><a href="#cb28-4" aria-hidden="true" tabindex="-1"></a>    <span class="at">nb =</span> <span class="fu">n</span>(), <span class="at">pb =</span> <span class="fu">mean</span>(Z), <span class="at">estateb =</span> <span class="fu">mean</span>(Y[Z <span class="sc">==</span> <span class="dv">1</span>]) <span class="sc">-</span> <span class="fu">mean</span>(Y[Z <span class="sc">==</span> <span class="dv">0</span>]),</span>
<span id="cb28-5"><a href="#cb28-5" aria-hidden="true" tabindex="-1"></a>    <span class="at">ateb =</span> <span class="fu">mean</span>(y1 <span class="sc">-</span> y0), <span class="at">.groups =</span> <span class="st">&quot;drop&quot;</span></span>
<span id="cb28-6"><a href="#cb28-6" aria-hidden="true" tabindex="-1"></a>  )</span>
<span id="cb28-7"><a href="#cb28-7" aria-hidden="true" tabindex="-1"></a>datb</span></code></pre></div>
<pre><code># A tibble: 2 × 5
  b        nb    pb estateb  ateb
  &lt;chr&gt; &lt;int&gt; &lt;dbl&gt;   &lt;dbl&gt; &lt;dbl&gt;
1 a         6 0.333    16.8    60
2 b         4 0.5     246.     45</code></pre>
<div class="sourceCode" id="cb30"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb30-1"><a href="#cb30-1" aria-hidden="true" tabindex="-1"></a><span class="do">## True overall ate (averaging over all units):</span></span>
<span id="cb30-2"><a href="#cb30-2" aria-hidden="true" tabindex="-1"></a><span class="fu">with</span>(dat, <span class="fu">mean</span>(y1 <span class="sc">-</span> y0))</span></code></pre></div>
<pre><code>[1] 54</code></pre>
<div class="sourceCode" id="cb32"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb32-1"><a href="#cb32-1" aria-hidden="true" tabindex="-1"></a><span class="do">## This is another way to calculate the true ate</span></span>
<span id="cb32-2"><a href="#cb32-2" aria-hidden="true" tabindex="-1"></a><span class="fu">with</span>(datb, <span class="fu">sum</span>(ateb <span class="sc">*</span> (nb <span class="sc">/</span> <span class="fu">sum</span>(nb))))</span></code></pre></div>
<pre><code>[1] 54</code></pre>
</section>
<section id="estimating-the-ate-in-block-randomized-experiments-2"
class="slide level2">
<h2>Estimating the ATE in block-randomized experiments</h2>
<p>One approach is to estimate the overall ATE using block-size
weights:</p>
<div class="sourceCode" id="cb34"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb34-1"><a href="#cb34-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Showing that difference_in_means uses the blocksize weight.</span></span>
<span id="cb34-2"><a href="#cb34-2" aria-hidden="true" tabindex="-1"></a>e1 <span class="ot">&lt;-</span> <span class="fu">difference_in_means</span>(Y <span class="sc">~</span> Z, <span class="at">blocks =</span> b, <span class="at">data =</span> dat)</span>
<span id="cb34-3"><a href="#cb34-3" aria-hidden="true" tabindex="-1"></a>e2 <span class="ot">&lt;-</span> <span class="fu">with</span>(datb, <span class="fu">sum</span>(estateb <span class="sc">*</span> (nb <span class="sc">/</span> <span class="fu">sum</span>(nb))))</span>
<span id="cb34-4"><a href="#cb34-4" aria-hidden="true" tabindex="-1"></a><span class="fu">c</span>(<span class="fu">coef</span>(e1)[[<span class="st">&quot;Z&quot;</span>]], e2)</span></code></pre></div>
<pre><code>[1] 108.2 108.2</code></pre>
</section>
<section id="estimating-the-ate-in-block-randomized-experiments-3"
class="slide level2">
<h2>Estimating the ATE in block-randomized experiments</h2>
<p>Notice that this is <strong>not</strong> the same as either of the
following:</p>
<div class="sourceCode" id="cb36"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb36-1"><a href="#cb36-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Ignoring blocks</span></span>
<span id="cb36-2"><a href="#cb36-2" aria-hidden="true" tabindex="-1"></a>e3 <span class="ot">&lt;-</span> <span class="fu">lm</span>(Y <span class="sc">~</span> Z, <span class="at">data =</span> dat)</span>
<span id="cb36-3"><a href="#cb36-3" aria-hidden="true" tabindex="-1"></a><span class="fu">coef</span>(e3)[[<span class="st">&quot;Z&quot;</span>]]</span></code></pre></div>
<pre><code>[1] 131.8</code></pre>
<div class="sourceCode" id="cb38"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb38-1"><a href="#cb38-1" aria-hidden="true" tabindex="-1"></a><span class="do">## With block fixed effects</span></span>
<span id="cb38-2"><a href="#cb38-2" aria-hidden="true" tabindex="-1"></a>e4 <span class="ot">&lt;-</span> <span class="fu">lm</span>(Y <span class="sc">~</span> Z <span class="sc">+</span> block, <span class="at">data =</span> dat)</span>
<span id="cb38-3"><a href="#cb38-3" aria-hidden="true" tabindex="-1"></a><span class="fu">coef</span>(e4)[[<span class="st">&quot;Z&quot;</span>]]</span></code></pre></div>
<pre><code>[1] 114.8</code></pre>
<p>How do they differ? (The first ignores the blocks. The second uses a
different set of weights that are created by use of “fixed effects” or
“indicator” or “dummy” variables.)</p>
</section>
<section id="which-estimator-should-we-use" class="slide level2">
<h2>Which estimator should we use?</h2>
<p>We now have three estimators each with a different estimate
(imagining they all target the same estimand):</p>
<div class="sourceCode" id="cb40"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb40-1"><a href="#cb40-1" aria-hidden="true" tabindex="-1"></a><span class="fu">c</span>(<span class="fu">coef</span>(e1)[[<span class="st">&quot;Z&quot;</span>]], <span class="fu">coef</span>(e3)[[<span class="st">&quot;Z&quot;</span>]], <span class="fu">coef</span>(e4)[[<span class="st">&quot;Z&quot;</span>]])</span></code></pre></div>
<pre><code>[1] 108.2 131.8 114.8</code></pre>
<p>Which estimator should we use for this design? We can set up a
DeclareDesign simulation to figure this out.</p>
<div class="sourceCode" id="cb42"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb42-1"><a href="#cb42-1" aria-hidden="true" tabindex="-1"></a><span class="do">## declare a new base design that includes the block indicator b</span></span>
<span id="cb42-2"><a href="#cb42-2" aria-hidden="true" tabindex="-1"></a>base_design_blocks <span class="ot">&lt;-</span></span>
<span id="cb42-3"><a href="#cb42-3" aria-hidden="true" tabindex="-1"></a>  <span class="co"># declare the population</span></span>
<span id="cb42-4"><a href="#cb42-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">declare_population</span>(dat[, <span class="fu">c</span>(<span class="st">&quot;b&quot;</span>, <span class="st">&quot;y0&quot;</span>, <span class="st">&quot;y1&quot;</span>)]) <span class="sc">+</span></span>
<span id="cb42-5"><a href="#cb42-5" aria-hidden="true" tabindex="-1"></a>  <span class="co"># tell DD that b indicates block and to assign 2 treated units in each block</span></span>
<span id="cb42-6"><a href="#cb42-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">declare_assignment</span>(</span>
<span id="cb42-7"><a href="#cb42-7" aria-hidden="true" tabindex="-1"></a>    <span class="at">Z =</span> <span class="fu">conduct_ra</span>(<span class="at">N =</span> N, <span class="at">m =</span> <span class="dv">2</span>, <span class="at">blocks =</span> b),</span>
<span id="cb42-8"><a href="#cb42-8" aria-hidden="true" tabindex="-1"></a>    <span class="at">Z_cond_prob =</span></span>
<span id="cb42-9"><a href="#cb42-9" aria-hidden="true" tabindex="-1"></a>      <span class="fu">obtain_condition_probabilities</span>(<span class="at">assignment =</span> Z, <span class="at">m =</span> <span class="dv">2</span>)</span>
<span id="cb42-10"><a href="#cb42-10" aria-hidden="true" tabindex="-1"></a>  ) <span class="sc">+</span></span>
<span id="cb42-11"><a href="#cb42-11" aria-hidden="true" tabindex="-1"></a>  <span class="co"># relationship of potential outcomes to observed outcome</span></span>
<span id="cb42-12"><a href="#cb42-12" aria-hidden="true" tabindex="-1"></a>  <span class="fu">declare_potential_outcomes</span>(Y <span class="sc">~</span> Z <span class="sc">*</span> y1 <span class="sc">+</span> (<span class="dv">1</span> <span class="sc">-</span> Z) <span class="sc">*</span> y0) <span class="sc">+</span></span>
<span id="cb42-13"><a href="#cb42-13" aria-hidden="true" tabindex="-1"></a>  <span class="co"># observed outcome and treatment assignment</span></span>
<span id="cb42-14"><a href="#cb42-14" aria-hidden="true" tabindex="-1"></a>  <span class="fu">declare_reveal</span>(Y, Z)</span></code></pre></div>
</section>
<section id="which-estimator-should-we-use-1" class="slide level2">
<h2>Which estimator should we use?</h2>
<div class="sourceCode" id="cb43"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb43-1"><a href="#cb43-1" aria-hidden="true" tabindex="-1"></a><span class="co"># the estimand is the average treatment effect</span></span>
<span id="cb43-2"><a href="#cb43-2" aria-hidden="true" tabindex="-1"></a>estimandATEb <span class="ot">&lt;-</span> <span class="fu">declare_inquiry</span>(<span class="at">ATE =</span> <span class="fu">mean</span>(Y_Z_1 <span class="sc">-</span> Y_Z_0))</span>
<span id="cb43-3"><a href="#cb43-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb43-4"><a href="#cb43-4" aria-hidden="true" tabindex="-1"></a><span class="co"># three different estimators</span></span>
<span id="cb43-5"><a href="#cb43-5" aria-hidden="true" tabindex="-1"></a>est1 <span class="ot">&lt;-</span> <span class="fu">declare_estimator</span>(Y <span class="sc">~</span> Z,</span>
<span id="cb43-6"><a href="#cb43-6" aria-hidden="true" tabindex="-1"></a>  <span class="at">inquiry =</span> estimandATEb, <span class="at">model =</span> lm_robust,</span>
<span id="cb43-7"><a href="#cb43-7" aria-hidden="true" tabindex="-1"></a>  <span class="at">label =</span> <span class="st">&quot;Ignores Blocks&quot;</span></span>
<span id="cb43-8"><a href="#cb43-8" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb43-9"><a href="#cb43-9" aria-hidden="true" tabindex="-1"></a>est2 <span class="ot">&lt;-</span> <span class="fu">declare_estimator</span>(Y <span class="sc">~</span> Z,</span>
<span id="cb43-10"><a href="#cb43-10" aria-hidden="true" tabindex="-1"></a>  <span class="at">inquiry =</span> estimandATEb, <span class="at">model =</span> difference_in_means, <span class="at">blocks =</span> b,</span>
<span id="cb43-11"><a href="#cb43-11" aria-hidden="true" tabindex="-1"></a>  <span class="at">label =</span> <span class="st">&quot;DiM: Block-Size Weights&quot;</span></span>
<span id="cb43-12"><a href="#cb43-12" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb43-13"><a href="#cb43-13" aria-hidden="true" tabindex="-1"></a>est3 <span class="ot">&lt;-</span> <span class="fu">declare_estimator</span>(Y <span class="sc">~</span> Z,</span>
<span id="cb43-14"><a href="#cb43-14" aria-hidden="true" tabindex="-1"></a>  <span class="at">inquiry =</span> estimandATEb, <span class="at">model =</span> lm_robust,</span>
<span id="cb43-15"><a href="#cb43-15" aria-hidden="true" tabindex="-1"></a>  <span class="at">weights =</span> (Z <span class="sc">/</span> Z_cond_prob) <span class="sc">+</span> ((<span class="dv">1</span> <span class="sc">-</span> Z) <span class="sc">/</span> (Z_cond_prob)),</span>
<span id="cb43-16"><a href="#cb43-16" aria-hidden="true" tabindex="-1"></a>  <span class="at">label =</span> <span class="st">&quot;LM: Block Size Weights&quot;</span></span>
<span id="cb43-17"><a href="#cb43-17" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
</section>
<section id="which-estimator-should-we-use-2" class="slide level2">
<h2>Which estimator should we use?</h2>
<div class="sourceCode" id="cb44"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb44-1"><a href="#cb44-1" aria-hidden="true" tabindex="-1"></a><span class="co"># two more estimators</span></span>
<span id="cb44-2"><a href="#cb44-2" aria-hidden="true" tabindex="-1"></a>est4 <span class="ot">&lt;-</span> <span class="fu">declare_estimator</span>(Y <span class="sc">~</span> Z,</span>
<span id="cb44-3"><a href="#cb44-3" aria-hidden="true" tabindex="-1"></a>  <span class="at">inquiry =</span> estimandATEb,</span>
<span id="cb44-4"><a href="#cb44-4" aria-hidden="true" tabindex="-1"></a>  <span class="at">model =</span> lm_robust, <span class="at">fixed_effects =</span> <span class="sc">~</span>b, <span class="at">label =</span> <span class="st">&quot;Precision Weights&quot;</span></span>
<span id="cb44-5"><a href="#cb44-5" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb44-6"><a href="#cb44-6" aria-hidden="true" tabindex="-1"></a>est5 <span class="ot">&lt;-</span> <span class="fu">declare_estimator</span>(Y <span class="sc">~</span> Z <span class="sc">+</span> b,</span>
<span id="cb44-7"><a href="#cb44-7" aria-hidden="true" tabindex="-1"></a>  <span class="at">inquiry =</span> estimandATEb,</span>
<span id="cb44-8"><a href="#cb44-8" aria-hidden="true" tabindex="-1"></a>  <span class="at">model =</span> lm_robust, <span class="at">label =</span> <span class="st">&quot;Precision Weights (LSDV)&quot;</span></span>
<span id="cb44-9"><a href="#cb44-9" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb44-10"><a href="#cb44-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb44-11"><a href="#cb44-11" aria-hidden="true" tabindex="-1"></a><span class="do">## new design object has the base design, the estimand, and five estimators</span></span>
<span id="cb44-12"><a href="#cb44-12" aria-hidden="true" tabindex="-1"></a>design_blocks <span class="ot">&lt;-</span> base_design_blocks <span class="sc">+</span> estimandATEb <span class="sc">+</span></span>
<span id="cb44-13"><a href="#cb44-13" aria-hidden="true" tabindex="-1"></a>  est1 <span class="sc">+</span> est2 <span class="sc">+</span> est3 <span class="sc">+</span> est4 <span class="sc">+</span> est5</span></code></pre></div>
<p>Then we will run 10,000 simulations (reassign treatment 10,000 times)
and summarize the estimates produced by each of these five
estimators.</p>
</section>
<section id="which-estimator-should-we-use-3" class="slide level2">
<h2>Which estimator should we use?</h2>
<p>How should we interpret this plot?</p>
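<p><img src="figs/figsim_plot2-1.png" style="width:90.0%" alt="Plot of the simulated estimates from the five estimators across repeated randomizations of the block-randomized design" /></p>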
</section>
<section id="which-estimator-is-closer-to-the-truth-1"
class="slide level2">
<h2>Which estimator is closer to the truth?</h2>
<p>Which estimator works better on this design and these data?</p>
<table>
<thead>
<tr class="header">
<th style="text-align: left;">Estimator</th>
<th style="text-align: left;">Bias</th>
<th style="text-align: left;">RMSE</th>
<th style="text-align: left;">SD Est</th>
<th style="text-align: left;">Mean SE</th>
<th style="text-align: left;">Power</th>
<th style="text-align: left;">Coverage</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td style="text-align: left;">DiM: Block-Size Weights</td>
<td style="text-align: left;">-0.63</td>
<td style="text-align: left;">53.08</td>
<td style="text-align: left;">53.11</td>
<td style="text-align: left;">51.90</td>
<td style="text-align: left;">0.22</td>
<td style="text-align: left;">0.77</td>
</tr>
<tr class="even">
<td style="text-align: left;">Ignores Blocks</td>
<td style="text-align: left;">14.48</td>
<td style="text-align: left;">55.23</td>
<td style="text-align: left;">53.33</td>
<td style="text-align: left;">60.79</td>
<td style="text-align: left;">0.10</td>
<td style="text-align: left;">0.97</td>
</tr>
<tr class="odd">
<td style="text-align: left;">LM: Block Size Weights</td>
<td style="text-align: left;">14.48</td>
<td style="text-align: left;">55.23</td>
<td style="text-align: left;">53.33</td>
<td style="text-align: left;">60.79</td>
<td style="text-align: left;">0.10</td>
<td style="text-align: left;">0.97</td>
</tr>
<tr class="even">
<td style="text-align: left;">Precision Weights</td>
<td style="text-align: left;">-1.02</td>
<td style="text-align: left;">55.39</td>
<td style="text-align: left;">55.40</td>
<td style="text-align: left;">56.96</td>
<td style="text-align: left;">0.11</td>
<td style="text-align: left;">0.92</td>
</tr>
<tr class="odd">
<td style="text-align: left;">Precision Weights (LSDV)</td>
<td style="text-align: left;">-1.02</td>
<td style="text-align: left;">55.39</td>
<td style="text-align: left;">55.40</td>
<td style="text-align: left;">56.96</td>
<td style="text-align: left;">0.11</td>
<td style="text-align: left;">0.92</td>
</tr>
</tbody>
</table>
<p>Notice that the coverage is not at 95% in all cases. We used
10,000 simulations so simulation error is around <span
class="math inline">\(\pm 2 \sqrt{p(1-p)/10000}\)</span> or, say, for
coverage calculated as .93, a different simulation could have easily
produced 0.9249 or 0.9351 (or would rarely have produced coverage
numbers outside that range just by chance).</p>
</section></section>
<section>
<section id="cluster-randomization" class="title-slide slide level1">
<h1>Cluster randomization</h1>

</section>
<section
id="in-cluster-randomized-experiments-units-are-randomized-as-a-group-cluster-to-treatment"
class="slide level2 allowframebreaks">
<h2 class="allowframebreaks">In cluster-randomized experiments, units
are randomized as a group (cluster) to treatment</h2>
<ul>
<li><strong>Example 1:</strong> an intervention is randomized across
neighborhoods, so <strong>all</strong> households in a neighborhood will
be assigned to the same treatment condition, but different neighborhoods
will be assigned different treatment conditions.</li>
<li><strong>Example 2:</strong> an intervention is randomized across
people and each person is measured four times after treatment, so our
data contain four rows per person.</li>
<li><strong>Not an Example 1:</strong> Neighborhoods are chosen for the
study. Within each neighborhood about half of the people are assigned to
treatment and half to control. (What kind of study is this? It is not a
cluster-randomized study.)</li>
<li><strong>Not an Example 2:</strong> an intervention is randomized to
some neighborhoods and not to others, and the outcomes include measurements
of neighborhood-level trust in government and total land area in the
neighborhood devoted to gardens. (Sometimes a cluster randomized
experiment can be turned into a simple randomized experiment, or it may
allow more than one possible approach to analysis and
interpretation.)</li>
</ul>
<p>How might the distribution of test statistics and estimators differ
from an experiment where individual units (not clusters) are
randomized?</p>
</section>
<section id="estimating-the-ate-in-cluster-randomized-experiments"
class="slide level2">
<h2>Estimating the ATE in cluster-randomized experiments</h2>
<p>Bias problems in cluster-randomized experiments:</p>
<ul>
<li><p>When clusters are the same size, the usual difference-in-means
estimator is unbiased.</p></li>
<li><p>But be careful when clusters have different numbers of units or
you have very few clusters because then treatment effects may be
correlated with cluster size.</p></li>
<li><p>When cluster size is related to potential outcomes, the usual
difference-in-means estimator is biased. <a
href="https://declaredesign.org/blog/bias-cluster-randomized-trials.html"
class="uri">https://declaredesign.org/blog/bias-cluster-randomized-trials.html</a></p></li>
</ul>
</section>
<section
id="estimating-the-se-for-the-ate-in-cluster-randomized-experiments"
class="slide level2 allowframebreaks">
<h2 class="allowframebreaks">Estimating the SE for the ATE in
cluster-randomized experiments</h2>
<ul>
<li><p><strong>Misleading statistical inferences:</strong> The default
SE will generally overestimate precision (i.e., be too small) in such
designs and thus produce tests with false positive rates that are too
high (or equivalently confidence intervals with coverage rates that are
too low).</p></li>
<li><p>The “cluster robust standard errors” implemented in common
software work well <strong>when the number of clusters is large</strong>
(like more than 50 in some simulation studies).</p></li>
<li><p>The default cluster-appropriate standard errors in
<code>lm_robust</code> (the <code>CR2</code> SEs) work better than the
common approach in Stata (as of this writing).</p></li>
<li><p>The wild bootstrap helps control error rates but gives up
more statistical power than is perhaps necessary in a cluster
randomized study where direct randomization inference is
possible.</p></li>
<li><p>When in doubt, one can produce <span
class="math inline">\(p\)</span>-values by direct simulation (direct
randomization inference) to see if they agree with one of the cluster
robust approaches.</p></li>
</ul>
<p>Overall, it is worth simulating to study the performance of your
estimators, tests, and confidence intervals if you have any worries or
doubts.</p>
</section>
<section id="an-example-of-estimation" class="slide level2">
<h2>An example of estimation</h2>
<p>Imagine we had data from 10 clusters with either 100 people (for 2
clusters) or 10 people per cluster (for 8 clusters). The total size of
the data is 280.</p>
<pre><code># A tibble: 6 × 6
# Groups:   clus_id [2]
  clus_id indiv Y_Z_0 Y_Z_1     Z     Y
  &lt;chr&gt;   &lt;chr&gt; &lt;dbl&gt; &lt;dbl&gt; &lt;int&gt; &lt;dbl&gt;
1 01      010    4.51  4.61     0  4.51
2 01      035    4.63  4.73     0  4.63
3 01      068    4.76  4.86     0  4.76
4 03      205    3.13  4.13     1  4.13
5 03      206    2.41  3.41     1  3.41
6 03      208    2.95  3.95     1  3.95</code></pre>
</section>
<section id="an-example-of-estimation-1" class="slide level2">
<h2>An example of estimation</h2>
<p>Which estimator should we use? Which test should we use? On what
basis should we choose among these approaches?</p>
<div class="sourceCode" id="cb46"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb46-1"><a href="#cb46-1" aria-hidden="true" tabindex="-1"></a>lmc1 <span class="ot">&lt;-</span> <span class="fu">lm_robust</span>(Y <span class="sc">~</span> Z, <span class="at">data =</span> dat1)</span>
<span id="cb46-2"><a href="#cb46-2" aria-hidden="true" tabindex="-1"></a>lmc2 <span class="ot">&lt;-</span> <span class="fu">lm_robust</span>(Y <span class="sc">~</span> Z, <span class="at">clusters =</span> clus_id, <span class="at">data =</span> dat1)</span>
<span id="cb46-3"><a href="#cb46-3" aria-hidden="true" tabindex="-1"></a>lmc3 <span class="ot">&lt;-</span> <span class="fu">lm_robust</span>(Y <span class="sc">~</span> Z <span class="sc">+</span> cl_sizeF, <span class="at">clusters =</span> clus_id, <span class="at">data =</span> dat1)</span>
<span id="cb46-4"><a href="#cb46-4" aria-hidden="true" tabindex="-1"></a><span class="fu">tidy</span>(lmc1)[<span class="dv">2</span>, ]</span></code></pre></div>
<pre><code>  term estimate std.error statistic p.value conf.low conf.high  df outcome
2    Z   0.3024    0.1207     2.504 0.01284  0.06471    0.5401 278       Y</code></pre>
<div class="sourceCode" id="cb48"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb48-1"><a href="#cb48-1" aria-hidden="true" tabindex="-1"></a><span class="fu">tidy</span>(lmc2)[<span class="dv">2</span>, ]</span></code></pre></div>
<pre><code>  term estimate std.error statistic p.value conf.low conf.high    df outcome
2    Z   0.3024     1.079    0.2804   0.796   -2.969     3.574 3.282       Y</code></pre>
<div class="sourceCode" id="cb50"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb50-1"><a href="#cb50-1" aria-hidden="true" tabindex="-1"></a><span class="fu">tidy</span>(lmc3)[<span class="dv">2</span>, ]</span></code></pre></div>
<pre><code>  term estimate std.error statistic p.value conf.low conf.high    df outcome
2    Z   0.3024     0.306    0.9882  0.4386   -1.194     1.799 1.769       Y</code></pre>
</section>
<section id="use-simulation-to-assess-estimators-and-tests"
class="slide level2">
<h2>Use simulation to assess estimators and tests</h2>
<p>If you look at the code for the slides you will see that we simulate
the design 5000 times, each time calculating an estimate and confidence
interval for different estimators of the ATE.</p>
<p>What should we learn from this table? (Coverage?
<code>sd_estimate</code> versus <code>mean_se</code>).</p>
<table>
<caption>Estimator and Test Performance in 5000 simulations of the
cluster randomized design for different estimators and confidence
intervals</caption>
<thead>
<tr class="header">
<th style="text-align: left;">estimator</th>
<th style="text-align: right;">coverage</th>
<th style="text-align: right;">sd_estimate</th>
<th style="text-align: right;">mean_se</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td style="text-align: left;">Y~Z, CR2</td>
<td style="text-align: right;">0.53</td>
<td style="text-align: right;">1.12</td>
<td style="text-align: right;">0.70</td>
</tr>
<tr class="even">
<td style="text-align: left;">Y~Z, cl_size fe, CR2</td>
<td style="text-align: right;">0.74</td>
<td style="text-align: right;">0.36</td>
<td style="text-align: right;">0.30</td>
</tr>
<tr class="odd">
<td style="text-align: left;">Y~Z, HC2</td>
<td style="text-align: right;">0.53</td>
<td style="text-align: right;">1.12</td>
<td style="text-align: right;">0.13</td>
</tr>
<tr class="even">
<td style="text-align: left;">Y~Z, IID</td>
<td style="text-align: right;">0.53</td>
<td style="text-align: right;">1.12</td>
<td style="text-align: right;">0.12</td>
</tr>
<tr class="odd">
<td style="text-align: left;">Y~Z, weight=clus_size, CR2</td>
<td style="text-align: right;">0.53</td>
<td style="text-align: right;">1.27</td>
<td style="text-align: right;">0.80</td>
</tr>
<tr class="even">
<td style="text-align: left;">Y~Z*I(cl_size-mean(cl_size)), CR2</td>
<td style="text-align: right;">0.74</td>
<td style="text-align: right;">1.63</td>
<td style="text-align: right;">0.06</td>
</tr>
<tr class="odd">
<td style="text-align: left;">Y~Z+cl_sizeF, CR2</td>
<td style="text-align: right;">0.74</td>
<td style="text-align: right;">0.36</td>
<td style="text-align: right;">0.30</td>
</tr>
</tbody>
</table>
</section>
<section id="use-simulation-to-assess-estimators-and-tests-1"
class="slide level2">
<h2>Use simulation to assess estimators and tests</h2>
<p>What should we learn from this table? (Bias? Closeness to truth?)</p>
<table>
<caption>Estimator and Test Performance in 5000 simulations of the
cluster randomized design for different estimators and confidence
intervals</caption>
<thead>
<tr class="header">
<th style="text-align: left;">estimator</th>
<th style="text-align: right;">bias</th>
<th style="text-align: right;">rmse</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td style="text-align: left;">Y~Z, CR2</td>
<td style="text-align: right;">0.110</td>
<td style="text-align: right;">1.124</td>
</tr>
<tr class="even">
<td style="text-align: left;">Y~Z, cl_size fe, CR2</td>
<td style="text-align: right;">0.298</td>
<td style="text-align: right;">0.466</td>
</tr>
<tr class="odd">
<td style="text-align: left;">Y~Z, HC2</td>
<td style="text-align: right;">0.110</td>
<td style="text-align: right;">1.124</td>
</tr>
<tr class="even">
<td style="text-align: left;">Y~Z, IID</td>
<td style="text-align: right;">0.110</td>
<td style="text-align: right;">1.124</td>
</tr>
<tr class="odd">
<td style="text-align: left;">Y~Z, weight=clus_size, CR2</td>
<td style="text-align: right;">-0.035</td>
<td style="text-align: right;">1.273</td>
</tr>
<tr class="even">
<td style="text-align: left;">Y~Z*I(cl_size-mean(cl_size)), CR2</td>
<td style="text-align: right;">0.858</td>
<td style="text-align: right;">1.839</td>
</tr>
<tr class="odd">
<td style="text-align: left;">Y~Z+cl_sizeF, CR2</td>
<td style="text-align: right;">0.298</td>
<td style="text-align: right;">0.466</td>
</tr>
</tbody>
</table>
</section>
<section id="use-simulation-to-assess-estimators-and-tests-2"
class="slide level2">
<h2>Use simulation to assess estimators and tests</h2>
<p>How should we interpret this plot?</p>
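<p><img src="figs/figsim_plot_clus-1.png" style="width:95.0%" alt="Plot of the simulated estimates from each estimator across repeated randomizations of the cluster-randomized design" /></p>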
</section>
<section
id="summary-of-estimation-and-testing-in-cluster-randomized-trials"
class="slide level2">
<h2>Summary of estimation and testing in cluster-randomized trials</h2>
<ul>
<li><p>Cluster randomized trials pose special problems for standard
approaches to estimation and testing.</p></li>
<li><p>If randomization is at the cluster level, then uncertainty arises
from the cluster level randomization.</p></li>
<li><p>If we have enough clusters, then one of the “cluster robust”
standard errors can help us produce confidence intervals with correct
coverage. <strong>Cluster robust standard errors require many
clusters</strong>.</p></li>
<li><p>If cluster size (or characteristic) is related to effect size,
then we can have bias (and we need to adjust somehow).</p></li>
</ul>
</section></section>
<section>
<section id="binary-outcomes" class="title-slide slide level1">
<h1>Binary outcomes</h1>

</section>
<section
id="binary-outcomes-set-up-our-data-for-simulation-in-declaredesign"
class="slide level2">
<h2>Binary outcomes: Set up our data for simulation in
DeclareDesign</h2>
<div class="sourceCode" id="cb52"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb52-1"><a href="#cb52-1" aria-hidden="true" tabindex="-1"></a><span class="co"># population size</span></span>
<span id="cb52-2"><a href="#cb52-2" aria-hidden="true" tabindex="-1"></a>N <span class="ot">&lt;-</span> <span class="dv">20</span></span>
<span id="cb52-3"><a href="#cb52-3" aria-hidden="true" tabindex="-1"></a><span class="co"># declare the population</span></span>
<span id="cb52-4"><a href="#cb52-4" aria-hidden="true" tabindex="-1"></a>thepop_bin <span class="ot">&lt;-</span> <span class="fu">declare_population</span>(</span>
<span id="cb52-5"><a href="#cb52-5" aria-hidden="true" tabindex="-1"></a>  <span class="at">N =</span> N, <span class="at">x1 =</span> <span class="fu">draw_binary</span>(<span class="at">prob =</span> .<span class="dv">5</span>, <span class="at">N =</span> N),</span>
<span id="cb52-6"><a href="#cb52-6" aria-hidden="true" tabindex="-1"></a>  <span class="at">x2 =</span> <span class="fu">rnorm</span>(N)</span>
<span id="cb52-7"><a href="#cb52-7" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb52-8"><a href="#cb52-8" aria-hidden="true" tabindex="-1"></a><span class="co"># declare the potential outcomes</span></span>
<span id="cb52-9"><a href="#cb52-9" aria-hidden="true" tabindex="-1"></a>thepo_bin <span class="ot">&lt;-</span> <span class="fu">declare_potential_outcomes</span>(Y <span class="sc">~</span> <span class="fu">rbinom</span>(</span>
<span id="cb52-10"><a href="#cb52-10" aria-hidden="true" tabindex="-1"></a>  <span class="at">n =</span> N, <span class="at">size =</span> <span class="dv">1</span>,</span>
<span id="cb52-11"><a href="#cb52-11" aria-hidden="true" tabindex="-1"></a>  <span class="at">prob =</span> <span class="fl">0.5</span> <span class="sc">+</span> <span class="fl">0.05</span> <span class="sc">*</span> Z <span class="sc">+</span> x1 <span class="sc">*</span> .<span class="dv">05</span></span>
<span id="cb52-12"><a href="#cb52-12" aria-hidden="true" tabindex="-1"></a>))</span>
<span id="cb52-13"><a href="#cb52-13" aria-hidden="true" tabindex="-1"></a><span class="co"># two possible targets: difference in means or difference in log-odds</span></span>
<span id="cb52-14"><a href="#cb52-14" aria-hidden="true" tabindex="-1"></a>thetarget_ate <span class="ot">&lt;-</span> <span class="fu">declare_inquiry</span>(<span class="at">ate =</span> <span class="fu">mean</span>(Y_Z_1 <span class="sc">-</span> Y_Z_0))</span>
<span id="cb52-15"><a href="#cb52-15" aria-hidden="true" tabindex="-1"></a>thetarget_logodds <span class="ot">&lt;-</span> <span class="fu">declare_inquiry</span>(</span>
<span id="cb52-16"><a href="#cb52-16" aria-hidden="true" tabindex="-1"></a>  <span class="at">logodds =</span> <span class="fu">log</span>(<span class="fu">mean</span>(Y_Z_1) <span class="sc">/</span> (<span class="dv">1</span> <span class="sc">-</span> <span class="fu">mean</span>(Y_Z_1))) <span class="sc">-</span></span>
<span id="cb52-17"><a href="#cb52-17" aria-hidden="true" tabindex="-1"></a>    <span class="fu">log</span>(<span class="fu">mean</span>(Y_Z_0) <span class="sc">/</span> (<span class="dv">1</span> <span class="sc">-</span> <span class="fu">mean</span>(Y_Z_0)))</span>
<span id="cb52-18"><a href="#cb52-18" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
</section>
<section
id="binary-outcomes-set-up-our-data-for-simulation-in-declaredesign-1"
class="slide level2">
<h2>Binary outcomes: Set up our data for simulation in
DeclareDesign</h2>
<div class="sourceCode" id="cb53"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb53-1"><a href="#cb53-1" aria-hidden="true" tabindex="-1"></a><span class="co"># declare how treatment is assigned</span></span>
<span id="cb53-2"><a href="#cb53-2" aria-hidden="true" tabindex="-1"></a><span class="co"># m units are assigned to levels of treatment Z</span></span>
<span id="cb53-3"><a href="#cb53-3" aria-hidden="true" tabindex="-1"></a>theassign_bin <span class="ot">&lt;-</span> <span class="fu">declare_assignment</span>(<span class="at">Z =</span> <span class="fu">conduct_ra</span>(<span class="at">N =</span> N, <span class="at">m =</span> <span class="fu">floor</span>(N <span class="sc">/</span> <span class="dv">3</span>)))</span>
<span id="cb53-4"><a href="#cb53-4" aria-hidden="true" tabindex="-1"></a><span class="co"># declare what outcome values are revealed for possible values of Z</span></span>
<span id="cb53-5"><a href="#cb53-5" aria-hidden="true" tabindex="-1"></a>thereveal_bin <span class="ot">&lt;-</span> <span class="fu">declare_reveal</span>(Y, Z)</span>
<span id="cb53-6"><a href="#cb53-6" aria-hidden="true" tabindex="-1"></a><span class="co"># pull this all together: population, potential outcomes, assignment,</span></span>
<span id="cb53-7"><a href="#cb53-7" aria-hidden="true" tabindex="-1"></a><span class="co"># outcome values connected to Z</span></span>
<span id="cb53-8"><a href="#cb53-8" aria-hidden="true" tabindex="-1"></a>des_bin <span class="ot">&lt;-</span> thepop_bin <span class="sc">+</span> thepo_bin <span class="sc">+</span> theassign_bin <span class="sc">+</span> thereveal_bin</span>
<span id="cb53-9"><a href="#cb53-9" aria-hidden="true" tabindex="-1"></a><span class="co"># then make one draw (randomize treatment once)</span></span>
<span id="cb53-10"><a href="#cb53-10" aria-hidden="true" tabindex="-1"></a><span class="fu">set.seed</span>(<span class="dv">12345</span>)</span>
<span id="cb53-11"><a href="#cb53-11" aria-hidden="true" tabindex="-1"></a>dat2 <span class="ot">&lt;-</span> <span class="fu">draw_data</span>(des_bin)</span></code></pre></div>
</section>
<section id="binary-outcomes-estimands-i" class="slide level2">
<h2>Binary outcomes: Estimands I</h2>
<p>How would we interpret the following true quantities or estimands?
<code>Y_Z_1</code>, <code>Y_Z_0</code> are potential outcomes,
<code>Y</code> is observed, <code>x1</code>, <code>x2</code> are
covariates, <code>Z</code> is treatment assignment. Here <span
class="math inline">\(N\)</span>=20.</p>
<div class="sourceCode" id="cb54"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb54-1"><a href="#cb54-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Look at the first 6 observations only:</span></span>
<span id="cb54-2"><a href="#cb54-2" aria-hidden="true" tabindex="-1"></a><span class="fu">head</span>(dat2[, <span class="sc">-</span><span class="dv">7</span>])</span></code></pre></div>
<pre><code>  ID x1      x2 Y_Z_0 Y_Z_1 Z
1 01  1 -0.1162     0     1 0
2 02  1  1.8173     0     1 1
3 03  1  0.3706     0     1 0
4 04  1  0.5202     1     1 0
5 05  0 -0.7505     1     0 1
6 06  0  0.8169     0     1 0</code></pre>
</section>
<section id="binary-outcomes-estimands-ii" class="slide level2">
<h2>Binary outcomes: Estimands II</h2>
<p>How would we interpret the following true quantities or estimands?
<code>Y_Z_1</code>, <code>Y_Z_0</code> are potential outcomes,
<code>Y</code> is observed, <code>x1</code>, <code>x2</code> are
covariates, <code>Z</code> is treatment assignment. Here <span
class="math inline">\(N\)</span>=20.</p>
<div class="sourceCode" id="cb56"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb56-1"><a href="#cb56-1" aria-hidden="true" tabindex="-1"></a>ate_bin <span class="ot">&lt;-</span> <span class="fu">with</span>(dat2, <span class="fu">mean</span>(Y_Z_1 <span class="sc">-</span> Y_Z_0))</span>
<span id="cb56-2"><a href="#cb56-2" aria-hidden="true" tabindex="-1"></a>bary1 <span class="ot">&lt;-</span> <span class="fu">mean</span>(dat2<span class="sc">$</span>Y_Z_1)</span>
<span id="cb56-3"><a href="#cb56-3" aria-hidden="true" tabindex="-1"></a>bary0 <span class="ot">&lt;-</span> <span class="fu">mean</span>(dat2<span class="sc">$</span>Y_Z_0)</span>
<span id="cb56-4"><a href="#cb56-4" aria-hidden="true" tabindex="-1"></a>diff_log_odds_bin <span class="ot">&lt;-</span> <span class="fu">with</span>(</span>
<span id="cb56-5"><a href="#cb56-5" aria-hidden="true" tabindex="-1"></a>  dat2,</span>
<span id="cb56-6"><a href="#cb56-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">log</span>(bary1 <span class="sc">/</span> (<span class="dv">1</span> <span class="sc">-</span> bary1)) <span class="sc">-</span> <span class="fu">log</span>(bary0 <span class="sc">/</span> (<span class="dv">1</span> <span class="sc">-</span> bary0))</span>
<span id="cb56-7"><a href="#cb56-7" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb56-8"><a href="#cb56-8" aria-hidden="true" tabindex="-1"></a><span class="fu">c</span>(</span>
<span id="cb56-9"><a href="#cb56-9" aria-hidden="true" tabindex="-1"></a>  <span class="at">bary1 =</span> bary1, <span class="at">bary0 =</span> bary0, <span class="at">true_ate =</span> ate_bin,</span>
<span id="cb56-10"><a href="#cb56-10" aria-hidden="true" tabindex="-1"></a>  <span class="at">true_diff_log_odds =</span> diff_log_odds_bin</span>
<span id="cb56-11"><a href="#cb56-11" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
<pre><code>             bary1              bary0           true_ate true_diff_log_odds 
              0.55               0.55               0.00               0.00 </code></pre>
</section>
<section id="binary-outcomes-estimands-iii" class="slide level2">
<h2>Binary outcomes: Estimands III</h2>
<p>Do you want to estimate the difference in log-odds?</p>
<p><span class="math display">\[\begin{equation}
\delta = \log \frac{\bar{y}_{1}}{1-\bar{y}_{1}} - \log \frac{
\bar{y}_0}{1- \bar{y}_0}
\end{equation}\]</span></p>
<p>Or the difference in proportions?</p>
<p><span class="math display">\[\begin{equation}
\bar{\tau} = \bar{y}_{1} - \bar{y}_0
\end{equation}\]</span></p>
<p>Recall that <span class="math inline">\(\bar{y}_1\)</span> is the
<em>proportion</em> of <span class="math inline">\(y_{1}=1\)</span> in
the data.</p>
<p><span class="citation"
data-cites="freedman2008randomization">Freedman (<a
href="#/ref-freedman2008randomization"
role="doc-biblioref">2008b</a>)</span> shows us that the logit
coefficient estimator is a biased estimator of the difference in
log-odds estimand. He also shows an unbiased estimator of that
estimand.</p>
<p>We know that the difference of proportions in the sample should be an
unbiased estimator of the difference of proportions.</p>
</section>
<section id="an-example-of-estimation-i" class="slide level2">
<h2>An example of estimation I</h2>
<p>How should we interpret the following estimates? (What does the
difference of means estimator require in terms of assumptions? What does
the logistic regression estimator require in terms of assumptions?)</p>
<div class="sourceCode" id="cb58"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb58-1"><a href="#cb58-1" aria-hidden="true" tabindex="-1"></a>lmbin1 <span class="ot">&lt;-</span> <span class="fu">lm_robust</span>(Y <span class="sc">~</span> Z, <span class="at">data =</span> dat2)</span>
<span id="cb58-2"><a href="#cb58-2" aria-hidden="true" tabindex="-1"></a>glmbin1 <span class="ot">&lt;-</span> <span class="fu">glm</span>(Y <span class="sc">~</span> Z, <span class="at">data =</span> dat2, <span class="at">family =</span> <span class="fu">binomial</span>(<span class="at">link =</span> <span class="st">&quot;logit&quot;</span>))</span>
<span id="cb58-3"><a href="#cb58-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb58-4"><a href="#cb58-4" aria-hidden="true" tabindex="-1"></a><span class="fu">tidy</span>(lmbin1)[<span class="dv">2</span>, ]</span></code></pre></div>
<pre><code>  term estimate std.error statistic p.value conf.low conf.high df outcome
2    Z  -0.4048    0.2159    -1.875 0.07716  -0.8584   0.04884 18       Y</code></pre>
<div class="sourceCode" id="cb60"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb60-1"><a href="#cb60-1" aria-hidden="true" tabindex="-1"></a><span class="fu">tidy</span>(glmbin1)[<span class="dv">2</span>, ]</span></code></pre></div>
<pre><code># A tibble: 1 × 5
  term  estimate std.error statistic p.value
  &lt;chr&gt;    &lt;dbl&gt;     &lt;dbl&gt;     &lt;dbl&gt;   &lt;dbl&gt;
1 Z        -1.90      1.22     -1.55   0.120</code></pre>
</section>
<section id="an-example-of-estimation-ii" class="slide level2">
<h2>An example of estimation II</h2>
<p>What about with covariates? Why use covariates?</p>
<div class="sourceCode" id="cb62"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb62-1"><a href="#cb62-1" aria-hidden="true" tabindex="-1"></a>lmbin2 <span class="ot">&lt;-</span> <span class="fu">lm_robust</span>(Y <span class="sc">~</span> Z <span class="sc">+</span> x1, <span class="at">data =</span> dat2)</span>
<span id="cb62-2"><a href="#cb62-2" aria-hidden="true" tabindex="-1"></a>glmbin2 <span class="ot">&lt;-</span> <span class="fu">glm</span>(Y <span class="sc">~</span> Z <span class="sc">+</span> x1, <span class="at">data =</span> dat2, <span class="at">family =</span> <span class="fu">binomial</span>(<span class="at">link =</span> <span class="st">&quot;logit&quot;</span>))</span>
<span id="cb62-3"><a href="#cb62-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb62-4"><a href="#cb62-4" aria-hidden="true" tabindex="-1"></a><span class="fu">tidy</span>(lmbin2)[<span class="dv">2</span>, ]</span></code></pre></div>
<pre><code>  term estimate std.error statistic p.value conf.low conf.high df outcome
2    Z  -0.4058    0.2179    -1.862 0.07996  -0.8656   0.05398 17       Y</code></pre>
<div class="sourceCode" id="cb64"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb64-1"><a href="#cb64-1" aria-hidden="true" tabindex="-1"></a><span class="fu">tidy</span>(glmbin2)[<span class="dv">2</span>, ]</span></code></pre></div>
<pre><code># A tibble: 1 × 5
  term  estimate std.error statistic p.value
  &lt;chr&gt;    &lt;dbl&gt;     &lt;dbl&gt;     &lt;dbl&gt;   &lt;dbl&gt;
1 Z        -1.90      1.22     -1.55   0.120</code></pre>
</section>
<section id="an-example-of-estimation-iii" class="slide level2">
<h2>An example of estimation III</h2>
<p>Let’s compare our estimates</p>
<div class="sourceCode" id="cb66"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb66-1"><a href="#cb66-1" aria-hidden="true" tabindex="-1"></a><span class="fu">c</span>(</span>
<span id="cb66-2"><a href="#cb66-2" aria-hidden="true" tabindex="-1"></a>  <span class="at">dim =</span> <span class="fu">coef</span>(lmbin1)[[<span class="st">&quot;Z&quot;</span>]],</span>
<span id="cb66-3"><a href="#cb66-3" aria-hidden="true" tabindex="-1"></a>  <span class="at">dim_x1 =</span> <span class="fu">coef</span>(lmbin2)[[<span class="st">&quot;Z&quot;</span>]],</span>
<span id="cb66-4"><a href="#cb66-4" aria-hidden="true" tabindex="-1"></a>  <span class="at">glm =</span> <span class="fu">coef</span>(glmbin1)[[<span class="st">&quot;Z&quot;</span>]],</span>
<span id="cb66-5"><a href="#cb66-5" aria-hidden="true" tabindex="-1"></a>  <span class="at">glm_x1 =</span> <span class="fu">coef</span>(glmbin2)[[<span class="st">&quot;Z&quot;</span>]]</span>
<span id="cb66-6"><a href="#cb66-6" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
<pre><code>    dim  dim_x1     glm  glm_x1 
-0.4048 -0.4058 -1.8971 -1.9025 </code></pre>
</section>
<section id="an-example-of-estimation-the-freedman-plugin-estimators-i"
class="slide level2">
<h2>An example of estimation: The Freedman plugin estimators I</h2>
<p>No covariate: </p>
<div class="sourceCode" id="cb68"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb68-1"><a href="#cb68-1" aria-hidden="true" tabindex="-1"></a>freedman_plugin_estfn1 <span class="ot">&lt;-</span> <span class="cf">function</span>(data) {</span>
<span id="cb68-2"><a href="#cb68-2" aria-hidden="true" tabindex="-1"></a>  glmbin <span class="ot">&lt;-</span> <span class="fu">glm</span>(Y <span class="sc">~</span> Z, <span class="at">data =</span> data, <span class="at">family =</span> <span class="fu">binomial</span>(<span class="at">link =</span> <span class="st">&quot;logit&quot;</span>))</span>
<span id="cb68-3"><a href="#cb68-3" aria-hidden="true" tabindex="-1"></a>  preddat <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(<span class="at">Z =</span> <span class="fu">rep</span>(<span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">1</span>), <span class="fu">nrow</span>(data)))</span>
<span id="cb68-4"><a href="#cb68-4" aria-hidden="true" tabindex="-1"></a>  preddat<span class="sc">$</span>yhat <span class="ot">&lt;-</span> <span class="fu">predict</span>(glmbin, <span class="at">newdata =</span> preddat, <span class="at">type =</span> <span class="st">&quot;response&quot;</span>)</span>
<span id="cb68-5"><a href="#cb68-5" aria-hidden="true" tabindex="-1"></a>  bary1 <span class="ot">&lt;-</span> <span class="fu">mean</span>(preddat<span class="sc">$</span>yhat[preddat<span class="sc">$</span>Z <span class="sc">==</span> <span class="dv">1</span>])</span>
<span id="cb68-6"><a href="#cb68-6" aria-hidden="true" tabindex="-1"></a>  bary0 <span class="ot">&lt;-</span> <span class="fu">mean</span>(preddat<span class="sc">$</span>yhat[preddat<span class="sc">$</span>Z <span class="sc">==</span> <span class="dv">0</span>])</span>
<span id="cb68-7"><a href="#cb68-7" aria-hidden="true" tabindex="-1"></a>  diff_log_odds <span class="ot">&lt;-</span> <span class="fu">log</span>(bary1 <span class="sc">/</span> (<span class="dv">1</span> <span class="sc">-</span> bary1)) <span class="sc">-</span> <span class="fu">log</span>(bary0 <span class="sc">/</span> (<span class="dv">1</span> <span class="sc">-</span> bary0))</span>
<span id="cb68-8"><a href="#cb68-8" aria-hidden="true" tabindex="-1"></a>  <span class="fu">return</span>(<span class="fu">data.frame</span>(<span class="at">estimate =</span> diff_log_odds))</span>
<span id="cb68-9"><a href="#cb68-9" aria-hidden="true" tabindex="-1"></a>}</span></code></pre></div>
</section>
<section id="an-example-of-estimation-the-freedman-plugin-estimators-ii"
class="slide level2">
<h2>An example of estimation: The Freedman plugin estimators II</h2>
<p>With covariate: </p>
<div class="sourceCode" id="cb69"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb69-1"><a href="#cb69-1" aria-hidden="true" tabindex="-1"></a>freedman_plugin_estfn2 <span class="ot">&lt;-</span> <span class="cf">function</span>(data) {</span>
<span id="cb69-2"><a href="#cb69-2" aria-hidden="true" tabindex="-1"></a>  N <span class="ot">&lt;-</span> <span class="fu">nrow</span>(data)</span>
<span id="cb69-3"><a href="#cb69-3" aria-hidden="true" tabindex="-1"></a>  glmbin <span class="ot">&lt;-</span> <span class="fu">glm</span>(Y <span class="sc">~</span> Z <span class="sc">+</span> x1, <span class="at">data =</span> data, <span class="at">family =</span> <span class="fu">binomial</span>(<span class="at">link =</span> <span class="st">&quot;logit&quot;</span>))</span>
<span id="cb69-4"><a href="#cb69-4" aria-hidden="true" tabindex="-1"></a>  preddat <span class="ot">&lt;-</span> <span class="fu">data.frame</span>(<span class="at">Z =</span> <span class="fu">rep</span>(<span class="fu">c</span>(<span class="dv">0</span>, <span class="dv">1</span>), <span class="at">each =</span> N))</span>
<span id="cb69-5"><a href="#cb69-5" aria-hidden="true" tabindex="-1"></a>  preddat<span class="sc">$</span>x1 <span class="ot">&lt;-</span> <span class="fu">rep</span>(data<span class="sc">$</span>x1, <span class="dv">2</span>)</span>
<span id="cb69-6"><a href="#cb69-6" aria-hidden="true" tabindex="-1"></a>  preddat<span class="sc">$</span>yhat <span class="ot">&lt;-</span> <span class="fu">predict</span>(glmbin, <span class="at">newdata =</span> preddat, <span class="at">type =</span> <span class="st">&quot;response&quot;</span>)</span>
<span id="cb69-7"><a href="#cb69-7" aria-hidden="true" tabindex="-1"></a>  bary1 <span class="ot">&lt;-</span> <span class="fu">mean</span>(preddat<span class="sc">$</span>yhat[preddat<span class="sc">$</span>Z <span class="sc">==</span> <span class="dv">1</span>])</span>
<span id="cb69-8"><a href="#cb69-8" aria-hidden="true" tabindex="-1"></a>  bary0 <span class="ot">&lt;-</span> <span class="fu">mean</span>(preddat<span class="sc">$</span>yhat[preddat<span class="sc">$</span>Z <span class="sc">==</span> <span class="dv">0</span>])</span>
<span id="cb69-9"><a href="#cb69-9" aria-hidden="true" tabindex="-1"></a>  diff_log_odds <span class="ot">&lt;-</span> <span class="fu">log</span>(bary1 <span class="sc">/</span> (<span class="dv">1</span> <span class="sc">-</span> bary1)) <span class="sc">-</span> <span class="fu">log</span>(bary0 <span class="sc">/</span> (<span class="dv">1</span> <span class="sc">-</span> bary0))</span>
<span id="cb69-10"><a href="#cb69-10" aria-hidden="true" tabindex="-1"></a>  <span class="fu">return</span>(<span class="fu">data.frame</span>(<span class="at">estimate =</span> diff_log_odds))</span>
<span id="cb69-11"><a href="#cb69-11" aria-hidden="true" tabindex="-1"></a>}</span></code></pre></div>
<p>Let’s compare our estimates from the six different estimators </p>
<pre><code>        dim      dim_x1         glm      glm_x1    freedman freedman_x1 
    -0.4048     -0.4058     -1.8971     -1.9025     -1.8971     -1.9020 </code></pre>
</section>
<section
id="an-example-of-using-declaredesign-to-assess-our-estimators-i"
class="slide level2">
<h2>An example of using DeclareDesign to assess our estimators I</h2>
<div class="sourceCode" id="cb71"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb71-1"><a href="#cb71-1" aria-hidden="true" tabindex="-1"></a><span class="co"># declare 4 estimators for DD</span></span>
<span id="cb71-2"><a href="#cb71-2" aria-hidden="true" tabindex="-1"></a><span class="co"># first estimator: linear regression with ATE as target</span></span>
<span id="cb71-3"><a href="#cb71-3" aria-hidden="true" tabindex="-1"></a>estb1 <span class="ot">&lt;-</span> <span class="fu">declare_estimator</span>(Y <span class="sc">~</span> Z,</span>
<span id="cb71-4"><a href="#cb71-4" aria-hidden="true" tabindex="-1"></a>  <span class="at">model =</span> lm_robust, <span class="at">label =</span> <span class="st">&quot;lm1:Z&quot;</span>,</span>
<span id="cb71-5"><a href="#cb71-5" aria-hidden="true" tabindex="-1"></a>  <span class="at">inquiry =</span> thetarget_ate</span>
<span id="cb71-6"><a href="#cb71-6" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb71-7"><a href="#cb71-7" aria-hidden="true" tabindex="-1"></a><span class="co"># second estimator: linear regression with covariate, with ATE as target</span></span>
<span id="cb71-8"><a href="#cb71-8" aria-hidden="true" tabindex="-1"></a>estb2 <span class="ot">&lt;-</span> <span class="fu">declare_estimator</span>(Y <span class="sc">~</span> Z <span class="sc">+</span> x1,</span>
<span id="cb71-9"><a href="#cb71-9" aria-hidden="true" tabindex="-1"></a>  <span class="at">model =</span> lm_robust, <span class="at">label =</span> <span class="st">&quot;lm1:Z,x1&quot;</span>,</span>
<span id="cb71-10"><a href="#cb71-10" aria-hidden="true" tabindex="-1"></a>  <span class="at">inquiry =</span> thetarget_ate</span>
<span id="cb71-11"><a href="#cb71-11" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb71-12"><a href="#cb71-12" aria-hidden="true" tabindex="-1"></a><span class="co"># third estimator: logistic regression, with log odds as target</span></span>
<span id="cb71-13"><a href="#cb71-13" aria-hidden="true" tabindex="-1"></a>estb3 <span class="ot">&lt;-</span> <span class="fu">declare_estimator</span>(Y <span class="sc">~</span> Z,</span>
<span id="cb71-14"><a href="#cb71-14" aria-hidden="true" tabindex="-1"></a>  <span class="at">model =</span> glm, <span class="at">family =</span> <span class="fu">binomial</span>(<span class="at">link =</span> <span class="st">&quot;logit&quot;</span>),</span>
<span id="cb71-15"><a href="#cb71-15" aria-hidden="true" tabindex="-1"></a>  <span class="at">label =</span> <span class="st">&quot;glm1:Z&quot;</span>, <span class="at">inquiry =</span> thetarget_logodds</span>
<span id="cb71-16"><a href="#cb71-16" aria-hidden="true" tabindex="-1"></a>)</span>
<span id="cb71-17"><a href="#cb71-17" aria-hidden="true" tabindex="-1"></a><span class="co"># fourth estimator: logistic regression with covariate, with log odds as target</span></span>
<span id="cb71-18"><a href="#cb71-18" aria-hidden="true" tabindex="-1"></a>estb4 <span class="ot">&lt;-</span> <span class="fu">declare_estimator</span>(Y <span class="sc">~</span> Z <span class="sc">+</span> x1,</span>
<span id="cb71-19"><a href="#cb71-19" aria-hidden="true" tabindex="-1"></a>  <span class="at">model =</span> glm, <span class="at">family =</span> <span class="fu">binomial</span>(<span class="at">link =</span> <span class="st">&quot;logit&quot;</span>),</span>
<span id="cb71-20"><a href="#cb71-20" aria-hidden="true" tabindex="-1"></a>  <span class="at">label =</span> <span class="st">&quot;glm1:Z,x1&quot;</span>, <span class="at">inquiry =</span> thetarget_logodds</span>
<span id="cb71-21"><a href="#cb71-21" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
</section>
<section
id="an-example-of-using-declaredesign-to-assess-our-estimators-ii"
class="slide level2">
<h2>An example of using DeclareDesign to assess our estimators II</h2>
<div class="sourceCode" id="cb72"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb72-1"><a href="#cb72-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Pull together: des_bin is population, potential outcomes, assignment,</span></span>
<span id="cb72-2"><a href="#cb72-2" aria-hidden="true" tabindex="-1"></a><span class="co"># outcome values connected to Z.  We add the two targets and four estimators.</span></span>
<span id="cb72-3"><a href="#cb72-3" aria-hidden="true" tabindex="-1"></a>des_bin_plus_est <span class="ot">&lt;-</span> des_bin <span class="sc">+</span> thetarget_ate <span class="sc">+</span> thetarget_logodds <span class="sc">+</span></span>
<span id="cb72-4"><a href="#cb72-4" aria-hidden="true" tabindex="-1"></a>  estb1 <span class="sc">+</span> estb2 <span class="sc">+</span> estb3 <span class="sc">+</span> estb4</span></code></pre></div>
</section>
<section id="using-simulation-to-assess-our-estimators"
class="slide level2">
<h2>Using simulation to assess our estimators</h2>
<p>How should we interpret this plot? (Differences in scales make it
difficult.)</p>
<p><img src="figs/figsim_plot_bin-1.png" alt="Simulation results for the estimators of the binary-outcome design" style="width: 95%;" /></p>
</section>
<section id="which-estimator-is-closer-to-the-truth-2"
class="slide level2">
<h2>Which estimator is closer to the truth?</h2>
<p>Which estimator works better on this design and these data?</p>
<table>
<caption>Estimator and Test Performance in 5000 simulations of the
different estimators and confidence intervals for a binary outcome and
completely randomized design.</caption>
<thead>
<tr class="header">
<th style="text-align: left;">est</th>
<th style="text-align: left;">estimand</th>
<th style="text-align: right;">bias</th>
<th style="text-align: right;">rmse</th>
<th style="text-align: right;">power</th>
<th style="text-align: right;">coverage</th>
<th style="text-align: right;">sd_est</th>
<th style="text-align: right;">mean_se</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td style="text-align: left;">glm1:Z</td>
<td style="text-align: left;">logodds</td>
<td style="text-align: right;">0.691</td>
<td style="text-align: right;">4.099</td>
<td style="text-align: right;">0.023</td>
<td style="text-align: right;">0.995</td>
<td style="text-align: right;">4.226</td>
<td style="text-align: right;">154.088</td>
</tr>
<tr class="even">
<td style="text-align: left;">glm1:Z,x1</td>
<td style="text-align: left;">logodds</td>
<td style="text-align: right;">0.850</td>
<td style="text-align: right;">4.815</td>
<td style="text-align: right;">0.016</td>
<td style="text-align: right;">0.993</td>
<td style="text-align: right;">4.934</td>
<td style="text-align: right;">249.506</td>
</tr>
<tr class="odd">
<td style="text-align: left;">lm1:Z</td>
<td style="text-align: left;">ate</td>
<td style="text-align: right;">0.007</td>
<td style="text-align: right;">0.182</td>
<td style="text-align: right;">0.084</td>
<td style="text-align: right;">0.970</td>
<td style="text-align: right;">0.239</td>
<td style="text-align: right;">0.239</td>
</tr>
<tr class="even">
<td style="text-align: left;">lm1:Z,x1</td>
<td style="text-align: left;">ate</td>
<td style="text-align: right;">0.010</td>
<td style="text-align: right;">0.189</td>
<td style="text-align: right;">0.082</td>
<td style="text-align: right;">0.970</td>
<td style="text-align: right;">0.245</td>
<td style="text-align: right;">0.247</td>
</tr>
</tbody>
</table>
</section></section>
<section>
<section id="other-topics-in-estimation"
class="title-slide slide level1">
<h1>Other topics in estimation</h1>

</section>
<section id="covariance-adjustment-estimands" class="slide level2">
<h2>Covariance adjustment: Estimands</h2>
<p>In general, simply “controlling for” covariates produces a biased estimator of
the ATE <strong>or</strong> ITT estimand. See for example <span
class="citation" data-cites="lin_agnostic_2013">Lin (<a
href="#/ref-lin_agnostic_2013" role="doc-biblioref">2013</a>)</span> and
<span class="citation" data-cites="freedman2008rae">Freedman (<a
href="#/ref-freedman2008rae" role="doc-biblioref">2008a</a>)</span>.
<span class="citation" data-cites="lin_agnostic_2013">Lin (<a
href="#/ref-lin_agnostic_2013" role="doc-biblioref">2013</a>)</span>
shows how to reduce this bias and, importantly, that this bias tends to
shrink as the sample size increases.</p>
</section></section>
<section>
<section id="conclusion" class="title-slide slide level1">
<h1>Conclusion</h1>

</section>
<section id="final-thoughts-on-basics-of-estimation"
class="slide level2">
<h2>Final thoughts on basics of estimation</h2>
<ul>
<li><p>Counterfactual causal estimands are unobserved functions of
potential outcomes.</p></li>
<li><p>Estimators are recipes or computational formulas that use
observed data to learn about an estimand.</p></li>
<li><p>Good estimators produce estimates that are close to the true
estimand.</p></li>
<li><p>(Connecting estimation with testing) Standard errors of
estimators allow us to calculate confidence intervals and <span
class="math inline">\(p\)</span>-values. Certain estimators have larger
or smaller (or more or less correct) standard errors.</p></li>
<li><p>You can assess the utility of a chosen estimator for a chosen
estimand by simulation.</p></li>
</ul>
</section></section>
<section>
<section id="causal-effects-that-differ-by-groups-or-covariates"
class="title-slide slide level1">
<h1>Causal effects that differ by groups or covariates</h1>

</section>
<section id="effects-that-differ-by-groups-i" class="slide level2">
<h2>Effects that differ by groups I</h2>
<p>If our theory suggests that effects should differ by group, how can
we assess evidence for or against such claims?</p>
<ul>
<li><p>We can <strong>design</strong> for an assessment of this theory
by creating a block-randomized study — with blocks defined by the
theoretically relevant groups.</p></li>
<li><p>We can <strong>plan</strong> for such an assessment by (1)
<strong>pre-registering specific subgroup analyses</strong> (whether or
not we block on that group in the design phase) and (2) making sure to
measure group membership during baseline data collection
pre-treatment.</p></li>
</ul>
</section>
<section id="effects-that-differ-by-groups-ii" class="slide level2">
<h2>Effects that differ by groups II</h2>
<ul>
<li><p>If we have not planned ahead, subgroup-specific analyses can be
useful as explorations but should not be understood as confirmatory:
they can too easily create problems of testing too many hypotheses and
thus inflate false positive rates.</p></li>
<li><p>We <strong>should not use groups formed by treatment</strong>.
(This is either “mediation analysis” or “conditioning on post-treatment
variables” and deserves its own module).</p></li>
</ul>
</section></section>
<section>
<section id="causal-effects-when-we-do-not-control-the-dose"
class="title-slide slide level1">
<h1>Causal effects when we do not control the dose</h1>

</section>
<section id="defining-causal-effects-i" class="slide level2">
<h2>Defining causal effects I</h2>
<p>Imagine a door-to-door communication experiment where some houses are
randomly assigned to receive a visit. Note that we now use <span
class="math inline">\(Z\)</span> and <span
class="math inline">\(d\)</span> instead of <span
class="math inline">\(T\)</span>.</p>
<ul>
<li><span class="math inline">\(Z_i\)</span> is random assignment to a
visit (<span class="math inline">\(Z_i=1\)</span>) or not (<span
class="math inline">\(Z_i=0\)</span>).</li>
<li><span class="math inline">\(d_{i,Z_i=1}=1\)</span> means that person
<span class="math inline">\(i\)</span> would open the door to have a
conversation when assigned a visit.</li>
<li><span class="math inline">\(d_{i,Z_i=1}=0\)</span> means that person
<span class="math inline">\(i\)</span> would not open the door to have a
conversation when assigned a visit.</li>
<li>Opening the door is an outcome of the treatment.</li>
</ul>
</section>
<section id="defining-causal-effects-ii" class="slide level2">
<h2>Defining causal effects II</h2>
<ul>
<li><p><span class="math inline">\(y_{i,Z_i = 1, d_{i,Z_i=1}=1}\)</span>
is the potential outcome for people who were assigned a visit and who
opened the door. (“Compliers” or “Always-takers”)</p></li>
<li><p><span class="math inline">\(y_{i,1, d_{i,Z_i=1}=0}\)</span> is
the potential outcome for people who were assigned a visit and who did
not open the door. (“Never-takers” or “Defiers”)</p></li>
<li><p><span class="math inline">\(y_{i,0, d_{i,0}=1}\)</span> is the
potential outcome for people who were not assigned a visit and who
opened the door. (“Defiers” or “Always-takers”)</p></li>
<li><p><span class="math inline">\(y_{i,0, d_{i,0}=0}\)</span> is the
potential outcome for people who were not assigned a visit and who would
not have opened the door. (“Compliers” or “Never-takers”)</p></li>
</ul>
</section>
<section id="defining-causal-effects-iii" class="slide level2">
<h2>Defining causal effects III</h2>
<p>We could also write <span class="math inline">\(y_{i,Z_i = 0,
d_{i,Z_i=1}=1}\)</span> for people who were not assigned a visit but who
would have opened the door had they been assigned a visit etc.</p>
<p>In this case we can simplify our potential outcomes:</p>
<ul>
<li><span class="math inline">\(y_{i,0, d_{i,1}=1} = y_{i,0, d_{i,1}=0}
= y_{i,0, d_{i,0}=0}\)</span> because your outcome is the same
regardless of how you don’t open the door.</li>
</ul>
</section>
<section id="defining-causal-effects-iv" class="slide level2">
<h2>Defining causal effects IV</h2>
<p>We can simplify the ways in which people get a dose of the treatment
like so (where <span class="math inline">\(d\)</span> is lower case
reflecting the idea that whether you open the door when visited or not
is a fixed attribute like a potential outcome).</p>
<ul>
<li><span class="math inline">\(Y\)</span> : outcome (<span
class="math inline">\(y_{i,Z}\)</span> or <span
class="math inline">\(y_{i,Z_i=1}\)</span> for potential outcome to
treatment for person <span class="math inline">\(i\)</span>, fixed)</li>
<li><span class="math inline">\(X\)</span> : covariate/baseline
variable</li>
<li><span class="math inline">\(Z\)</span> : treatment assignment (<span
class="math inline">\(Z_i=1\)</span> if assigned to a visit, <span
class="math inline">\(Z_i=0\)</span> if not assigned to a visit)</li>
<li><span class="math inline">\(D\)</span> : treatment received (<span
class="math inline">\(D_i=1\)</span> if answered the door, <span
class="math inline">\(D_i=0\)</span> if person <span
class="math inline">\(i\)</span> did not answer the door) (using <span
class="math inline">\(D\)</span> here because <span
class="math inline">\(D_i = d_{i,1} Z_{i} + d_{i,0}
(1-Z_i)\)</span>)</li>
</ul>
</section>
<section id="defining-causal-effects-v" class="slide level2">
<h2>Defining causal effects V</h2>
<p>We have two causal effects of <span class="math inline">\(Z\)</span>:
<span class="math inline">\(Z \rightarrow Y\)</span> (<span
class="math inline">\(\delta\)</span>, ITT, ITT<span
class="math inline">\(_Y\)</span>), and <span class="math inline">\(Z
\rightarrow D\)</span> (GG call this ITT<span
class="math inline">\(_D\)</span>).</p>
<p>And different types of people can react differently to the attempt to
move the dose with the instrument.</p>
</section>
<section id="defining-causal-effects-vi" class="slide level2">
<h2>Defining causal effects VI</h2>
<p>The <span class="math inline">\(ITT=ITT_Y=\delta= \bar{y}_{Z=1} -
\bar{y}_{Z=0}\)</span>.</p>
<p>But, in this design, <span
class="math inline">\(\bar{y}_{Z=1}=\bar{y}_{1}\)</span> is split into
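pieces: the outcomes of those who answered the door (Compliers and
Always-takers) and of those who did not (Never-takers and Defiers).
Write <span class="math inline">\(p_C\)</span> for the proportion of
compliers in the study (and likewise <span
class="math inline">\(p_A\)</span>, <span
class="math inline">\(p_N\)</span>, and <span
class="math inline">\(p_D\)</span> for Always-takers, Never-takers, and
Defiers).</p>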
<p><span class="math display">\[\begin{equation}
\bar{y}_{1}=(\bar{y}_{1}|C)p_C + (\bar{y}_{1}|A)p_A + (\bar{y}_1|N)p_N +
(\bar{y}_1|D)p_D.
\end{equation}\]</span></p>
<p>And <span class="math inline">\(\bar{y}_{0}\)</span> is also split
into pieces:</p>
<p><span class="math display">\[\begin{equation}
\bar{y}_{0}=(\bar{y}_{0}|C)p_C + (\bar{y}_{0}|A)p_A + (\bar{y}_{0}|N)p_N
+ (\bar{y}_0|D)p_D.
\end{equation}\]</span></p>
</section>
<section id="defining-causal-effects-vii" class="slide level2">
<h2>Defining causal effects VII</h2>
<p>So, the ITT itself is a combination of the effects of <span
class="math inline">\(Z\)</span> on <span
class="math inline">\(Y\)</span> within these different groups (imagine
substituting in and then re-arranging so that we have a set of ITTs, one
for each type of subject). But, we can still estimate it because we have
unbiased estimators of <span class="math inline">\(\bar{y}_1\)</span>
and <span class="math inline">\(\bar{y}_0\)</span> within each type.</p>
</section>
<section id="learning-about-the-itt-i" class="slide level2">
<h2>Learning about the ITT I</h2>
<p>First, let’s learn about the effect of the policy itself. To write
down the ITT, we do not need to consider all of the types above. We have
no defiers (<span class="math inline">\(p_D=0\)</span>) and we know the
ITT for both Always-takers and Never-takers is 0.</p>
<p><span class="math display">\[\begin{equation}
\bar{y}_{1}=(\bar{y}_{1}|C)p_C + (\bar{y}_{1}|A)p_A + (\bar{y}_1|N)p_N
\end{equation}\]</span></p>
<p><span class="math display">\[\begin{equation}
\bar{y}_{0}=(\bar{y}_{0}|C)p_C + (\bar{y}_{0}|A)p_A + (\bar{y}_{0}|N)p_N
\end{equation}\]</span></p>
</section>
<section id="learning-about-the-itt-ii" class="slide level2">
<h2>Learning about the ITT II</h2>
<p>First, let’s learn about the effect of the policy itself. To write
down the ITT, we do not need to consider all of the types above. We have
no defiers (<span class="math inline">\(p_D=0\)</span>) and we know the
ITT for both Always-takers and Never-takers is 0.</p>
<p><span class="math display">\[\begin{align}
ITT    = &amp; \bar{y}_{1} - \bar{y}_{0} \\
        = &amp; ( (\bar{y}_{1}|C)p_C + (\bar{y}_{1}|A)p_A +
(\bar{y}_1|N)p_N ) - \\
       &amp; ( (\bar{y}_{0}|C)p_C + (\bar{y}_{0}|A)p_A +
(\bar{y}_{0}|N)p_N )  \\
       \intertext{collecting each type together --- to have an ITT for
each type}
       = &amp; ( (\bar{y}_{1}|C)p_C -  (\bar{y}_{0}|C)p_C )  +   (
(\bar{y}_{1}|A)p_A - (\bar{y}_{0}|A)p_A ) + \\
       &amp; ( (\bar{y}_1|N)p_N  - (\bar{y}_{0}|N)p_N ) \\
       = &amp; \left( (\bar{y}_{1}|C) -  (\bar{y}_{0}|C)
\right)p_C   +  \\
       &amp; \left( (\bar{y}_{1}|A)- (\bar{y}_{0}|A)
\right)p_A  +  \left( (\bar{y}_1|N) - (\bar{y}_{0}|N) \right)p_N
\end{align}\]</span></p>
</section>
<section id="learning-about-the-itt-iii" class="slide level2">
<h2>Learning about the ITT III</h2>
<p><span class="math display">\[\begin{align}
ITT     = &amp;   \bar{y}_{1} - \bar{y}_{0} \\
        = &amp;  ( (\bar{y}_{1}|C)p_C + (\bar{y}_{1}|A)p_A +
(\bar{y}_1|N)p_N ) - \\
       &amp; ( (\bar{y}_{0}|C)p_C + (\bar{y}_{0}|A)p_A +
(\bar{y}_{0}|N)p_N )  \\
        = &amp;   ( (\bar{y}_{1}|C)p_C -  (\bar{y}_{0}|C)p_C )  +   (
(\bar{y}_{1}|A)p_A - (\bar{y}_{0}|A)p_A ) + \\
       &amp; ( (\bar{y}_1|N)p_N  - (\bar{y}_{0}|N)p_N ) \\
        = &amp;   ( (\bar{y}_{1}|C) -  (\bar{y}_{0}|C))p_C   +   (
(\bar{y}_{1}|A)- (\bar{y}_{0}|A))p_A  + \\
       &amp; ( (\bar{y}_1|N) - (\bar{y}_{0}|N) )p_N
\end{align}\]</span></p>
</section>
<section id="learning-about-the-itt-iv" class="slide level2">
<h2>Learning about the ITT IV</h2>
<p>And, if the effect of the dose can only occur for those who open the
door, and you can only open the door when assigned to do so then:</p>
<p><span class="math display">\[\begin{equation}
( (\bar{y}_{1}|A)- (\bar{y}_{0}|A))p_A = 0  \text{ and } ( (\bar{y}_1|N)
- (\bar{y}_{0}|N) )p_N = 0
\end{equation}\]</span></p>
<p>And</p>
<p><span class="math display">\[\begin{equation}
ITT =  ( (\bar{y}_{1}|C) -  (\bar{y}_{0}|C))p_C  = ( CACE ) p_C.
\end{equation}\]</span></p>
</section>
<section id="the-complier-average-causal-effect-i" class="slide level2">
<h2>The complier average causal effect I</h2>
<p>We would also like to learn about the causal effect of answering the
door and having the conversation, the theoretically interesting
effect.</p>
<p>But this comparison is confounded by <span
class="math inline">\(x\)</span>: a simple <span
class="math inline">\(\bar{Y}|D=1 - \bar{Y}|D=0\)</span> comparison
tells us about differences in the outcome due to <span
class="math inline">\(x\)</span> in addition to the difference caused by
<span class="math inline">\(D\)</span>. (Numbers below from some
simulated data)</p>
</section>
<section id="the-complier-average-causal-effect-ii"
class="slide level2">
<h2>The complier average causal effect II</h2>
<div class="sourceCode" id="cb73"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb73-1"><a href="#cb73-1" aria-hidden="true" tabindex="-1"></a><span class="fu">with</span>(dat, <span class="fu">cor</span>(Y, x)) <span class="do">## can be any number</span></span>
<span id="cb73-2"><a href="#cb73-2" aria-hidden="true" tabindex="-1"></a><span class="fu">with</span>(dat, <span class="fu">cor</span>(d, x)) <span class="do">## can be any number</span></span>
<span id="cb73-3"><a href="#cb73-3" aria-hidden="true" tabindex="-1"></a><span class="fu">with</span>(dat, <span class="fu">cor</span>(Z, x)) <span class="do">## should be near 0</span></span></code></pre></div>
<p>But we just saw that, in this design and with these assumptions
(including a SUTVA assumption), <span class="math inline">\(ITT = (
(\bar{y}_{1}|C) - (\bar{y}_{0}|C))p_C = (CACE) p_C\)</span>, so we can
define <span class="math inline">\(CACE=ITT/p_C\)</span>.</p>
</section>
<section id="how-to-calculate-the-itt-and-cacelate-i"
class="slide level2">
<h2>How to calculate the ITT and CACE/LATE I</h2>
<p>Some example data (where we know all potential outcomes):</p>
<table>
<thead>
<tr class="header">
<th style="text-align: right;">X</th>
<th style="text-align: right;">u</th>
<th style="text-align: left;">type</th>
<th style="text-align: right;">Z</th>
<th style="text-align: right;">pZ</th>
<th style="text-align: right;">DZ1</th>
<th style="text-align: right;">YD0Z0</th>
<th style="text-align: right;">YD1Z0</th>
<th style="text-align: right;">YD0Z1</th>
<th style="text-align: right;">YD1Z1</th>
<th style="text-align: right;">D</th>
<th style="text-align: right;">Y</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td style="text-align: right;">4</td>
<td style="text-align: right;">1.95</td>
<td style="text-align: left;">Complier</td>
<td style="text-align: right;">0</td>
<td style="text-align: right;">0</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">1.95</td>
<td style="text-align: right;">2.52</td>
<td style="text-align: right;">1.95</td>
<td style="text-align: right;">2.52</td>
<td style="text-align: right;">0</td>
<td style="text-align: right;">1.95</td>
</tr>
<tr class="even">
<td style="text-align: right;">2</td>
<td style="text-align: right;">0.05</td>
<td style="text-align: left;">Complier</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0.05</td>
<td style="text-align: right;">0.63</td>
<td style="text-align: right;">0.05</td>
<td style="text-align: right;">0.63</td>
<td style="text-align: right;">1</td>
<td style="text-align: right;">0.63</td>
</tr>
</tbody>
</table>
</section>
<section id="how-to-calculate-the-itt-and-cacelate-ii"
class="slide level2">
<h2>How to calculate the ITT and CACE/LATE II</h2>
<p>The ITT and CACE (the parts)</p>
<div class="sourceCode" id="cb74"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb74-1"><a href="#cb74-1" aria-hidden="true" tabindex="-1"></a>itt_y <span class="ot">&lt;-</span> <span class="fu">difference_in_means</span>(Y <span class="sc">~</span> Z, <span class="at">data =</span> dat0)</span>
<span id="cb74-2"><a href="#cb74-2" aria-hidden="true" tabindex="-1"></a>itt_y</span></code></pre></div>
<pre><code>Design:  Standard 
  Estimate Std. Error t value Pr(&gt;|t|) CI Lower CI Upper    DF
Z  0.08725      0.233  0.3745   0.7089  -0.3752   0.5497 97.97</code></pre>
<div class="sourceCode" id="cb76"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb76-1"><a href="#cb76-1" aria-hidden="true" tabindex="-1"></a>itt_d <span class="ot">&lt;-</span> <span class="fu">difference_in_means</span>(D <span class="sc">~</span> Z, <span class="at">data =</span> dat0)</span>
<span id="cb76-2"><a href="#cb76-2" aria-hidden="true" tabindex="-1"></a>itt_d</span></code></pre></div>
<pre><code>Design:  Standard 
  Estimate Std. Error t value  Pr(&gt;|t|) CI Lower CI Upper    DF
Z     0.68    0.07307   9.307 8.454e-15   0.5348   0.8252 89.31</code></pre>
</section>
<section id="how-to-calculate-the-itt-and-cacelate-iii"
class="slide level2">
<h2>How to calculate the ITT and CACE/LATE III</h2>
<p>All together:<a href="#/fn1" class="footnote-ref" id="fnref1"
role="doc-noteref"><sup>1</sup></a></p>
<div class="sourceCode" id="cb78"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb78-1"><a href="#cb78-1" aria-hidden="true" tabindex="-1"></a>cace_est <span class="ot">&lt;-</span> <span class="fu">iv_robust</span>(Y <span class="sc">~</span> D <span class="sc">|</span> Z, <span class="at">data =</span> dat0)</span>
<span id="cb78-2"><a href="#cb78-2" aria-hidden="true" tabindex="-1"></a>cace_est</span></code></pre></div>
<pre><code>            Estimate Std. Error t value Pr(&gt;|t|) CI Lower CI Upper DF
(Intercept)   0.3347     0.1912  1.7502  0.08321 -0.04479   0.7142 98
D             0.1283     0.3404  0.3769  0.70705 -0.54727   0.8039 98</code></pre>
<div class="sourceCode" id="cb80"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb80-1"><a href="#cb80-1" aria-hidden="true" tabindex="-1"></a><span class="do">## Notice same as below:</span></span>
<span id="cb80-2"><a href="#cb80-2" aria-hidden="true" tabindex="-1"></a><span class="fu">coef</span>(itt_y)[[<span class="st">&quot;Z&quot;</span>]] <span class="sc">/</span> <span class="fu">coef</span>(itt_d)[[<span class="st">&quot;Z&quot;</span>]]</span></code></pre></div>
<pre><code>[1] 0.1283</code></pre>
</section>
<section id="summary-of-encouragementcomplierdose-oriented-designs"
class="slide level2">
<h2>Summary of Encouragement/Complier/Dose oriented designs:</h2>
<ul>
<li>Analyze as you randomized, even when you don’t control the dose</li>
<li>The danger of per-protocol analysis.</li>
</ul>
</section>
<section id="references" class="slide level2 unnumbered">
<h2 class="unnumbered">References</h2>
<div id="refs" class="references csl-bib-body hanging-indent"
role="doc-bibliography">
<div id="ref-freedman2008rae" class="csl-entry" role="doc-biblioentry">
Freedman, David A. 2008a. <span>“<span class="nocase">On regression
adjustments to experimental data</span>.”</span> <em>Advances in Applied
Mathematics</em> 40 (2): 180–93.
</div>
<div id="ref-freedman2008randomization" class="csl-entry"
role="doc-biblioentry">
———. 2008b. <span>“Randomization Does Not Justify Logistic
Regression.”</span> <em>Statistical Science</em> 23 (2): 237–49.
</div>
<div id="ref-imbens2005robust" class="csl-entry" role="doc-biblioentry">
Imbens, Guido W., and Paul R. Rosenbaum. 2005. <span>“Robust, Accurate
Confidence Intervals with a Weak Instrument: Quarter of Birth and
Education.”</span> <em>Journal of the Royal Statistical Society Series
A</em> 168 (1): 109–26.
</div>
<div id="ref-lin_agnostic_2013" class="csl-entry"
role="doc-biblioentry">
Lin, Winston. 2013. <span>“Agnostic Notes on Regression Adjustments to
Experimental Data: <span>Reexamining</span> <span>Freedman</span>’s
Critique.”</span> <em>The Annals of Applied Statistics</em> 7 (1):
295–318.
</div>
</div>
</section></section>
<section class="footnotes footnotes-end-of-document"
role="doc-endnotes">
<hr />
<ol>
<li id="fn1" role="doc-endnote"><p>This works when <span
class="math inline">\(Z \rightarrow D\)</span> is not weak; see <span
class="citation" data-cites="imbens2005robust">Imbens and Rosenbaum (<a
href="#/ref-imbens2005robust" role="doc-biblioref">2005</a>)</span> for
a cautionary tale.<a href="#/fnref1" class="footnote-back"
role="doc-backlink">↩︎</a></p></li>
</ol>
</section>
    </div>
  </div>

  <script src="estimation-slides_files/reveal.js-3.3.0.1/lib/js/head.min.js"></script>
  <script src="estimation-slides_files/reveal.js-3.3.0.1/js/reveal.js"></script>

  <script>

      // Boot the reveal.js presentation for this slide deck.
      // Full list of configuration options available at:
      // https://github.com/hakimel/reveal.js#configuration
      Reveal.initialize({
        // Display controls in the bottom right corner
        controls: true,
        // Display a presentation progress bar
        progress: true,
        // Display the page number of the current slide
        slideNumber: true,
        // Push each slide change to the browser history
        history: true,
        // Enable the slide overview mode
        overview: true,
        // Vertical centering of slides
        center: false,
        // Enables touch navigation on devices with touch input
        touch: true,
        // Turns fragments on and off globally
        fragments: true,
        // Flags if we should show a help overlay when the questionmark
        // key is pressed
        help: true,
        // Number of milliseconds between automatically proceeding to the
        // next slide, disabled when set to 0, this value can be overwritten
        // by using a data-autoslide attribute on your slides
        autoSlide: 0,
        // Stop auto-sliding after user input
        autoSlideStoppable: true,
        // Opens links in an iframe preview overlay
        previewLinks: true,
        // Transition style
        transition: 'fade', // none/fade/slide/convex/concave/zoom
        // Transition speed
        transitionSpeed: 'default', // default/fast/slow
        // Transition style for full page slide backgrounds
        backgroundTransition: 'default', // none/fade/slide/convex/concave/zoom
        // Number of slides away from the current that are visible
        viewDistance: 3,

        // Options for the chalkboard plugin that is loaded through
        // `dependencies` below.
        chalkboard: {
          toggleNotesButton: false,
          theme: 'whiteboard',
        },

        // Keyboard configuration. This map is the single `keyboard` entry:
        // the object literal previously also contained `keyboard: true`
        // higher up, which this later entry silently overrode, so the dead
        // duplicate was removed. The keys are numeric key codes, each bound
        // to a chalkboard action.
        // NOTE(review): assumes reveal.js keeps its standard navigation
        // shortcuts active when `keyboard` is an object - confirm against
        // the bundled reveal.js version.
        keyboard: {
          67: function() { RevealChalkboard.toggleNotesCanvas() },    // toggle notes canvas when 'c' is pressed
          66: function() { RevealChalkboard.toggleChalkboard() }, // toggle chalkboard when 'b' is pressed
          46: function() { RevealChalkboard.clear() },    // clear chalkboard when 'DEL' is pressed
           8: function() { RevealChalkboard.reset() },    // reset chalkboard data on current slide when 'BACKSPACE' is pressed
          68: function() { RevealChalkboard.download() }, // download recorded chalkboard drawing when 'd' is pressed
        },

        // Optional reveal.js plugins
        dependencies: [
          { src: 'estimation-slides_files/reveal.js-3.3.0.1/plugin/notes/notes.js', async: true },
          { src: 'estimation-slides_files/reveal.js-3.3.0.1/plugin/search/search.js', async: true },
          { src: 'estimation-slides_files/reveal.js-3.3.0.1/plugin/chalkboard/chalkboard.js', async: true },
        ]
      });
    </script>
  <!-- dynamically load mathjax for compatibility with self-contained -->
  <script>
    (function () {
      // Build the MathJax <script> tag at runtime and attach it to the
      // document's <head>, so the library is fetched from
      // mathjax.rstudio.com when the page loads.
      var mathjaxTag = document.createElement("script");
      mathjaxTag.type = "text/javascript";
      mathjaxTag.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";

      var headEl = document.getElementsByTagName("head")[0];
      headEl.appendChild(mathjaxTag);
    })();
  </script>

<script>
  (function() {
    // Nothing to wire up when jQuery is not present on the page.
    if (!window.jQuery) {
      return;
    }

    // Re-emit each reveal.js slide change as jQuery events on the slide
    // elements: 'hidden' on the slide being left, 'shown' on the slide
    // being entered.
    function relaySlideVisibility(change) {
      window.jQuery(change.previousSlide).trigger('hidden');
      window.jQuery(change.currentSlide).trigger('shown');
    }

    Reveal.addEventListener( 'slidechanged', relaySlideVisibility );
  })();
</script>


  </body>
</html>