\input fontmac
\input mathmac

\def\eps{\epsilon}
\def\FF{{\bf F}}
\def\bar#1{\overline{#1}}
\def\hat#1{\widehat{#1}}
\def\norm#1{|\!|#1|\!|}
\def\bignorm#1{\big|\!\big|#1\big|\!\big|}
\def\Norm#1{\Big|\!\Big|#1\Big|\!\Big|}
\def\normm#1{\bigg|\!\bigg|#1\bigg|\!\bigg|}
\def\Bohr{\op{\rm Bohr}}
\def\Eta{\op{\bf H}}
\def\Etaseven{\op{\sevenbf H}}
\def\II{\op{\bf I}}
\def\dd{\op{\bf d}}
\def\ee{\op{\bf e}}
\def\P{{\cal P}}
\def\given{\mathbin{|}}
\def\argmax{\limitop{\rm arg$\,$max}}
\def\dTV{d_{\rm TV}}
\def\advthm{\the\sectcount.\the\thmcount\global\advance \thmcount by 1}
\def\advsect{\global\advance\sectcount by 1\section\the\sectcount\global\thmcount=1. }
\sectcount=0

\widemargins
\bookheader{ENTROPY AND ADDITIVE COMBINATORICS}{MARCEL K. GOH}

\maketitle{Entropy and additive combinatorics}{}{Marcel K. Goh}{\sl Department of Mathematics and Statistics,
McGill University}

\floattext4.5 \ninebf Abstract.
\ninepoint These expository notes give a gentle introduction to the notion of entropy as it is used
in additive combinatorics, moving at a leisurely pace through the entropic analogues of Pl\"unnecke's
theorem and the Balog--Szemer\'edi--Gowers theorem before tackling the recent
proof of the polynomial Freiman--Ruzsa
conjecture by W.~T.~Gowers, B.~Green, F.~Manners, and T.~Tao. Effort has been put into making this
document as self-contained as possible, and extra proof details have been supplied in the hope
that these notes may be accessible to the average graduate student or enterprising undergraduate.

\bigskip

\advsect The Khintchine--Shannon axioms

Let $X$ be a discrete random variable. Its entropy $\Eta\{X\}$ is a real number
(or $\infty$) that measures the ``information content'' of $X$. For example, if $X$ is
a constant random variable, then $\Eta\{X\}$ should be zero (we do not gain any information
from knowing the value of $X$), and if $X$ is
uniformly distributed on $\{0,1\}^n$, then $\Eta\{X\}$ should be proportional to $n$, since $X$ is
determined by
$n$ bits of information.
It satisfies the following axioms, which are
sometimes called the Khinchine--Shannon axioms.
\medskip
\item{a)} ({\it Invariance.}) If $X$ takes values in $A$, $Y$ takes values in $B$,
$\phi:A\to B$ is a bijection, and $\pr\{Y = \phi(a)\} = \pr\{X = a\}$ for all $a\in A$,
then $\Eta\{X\} = \Eta\{Y\}$.
\smallskip
\item{b)} ({\it Extensibility.}) If $X$ takes values in $A$ and $Y$ takes values in $B$ for
a set $B$ such that $A\subseteq B$, and furthermore $\pr\{Y=a\} = \pr\{X=a\}$ for all $a\in A$,
then $\Eta\{X\} = \Eta\{Y\}$.
\smallskip
\item{c)} ({\it Continuity.}) The quantity $\Eta\{X\}$ depends continuously on the probabilities
$\pr\{X=a\}$.
\smallskip
\item{d)} ({\it Maximisation.}) Over all possible random variables $X$ taking values in a finite
set $A$, the quantity $\Eta\{X\}$ is maximised for the uniform distribution.
\smallskip
\item{e)} ({\it Additivity.})
For $X$ taking values in $A$ and $Y$ taking values in $B$, we have the formula
$$ \Eta\{X,Y\} = \Eta\{X\} + \Eta\{ Y \given X\},$$
where $\Eta\{X,Y\} = \Eta\bigl\{ (X,Y)\bigr\}$ and
$$ \Eta\{Y\given X\} = \sum_{x\in A} \pr\{X=x\} \Eta\{ Y\given X = x\}.$$
\medskip

The continuity axiom presupposes a topology on the set of all distributions on the target
space of $X$. This topology will only be important here and there, so we will not give a complete
description of it now, but simply introduce the relevant properties as needed later on.

We shall take it on faith that there really exists a function on random variables satisfying
these axioms.
(It is very possible you have met the formula for entropy
somewhere on your travels, but you will not find it anywhere in
these notes.)
In fact, the axioms only define entropy
up to a multiplicative constant, so we shall add the following axiom.

\medskip
\item{f)} ({\it Normalisation.}) If $X$ is uniformly distributed on $\{0,1\}$, then
$\Eta\{X\} = 1$.
\medskip

Notationally, we would expect that $\Eta\{Y\given X\} = \Eta\{Y\}$ if $X$ and $Y$ are independent.
This is the first proposition we will carefully prove, using only the axioms. The axiomatic description above,
as well as many of the proofs in
this section, are transcribed (with some adaptations here and there) from lectures given by W.~T.~Gowers.

\proclaim Proposition \advthm. Let $X$ and $Y$ be independent random variables. Then
$\Eta\{Y\given X\} = \Eta\{Y\}$ and consequently
$\Eta\{X,Y\} = \Eta\{X\} + \Eta\{Y\}$.

\proof Suppose $X$ takes values in a finite set $A$. Then for all $x\in A$, the distribution of
$Y$ and $Y$ given that $X = x$ is the same, so
$$\Eta\{Y \given X\} = \sum_{x\in A} \pr\{X = x\} \Eta\{Y\given X = x\} = \sum_{x\in A} \pr\{X = x\} \Eta\{Y\}
=\Eta\{Y\}.$$
The second version of the statement follows from the additivity axiom.\slug

We will sometimes use the notation $X^n$ to denote the vector $(X_1,\ldots,X_n)$ where the $X_i$
are independent copies of the random variable $X$. We have the following three corollaries, which
are each proved by induction. The second also requires the normalisation axiom,
and the third is often known as the {\it chain rule}.

\edef\corxn{\the\sectcount.\the\thmcount}
\proclaim Corollary \advthm. We have $\Eta\{X^n\} = n\Eta\{X\}$.\slug

\edef\cortwon{\the\sectcount.\the\thmcount}
\proclaim Corollary \advthm. If $X$ is uniformly distributed on a set of size $2^n$, then
$$\Eta\{X\} = n.\noskipslug$$

\parenproclaim Corollary~{\advthm} (Chain rule).
Let $X_1,\ldots,X_n$ be random variables. Then
$$\Eta\{X_1,\ldots,X_n\} = \Eta\{X_1\} + \Eta\{X_2\given X_1\}
+ \cdots+\Eta\{X_n \given X_1,\ldots,X_{n-1}\}.\noskipslug$$

Next we establish the intuitive statement
that the entropy of a uniform random variable supported on a set $A$ is at most
the entropy of a uniform random variable supported on a superset $B$ of $A$.

\edef\propunifineq{\the\sectcount.\the\thmcount}
\proclaim Proposition \advthm. Let $A\subseteq B$ with $B$ finite,
let $X$ be uniformly distributed on $A$, and let $Y$ be uniformly distributed on $B$.
Then $\Eta\{X\} \le \Eta\{Y\}$, with equality if and only if $A = B$.

\proof By the extensibility axiom, $\Eta\{X\}$ is not affected if we regard $X$ as a function
taking values in $B$. Then by the maximisation axiom, $\Eta\{X\}\le \Eta\{Y\}$, since $Y$ is
uniform on $B$.

If $A = B$, then it is clear that $\Eta\{X\} = \Eta\{Y\}$, since $X$ and $Y$
are the same random variable.

On the other hand, say $|A| = m$ and $|B| = n$ with $m<n$. If $m=1$, then by the previous proposition
we have $\Eta\{X\} = 0$, and by normalisation and invariance, $\Eta\{Y\} = 1$.
When $m\ge 2$, pick $k$ such that $m^k \le n^{k-1}$, so that $|A^k| \le |B^{k-1}|$.
Then by Corollary~{\corxn} and the inequality we showed in the first paragraph of this proof, we have
$$n\Eta\{X\} = \Eta\{X^n\} \le \Eta\{X^{n-1}\} = (n-1)\Eta\{Y\},$$
whence $\Eta\{X\} < \Eta\{Y\}$.\slug

The thread connecting entropy and additive combinatorics is rather a precarious one.
As noted by I.~Ruzsa~\ref{ruzsa2008}, there are scenarios in which combinatorial inequalities and
their corresponding entropy ones
are equivalent, scenarios in which they both hold but their equivalence cannot be established,
and further scenarios where an inequality holds in one world but cannot (or has not) been proven in the other.

The ``dictionary'' that allows us to translate
statements between cardinality inequalities and entropy inequalities is based, in part, on the following
observation. (In this and the rest of the notes, $\lg$ denotes the base-$2$ logarithm. This base can
be changed by modifying the normalisation axiom.)

\edef\proplogeq{\the\sectcount.\the\thmcount}
\proclaim Proposition \advthm. Let $X$ be a uniform random variable on a finite set $A$.
Then
$$\Eta\{X\} = \lg |A|.$$

\proof For any positive integer $n$ we can let
$X^n$ denote a tuple of independent copies of $X$; Corollary~{\corxn} tells us $\Eta\{X^n\} = n\Eta\{X\}$.
Let $m$ be such that $2^m \le |A|^n \le 2^{m+1}$ so that
$${m\over n} \le \lg |A| \le {(m+1)\over n}.$$
Let $Y$ be uniform on a set of size $2^m$,
and let $Z$ be uniform on a set of size $2^{m+1}$. Then by Corollary~{\cortwon} we have
$\Eta\{Y\} = m $ and $\Eta\{Z\} = (m+1)$. Then by Proposition~{\propunifineq}
we have
$${m\over n} \le \Eta\{X\} \le {(m+1)\over n}.$$
In other words, $\Eta\{X\}$ satisfies the same bounds as $\lg |A|$. Taking $n$ large, we can make
these bounds arbitrarily tight, proving the claim.\slug

The maximisation axiom gives the following corollary.

\edef\corlogineq{\the\sectcount.\the\thmcount}
\proclaim Corollary \advthm. Let $X$ be a random variable supported on a finite set $A$.
Then
$$\Eta\{X\} \le \lg |A|.\noskipslug$$

Hence the entropy $\Eta\{X\}$ is at most the exponential of the size of its support. As we will see,
simply replacing (logarithms of) cardinalities with entropies, we at useful ``entropic analogues''
of combinatorial statements. But first, we need more lemmas.

If $Y$ is a random variable such that $Y = f(X)$ for some random variable $Y$ and some function $f$,
then we say that $Y$ is {\it determined by} $X$ or $X$ {\it determines} $Y$.
We want to show that $\Eta\{Y\}\le \Eta\{X\}$, which reflects the idea that we get more information
from $X$ than from $Y$. This, rather annoyingly, seems to require a couple of steps.

\proclaim Lemma \advthm. If $Y = f(X)$ then $\Eta\{X\} = \Eta\{Y\} + \Eta\{X\given Y\}$.

\proof There is a a bijection between values $x$ taken
by $X$ and values $\bigl(x,f(x)\bigr)$ taken by $(X,Y)$, so we have
$$\Eta\{X\} = \Eta\{X,Y\} = \Eta\{Y\} + \Eta\{X\given Y\}$$
by invariance and additivity.\slug

We are now done if we can show that entropy is nonnegative.
This is a corollary of the following lemma. The following proof is a modification of
one due to S.~Eberhard.

\edef\propmaxprob{\the\sectcount.\the\thmcount}
\proclaim Proposition \advthm. Let $X$ be a discrete random variable supported on a set $A$ and let
$$a^* = \argmax_{a\in A} \pr\{X = a\}.$$
Then
$$\pr\{X = a^*\} \ge 2^{-\Etaseven\{X\}}.$$

\proof First we will work in the case where there exists $n$ such that $\pr\{X = a\}$ is a multiple of
$1/n$ for all $a\in A$. Let $Y$ be uniformly distributed on $[n]$ and let $\{E_a\}_{a\in A}$
be a partition of $[n]$ such that $|E_a| = n\pr\{X= a\}$ for all $a\in A$, and let $Z = a$
if $Y\in E_a$. This definition makes $Z$ and $X$ identically distributed, so $\Eta\{Z\} = \Eta\{X\}$
by the invariance axiom.

For every $a\in A$, the conditional entropy $\Eta\{Y \given Z = a\}$ is uniformly
distributed on a set of size $|E_a|$. From our choice of $a^*$
we have $|E_{a^*}| \ge |E_a|$ for all $a\in A$. Hence by Proposition~{\proplogeq}, we have
$$\Eta\{Y\given Z\} = \sum_{a\in A} \pr\{X = a\} \Eta\{Y\given X = a\}
= \sum_{a\in A} \pr\{X = a\} \log |E_a| \le \log |E_{a^*}|.$$
Since $Z$ is determined by $Y$, we have $\Eta\{Y\} = \Eta\{Z\} + \Eta\{Y\given Z\}$ by the previous lemma.
So by another invocation of Proposition~{\proplogeq}, we have
$$\eqalign{
\Eta\{Z\} &= \Eta\{Y\} - \Eta\{Y\given Z\} \cr
&\ge \log n - \log |E_{a^*}| \cr
&\ge \log \biggl({n\over |E_{a^*}|}\biggr) \cr
&= \log\biggl({1\over \pr\{X = a^*\}}\biggr), \cr
}$$
and hence $2^{-\Etaseven\{X\}} = 2^{-\Etaseven\{Z\}} \le \pr\{X = a^*\}$.

The general case follows from the continuity axiom.\slug

This proof came dangerously close to deriving the formula for entropy, but we will not need any such formula,
so we will refrain from mentioning it.
From the fact that $\pr\{X = a^*\} \le 1$ we can immediately conclude that entropy is nonnegative.

\edef\corentropynonneg{\the\sectcount.\the\thmcount}
\proclaim Corollary \advthm. Let $X$ be a discrete random variable taking values in a finite
set $A$. Then $\Eta\{X\} \ge 0$.\slug

This observation completes the proof that a random variable has a smaller entropy than one by which
it is determined.

\edef\cordetermines{\the\sectcount.\the\thmcount}
\proclaim Corollary \advthm. If $Y = f(X)$ then $\Eta\{X\} \ge \Eta\{Y\}$.\slug

Next we show that a random variable has zero entropy if and only if it is constant.
This reflects the idea that the variables from which we get no information are those which take
the same value no matter what.

\proclaim Proposition \advthm. Let $X$ be a discrete random variable.
Then $\Eta\{X\} = 0$ if and only if it takes exactly one value.

\proof First suppose that $X$ takes only one value.
Let $a$ be the value of $X$ such that $\pr\{X=a\} = 1$.
Then $(X,X)$ equals $(a,a)$ with probability $1$ as well,
so $\Eta\{X\} = \Eta\{X,X\}$ by the invariance axiom. But it can easily be checked that
$X$ and $(X,X)$ are independent (we have
$$\pr\bigl\{X = a, (X,X)= (a,a)\bigr\} = \pr\{X = a\}\pr\bigl\{(X,X) = (a,a)\bigr\}$$
for instance), so $\Eta\{X,X\} = 2\Eta\{X\}$. Thus we conclude
that $\Eta\{X\} = 0$.

Now suppose that $X$ takes more than one value; let $A$ be the set of $a$ such that
$\pr\{X = a\} > 0$ and let $\alpha = \max_{a\in A} \pr\{X = a\}$. For all $n$ let $X^n$ denote
the tuple of $n$ independent copies of $X$; the maximum probability of any particular value (in $A^n$)
that $X^n$ takes is $\alpha^n$. But $\alpha < 1$ since $X$ takes more than one value, so for any
$\eps > 0$ we can find $n$ such that $\alpha^n < \eps$. This means that we can partition $A^n$ into
two disjoint sets $E$ and $F$ such that $\pr\{X^n \in E\}$ and $\pr\{X^n \in F\}$ are both
in the range $[1/2-\eps, 1/2+\eps]$.

Let $Y$ be the random variable taking the value $0$ if $X^n\in E$ and $1$ if $X^n\in F$. Then
by Corollary~{\corxn}, $\Eta\{X^n\} = n\Eta\{X\}$, and since $X^n$ determines $Y$,
$$\Eta\{X^n\} = \Eta\{Y\} + \Eta\{X^n\given Y\} \ge \Eta\{Y\}.$$
But $\Eta\{Y\} > 0$ for $\eps$ small enough, the normalisation and continuity axioms. So
$\Eta\{X\} \ge \Eta\{Y\}/n > 0$ as well.\slug

\medskip\boldlabel Mutual information.
For random variables $X$ and $Y$, the {\it mutual information} $\II\{X : Y\}$ is defined
by the equivalent formulas
$$\eqalign{
\II\{ X : Y\} &= \Eta\{X\} + \Eta\{Y\} - \Eta\{X,Y\} \cr
&= \Eta\{X\} - \Eta\{X\given Y\} \cr
&= \Eta\{Y\} - \Eta\{Y\given X\}. \cr
}$$
It measures, roughly speaking, how much information one can get from one variable
by looking at the other one. From the formula it is clear that $\II\{X : Y\}  = 0$
if $X$ and $Y$ are independent. In general, we still have the inequality $\II\{X : Y\} \ge 0$,
which a corollary of the following lemma, which expresses the intuitive fact that conditioning
cannot increase the entropy of a random variable.
The proof, which is due to C.~West, uses an argument similar to the one
we used to prove Proposition~{\propmaxprob}.

\proclaim Proposition {\advthm}. Let $X$ and $Y$ be discrete random variables. Then
$$\Eta\{X\given Y\} \le \Eta\{X\}.$$

\proof Let $A$ be the support of $X$ and $B$ be the support of $Y$.
First we consider the case that $X$ is uniform on $A$ (so $A$ is finite). Then by the definition
of conditional entropy,
$$\Eta\{X\given Y\} = \sum_{b\in B} \pr\{Y = b\} \Eta\{X \given Y= b\}.$$
But for each $b$, the random variable $(X\given Y = b)$ takes values in $A$, so its entropy is
bounded above by $\Eta\{X\}$ by the maximisation axiom. Hence $\Eta\{X\given Y\} \le \Eta\{X\}$.

Next, suppose that $A$ and $B$ are both finite and suppose further that $\pr\{Y = b\}$
is rational for all $b$. Then there is an integer $n$ and integers $\{m_b\}_{b\in B}$
such that $\pr\{Y= b\} = m_b/n$ for all $b\in B$. Now partition $[n]$ into sets $\{E_b\}_{b\in B}$,
where $|E_b| = m_b$ for all $b\in B$. We define a random variable $Z$ by sampling uniformly at
random from $E_b$ if $Y = b$, and doing so independently of $(X\given Y=b)$. The result is
a random variable $Z$ that is uniform on $[n]$ and which is independent of $X\given Y$.
Furthermore, since $Z$ determines $Y$, we have $\Eta\{Z\} = \Eta\{Y,Z\}$ by the invariance
axiom. Hence
$$\eqalign{
\Eta\{X\given Y\} &= \Eta\{X\given Y, Z\} \cr
&= \Eta\{X,Y,Z\} - \Eta\{Y,Z\} \cr
&= \Eta\{X,Z\} - \Eta\{Z\} \cr
&= \Eta\{X\given Z\} \cr
&\le \Eta\{X\},\cr
}$$
where the inequality on the last line follows from the previous paragraph.

The general case follows from the continuity axiom and the fact that any discrete random
variable, regarded as a vector in $l_1(\RR)$,
by vectors with finite support, and these in turn can be approximated by vectors of finite support
and rational coordinates.\slug

By the additivity axiom, the previous proposition is equivalent to
$$\Eta\{X,Y\} \le \Eta\{X\} + \Eta\{Y\}.$$
and we shall use this to prove the following submodularity inequality.

\parenproclaim Proposition {\advthm} (Submodularity).
Suppose $X$, $Y$, $Z$, and $W$ are random variables such that $(Z,W)$ determines
$X$, $Z$ determines $Y$, and $W$ also determines $Y$. Then
$$\Eta\{X\} + \Eta\{Y\} \le \Eta\{Z\} + \Eta\{W\}.$$

\proof The hypotheses give the three inequalities
$$\Eta\{X\} \le \Eta\{Z,W\}, \quad \Eta\{Y\}\le \Eta\{Z\},\quad\hbox{and}\quad \Eta\{Y\}\le \Eta\{W\}.$$
From this we see that
$$2\Eta\{X\} + 2\Eta\{Y\} \le 2\Eta\{Z,W\} + \Eta\{Z\} + \Eta\{Y\}.$$
But then since conditioning does not increase entropy, we have
$$2\Eta\{X\} + 2\Eta\{Y\} \le 2\Eta\{Z\} + 2\Eta\{Y\},$$
whence dividing both sides by $2$ completes the proof.\slug

The submodularity inequality is often stated in terms of a triple of random variables in terms of
the {\it conditional mutual information}, which is defined by
$$\eqalign{
\II\{ X : Y \given Z\} &= \sum_{z\in C} \pr\{Z = z\} \II\bigl\{ (X\given Z = z) : (Y\given Z = z)\bigr\} \cr
&= \Eta\{X\given Z\} - \Eta\{X\given Y, Z\} \cr
&= \Eta\{Y\given Z\} - \Eta\{Y\given X, Z\}.\cr
}$$

\proclaim Proposition \advthm. Let $X$, $Y$, and $Z$ be discrete random variables. Then
$$\Eta\{X,Y,Z\} + \Eta\{Z\} \le \Eta\{X,Z\} + \Eta\{Y,Z\},$$
which is equivalent to
$$\II\{X : Y\given Z\} \ge 0.$$
Equality holds if and only if $X$ and $Y$ are independent conditional on $Z$.

\proof It is clear from the additivity axiom that both sides in the first
inequality are equal to $\Eta\{X\} + \Eta\{Y\} + 2\Eta\{Z\}$
if and only if $X$ and $Y$ are independent conditional on $Z$.

To prove the first inequality, note that $(X,Y,Z)$ is jointly determined by $(X,Z)$ and $(Y,Z)$,
and $Z$ is determined by both $(X,Z)$ and $(Y,Z)$ separately, then apply the previous proposition.
Now by the definition of conditional mutual information,
$$\eqalign{
\II\{ X : Y \given Z\} &= \sum_{z\in C} \pr\{Z=z\} \II\bigl\{ (X\given Z = z) : (Y\given Z = z)\bigr\} \cr
&= \Eta\{ X\given Z\} + \Eta\{Y\given Z\} - \Eta\{ X, Y \given Z \} \cr
&= \Eta\{ X, Z\} -2\Eta\{ Z\} + \Eta\{Y,Z\} - \Eta\{ X, Y ,Z \} + \Eta\{Z\}\cr
&= \Eta\{ X, Z\} + \Eta\{Y,Z\} - \Eta\{ X, Y ,Z \} - \Eta\{Z\},\cr
}$$
and this proves that the first statement is equivalent to the second.\slug

\advsect Group-valued random variables

Now we will examine the case where the random variables in question take values in an abelian group $G$,
meaning we can take sums $X+Y$ and differences $X-Y$ of them. Note that if we condition on $Y$,
then the values taken by $X+Y$ are in bijection with values taken by $X$. This leads to the following
proposition.

\edef\maxsumsetbound{\the\sectcount.\the\thmcount}
\proclaim Proposition \advthm. Let $X$ and $Y$ be random variables each taking finitely
many values in an abelian group $G$.
We have
$$\max\bigl( \Eta\{X\}, \Eta\{Y\} \bigr) - \II\{X : Y\} \le \Eta\{ X \pm Y\}.$$
Furthermore, for any random variable $Z$, we have the conditional version
$$\max\bigl( \Eta\{X\given Z\}, \Eta\{Y\given Z\} \bigr) - \II\{X : Y\given Z\} \le \Eta\{ X \pm Y\given Z\}$$
of the same statement.

\proof Since conditioning does not increase entropy, we have
$$\Eta\{ X\pm Y\} \ge \Eta\{ X\pm Y \given Y\},$$
and since the probabilities $\pr\{ X + Y = z \given Y = y\} = \pr\{ X = z-y \given Y = y\}$ for all $z\in G$,
by invariance we have
$$\Eta\{ X\pm Y\} \ge \Eta\{X\given Y\} = \Eta\{X\} - \II\{X : Y\}.$$
Repeating the same argument but exchanging the r\^oles of $X$ and $Y$, we get
$$\Eta\{ X\pm Y\} \ge \Eta\{Y\given X\} = \Eta\{Y\} - \II\{X : Y\},$$
so
$$\Eta\{ X\pm Y\} \ge \max\bigl(\Eta\{X\}, \Eta\{Y\}\bigr) - \II\{X : Y\}.$$
Now let $Z$ be any random variable with finite support.
$$\eqalign{
\Eta\{ X\pm Y\given Z\} &= \sum_{z\in G} \pr\{Z = z\} \Eta\{ X \pm Y \given Z = z\} \cr
&\ge \Bigl(\max\bigl(\Eta\{X\given Z\}, \Eta\{Y\given Z\}\bigr) - \II\{X : Y\given Z\} \Bigr)
\sum_{z\in G} \pr\{Z = z\} \cr
&= \max\bigl(\Eta\{X\given Z\}, \Eta\{Y\given Z\}\bigr) - \II\{X : Y\given Z\} ,\cr
}$$
which completes the proof.\slug

\edef\cormaxsumineq{\the\sectcount.\the\thmcount}
\proclaim Corollary \advthm. If $X$ and $Y$ are independent, then
$$\max\bigl( \Eta\{X\}, \Eta\{Y\} \bigr) \le \Eta\{ X \pm Y\}.$$

\proof The mutual information $\II\{X : Y\}$ is zero whenever $X$ and $Y$ are independent.\slug

\medskip\boldlabel Entropic Ruzsa distance.
In additive combinatorics, whenever we have two finite subsets $A$ and $B$
of the same abelian group, we can compute the Ruzsa distance
$$d(A,B) = \lg {|A-B|\over \sqrt{|A|\cdot|B|}}$$
between them. (This satisfies all the axioms of a metric except the one requiring $d(A,A) = 0$ for all sets
$A$.) This ``distance'' is called the {\it Ruzsa distance} as it was first defined by I.~Ruzsa~\ref{ruzsa1996}.

The entropic analogue of the Ruzsa distance is defined as follows.
For finitely supported random variables $X$ and $Y$ taking values in the same abelian group,
we let $X'$ and $Y'$ be independent copies of $X$
and $Y$, respectively, and define the {\it entropic Ruzsa distance} by
$$\dd\{X,Y\} = \Eta\{X'-Y'\} - {\Eta\{X'\} \over 2} - {\Eta\{Y'\}\over 2}.$$
This definition, first established by T.~Tao~\ref{tao2010},
only depends on the individual distributions of $X$ and $Y$ and does not require them to
have the same sample space. When $X$ and $Y$ are independent, the entropic Ruzsa distance enjoys some
nice properties.

\edef\propabsdist{\the\sectcount.\the\thmcount}
\proclaim Proposition \advthm. Let $X$ and $Y$ be independent taking values in the same group. Then
$$\bigl| \Eta\{X\} - \Eta\{Y\} \bigr| \le 2\dd\{X,Y\}$$
and
$$\max\bigl( \Eta\{X-Y\} - \Eta\{X\}, \Eta\{X-Y\} - \Eta\{Y\} \bigr) \le 2\dd\{X,Y\}$$

\proof By independence we have
$$2\dd\{X,Y\} = 2\Eta\{X-Y\} - \Eta\{X\} - \Eta\{Y\}.$$
Corollary~{\cormaxsumineq} tells us that $2\Eta\{X-Y\} \ge 2\Eta\{X\}$, so this is at least $\Eta\{X\}-\Eta\{Y\}$.
The same corollary also says that $2\Eta\{X-Y\} \ge 2\Eta\{Y\}$, which allows us to add the absolute value bars
to the inequality.

The second inequality is proved using the same corollary, but only applying it to one of the $\Eta\{X-Y\}$
terms.\slug

Similarly to the Ruzsa distance on sets, we don't necessarily have $\dd\{X,X\} = 0$, but we do have
the triangle inequality, which shall now prove.

\proclaim Proposition \advthm. Let $X$, $Y$, and $Z$ be random variables with finite support in the same
abelian group. Then
$$\dd\{X,Z\} \le \dd\{X,Y\} + \dd\{Y,Z\},$$
which is equivalent to
$$\Eta\{X'-Z'\} \le \Eta\{X'-Y'\} + \Eta\{Y'-Z'\} - \Eta\{Y'\}$$
for $X'$, $Y'$, and $Z'$ independent and distributed as $X$, $Y$, and $Z$, respectively.

\proof That the two statements are equivalent is easily obtained by expanding the definition of
entropic Ruzsa distance and cancelling some terms. So without loss of generality, we may assume that
that $X$, $Y$, and $Z$ are independent and just prove the second statement.

By submodularity, we have $\II\bigl\{ (X-Y : Z) \given X-Z\bigr\} \ge 0$, so
\edef\eqfirstsubmodularity{\the\eqcount}
$$\eqalign{
0 &\le \II\bigl\{ (X-Y : Z) \given X-Z\bigr\} \cr
&\le \Eta\{X-Y \given X-Z\} + \Eta\{ Z\given X-Z\} - \Eta\{ X-Y, Z \given X-Z\} \cr
&\le \Eta\{X-Y, X-Z\} + \Eta\{ Z, X-Z\} - \Eta\{ X-Y, Z, X-Z\} - \Eta\{X-Z\}.\cr
}\adveq$$
Now, since the values $(x-y,x-z)$ taken by $(X-Y,X-Z)$ are in bijection with values
$(x-z,y-z)$ taken by $(X-Z, Y-Z)$ via the map $(v,w) \mapsto (w, w-v)$,
by the invariance axiom we have
$$\Eta\{X-Y, X-Z\} = \Eta\{X-Z, Y-Z\} \le \Eta\{X-Z\} + \Eta\{Y-Z\}.$$
Similar invocations of the invariance axiom give
$$\Eta\{Z, X-Z\} = \Eta\{X,Z\}$$
and
$$\Eta\{X-Y, Z, X-Z\} = \Eta\{X,Y,Z\} = \Eta\{X,Z\} + \Eta\{Y\},$$
where in the latter statement the second equality follows from the fact that $(X,Y)$ and $Z$ are
independent.
Substituting these three inequalities into~\refeq{\eqfirstsubmodularity}, we have
$$0\le \Eta\{X-Y\} + \Eta\{Y-Z\} +\Eta\{X,Z\} -\Eta\{X,Z\} + \Eta\{Y\} - \Eta\{X-Z\},$$
whence
$$ \Eta\{X-Z\} \le \Eta\{X-Y\} + \Eta\{Y-Z\} + \Eta\{Y\},$$
which completes the proof.\slug

We also define a conditional version of the entropic Ruzsa distance. If $X$ and $Y$ are $G$-valued
random variables with finite support and $Z$ and $W$ are any random variables with finite supports
$A$ and $B$ respectively, then we define
$$\dd\{ X\given Z ; Y\given W\} = \sum_{z\in A} \sum_{w\in B} \pr\{Z = z\} \pr\{W=w\}
\dd\bigl\{ (X\given Z = z) ; (Y\given W=w)\bigr\}.$$
If $(X',Z')$ and $(Y',W')$ are independent copies of $(X,Z)$ and $(Y,W)$ respectively, then this
distance is also given by the formula
$$\dd\{ X\given Z ; Y\given W\} = \Eta\{X'-Y'\given Z',W'\} - {\Eta\{X'\given Z'\}\over 2}
- {\Eta\{Y'\given W'\}\over 2}.$$

The following inequality relates the conditional and unconditional versions of Ruzsa distance.

\edef\lemfiveone{\the\sectcount.\the\thmcount}
\parenproclaim Lemma {\advthm} ({\rm\ref{pfr2023},} Lemma 5.1). Let $(X,Z)$, and $(Y,W)$ be random
variables, with $X$ and $Y$ taking values in the same abelian group. Then
$$\dd\{ X\given Z ; Y\given W\} \le \dd\{X,Y\} + {1\over 2} \II\{X:Z\} + {1\over 2}\II\{Y:W\}.$$

\proof Letting $(X',Z')$ and $(Y',W')$ be independent copies of the given random variables, we
expand
$$\eqalign{
\dd\{ X\given Z ; Y\given W\}
&= \Eta\{X'-Y'\given Z',W'\} - {\Eta\{X'\given Z'\}\over 2} - {\Eta\{Y'\given W'\}\over 2} \cr
&\le \Eta\{X'-Y'\} - {\Eta\{X',Z'\}-\Eta\{Z'\}\over 2} \cr
&\hskip130pt- {\Eta\{Y', W'\}-\Eta\{W'\}\over 2} \cr
&= \dd\{X-Y\} + {\Eta\{X'\} + \Eta\{Z'\} -\Eta\{X',Z'\}\over 2} \cr
&\hskip90pt+ {\Eta\{Y'\} + \Eta\{W'\} - \Eta\{Y', W'\}\over 2} \cr
&= \dd\{X-Y\} + {1\over 2}\II\{X:Z\} + {1\over 2}\II\{Y:W\},\cr
}$$
by independence and the fact that conditioning does not increase entropy.\slug

\medskip\boldlabel The sum-difference inequality.
The Ruzsa triangle inequality bounds the size of a difference set by passing through a different subset. There
is another inequality that relates the size of a sumset with the size of a difference set.
If $A$ and $B$ are nonempty finite subsets of an abelian group $G$, then
$$|A+B| \le {|A-B|^3\over |A||B|}.$$
If we replace cardinalities by exponentials of entropies, then we obtain the statement of the following
proposition.

\proclaim Proposition \advthm.
Let $X$ and $Y$ be independent random variables taking values in the same abelian group. Then
$$\Eta\{X+Y\} \le 3\Eta\{X-Y\} - \Eta\{X\} - \Eta\{Y\}.$$

Before we proceed to the proof, we establish the following definition, which will be needed later in these
notes as well. Let $X$ and $Y$ be random variables (not necessarily independent). We say that
$X_1$ and $Y_1$ are {\it conditionally independent trials of $X$ and $Y$ relative to $Z$} if for all
$z$ in the range
of $Z$, the random variables distributed as $(X_1\given Z=z)$ and $(Y_1\given Z=z)$ are independent,
$(X_1\given Z=z)$ has the same distribution as $(X\given Z=z)$,
and similarly for $Y_1$ and $Y$.
In particular, if $X=Y$ and $X_1$ and $X_2$ are conditionally independent trials of $X$ relative
to $Z$, we have
$$\Eta\{X_1,X_2\given Z\} = \Eta\{X_1\given Z\} + \Eta\{X_2\given Z\} = 2\Eta\{X\given Z\},$$
by additivity and independence. From this we obtain
\edef\eqcondindep{\the\eqcount}
$$\Eta\{X_1,X_2,Z\} = 2\Eta\{X\given Z\} + \Eta\{Z\} = 2\Eta\{X,Z\} - \Eta\{Z\}.\adveq$$
It is also important to observe that $(X_1,Z)$ and $(X_2,Z)$ both have the same distributions
as $(X,Z)$.

\proof Let $(X_1,Y_1)$ and $(X_2,Y_2)$ be conditionally independent trials of $(X,Y)$ relative
to $X-Y$. Since $(X,Y)$ determines $X-Y$, we have $X_1-Y_1 =X-Y= X_2-Y_2$. Let $(X_3,Y_3)$ be another
trial of $(X,Y)$ independent of $(X_1, X_2, Y_1, Y_2)$. Then
$$X_3 + Y_3 = X_3 + Y_3 + X_1 - Y_1 - X_2 + Y_2 = (X_3-Y_2) - (X_1-Y_3) + X_2 + Y_1,$$
so $(X_3-Y_2, X_1-Y_3, X_2, Y_1)$ and $(X_3, Y_3)$ each determine $X_3+ Y_3$. On the other hand,
$(X_3-Y_2, X_1-Y_3, X_2, Y_1)$ and $(X_3,Y_3)$ together determine the sextuple $(X_1,X_2,X_3,Y_1,Y_2,Y_3)$,
so by the submodularity inequality, we have
\edef\eqsumdiff{\the\eqcount}
$$\eqalign{
\Eta\{X_1,X_2,X_3,Y_1,Y_2,&Y_3\} + \Eta\{X_3+Y_3\} \cr
&\le \Eta\{X_3-Y_2, X_1-Y_3, X_2, Y_1\} + \Eta\{X_3, Y_3\}.\cr
}\adveq$$
We have
$$\Eta\{X_3,Y_3\} = \Eta\{X,Y\} = \Eta\{X\} + \Eta\{Y\}$$
by independence of
$X$ and $Y$, and
since $(X_3,Y_3)$ and $(X_1,X_2,Y_1,Y_2)$ are independent, we have
$$\eqalign{
\Eta\{X_1,X_2,X_3,Y_1,Y_2,Y_3\} &= \Eta\{X_1,X_2,Y_1,Y_2\} + \Eta\{X_3,Y_3\} \cr
&= \Eta\{X_1, Y_1,X_2,Y_2,X-Y\} + \Eta\{X\} + \Eta\{Y\} \cr
&= 2\Eta\{X,Y,X-Y\} - \Eta\{X-Y\} + \Eta\{X\} + \Eta\{Y\} \cr
&= 2\Eta\{X,Y\} - \Eta\{X-Y\} + \Eta\{X\} + \Eta\{Y\}\cr
&= 3\Eta\{X\} + 3\Eta\{Y\} - \Eta\{X-Y\},\cr
}$$
where in the third line we applied~\refeq{\eqcondindep}. On the other hand,
$$\Eta\{X_3 + Y_3\} = \Eta\{X+Y\}$$
and
$$\eqalign{
\Eta\{X_3-Y_2, &X_1-Y_3, X_2, Y_1\} \cr
&\le \Eta\{X_3-Y_2\} + \Eta\{X_1-Y_3\} + \Eta\{X_2\} + \Eta\{Y_1\} \cr
&= 2\Eta\{X-Y\} + \Eta\{X\} + \Eta\{Y\}.
}$$
Substituting everything into~\refeq{\eqsumdiff} yields
$$\eqalign{
3\Eta\{X\} + 3\Eta\{Y\} - &\Eta\{X-Y\} + \Eta\{X+Y\} \cr
&\le 2\Eta\{X-Y\} + 2\Eta\{X\} + 2\Eta\{Y\},
}$$
and the desired inequality follows upon rearrangement of terms.\slug

The entropic sum-difference inequality can also be stated in terms of entropic Ruzsa distances;
independence is not necessary here because independent trials are baked into the definition
of entropic Ruzsa distance.

\proclaim Corollary \advthm. Let $X$ and $Y$ be discrete random variables taking values in the same
abelian group. Then
$$\dd\{X, -Y\} \le 3\dd\{X,Y\}.\noskipslug$$

\advsect The Pl\"unnecke--Ruzsa inequality

In additive combinatorics, one of the most useful sumset inequalities is the following.

\parenproclaim Theorem {\advthm} (Pl\"unnecke--Ruzsa inequality). Let $A$ and $B$ be finite subsets of an
abelian group and suppose that $|A+B|\le K|A|$ for some constant $K$. Then for any integers
$r,s\ge 0$, not both zero, we have $|rB-sB| \le K^{r+s}|A|$.\slug

In this section we will develop an entropic analogue of this statement, in which sets are replaced
by random variables of finite support and cardinality is replaced with the exponential of
entropy. First, a technical lemma.

\edef\lemaone{\the\sectcount.\the\thmcount}
\parenproclaim Lemma {\advthm} ({\rm\ref{pfr2023},} Lemma A.1).
Let $X$, $Y$, and $Z$ be independent random variables taking values in a common
abelian group. Then
$$\Eta\{X+Y+Z\} - \Eta\{X+Y\} \le \Eta\{Y+Z\} -\Eta\{Y\}.$$

\proof By submodularity, the quantity $\II\{ X : Z\given X+Y+Z\}$ is nonnegative, so we have
$$\eqalign{
0&\le \II\{ X : Z\given X+Y+Z\} \cr
&= \Eta\{X, X+Y+Z\} + \Eta\{Z, X+Y+Z\}\cr
&\qquad\qquad\qquad\qquad- \Eta\{X,Z,X+Y+Z\} - \Eta\{X+Y+Z\}.\cr
}$$
Since $X$, $Y$, and $Z$ are independent, we have
$$\Eta\{X,X+Y+Z\} = \Eta\{X,Y+Z\} = \Eta\{X\} + \Eta\{Y+Z\},$$
where in the first equality we use invariance. By similar reasoning we have
$$\Eta\{Z, X+Y+Z\} = \Eta\{Z\} + \Eta\{X+Y\}$$
and
$$\Eta\{X,Z,X+Y+Z\} = \Eta\{X\} + \Eta\{Y\} + \Eta\{Z\}.$$
Plugging these three identities into the inequality above yields
$$\eqalign{
0&\le \Eta\{X\} + \Eta\{Y+Z\} + \Eta\{Z\} + \Eta\{X+Y\} \cr
&\qquad\qquad -\Eta\{X\} - \Eta\{Y\} - \Eta\{Z\} - \Eta\{X+Y+Z\} \cr
&= \Eta\{Y+Z\} + \Eta\{X+Y\} - \Eta\{Z\} - \Eta\{X+Y+Z\},\cr
}$$
whence the claim follows upon rearranging.\slug

From here we are not far from proving the entropic Pl\"unnecke--Ruzsa inequality, a result of T.~Tao.

\proclaim Theorem \advthm. Let $X,Y_1,\ldots,Y_m$ be independent random variables of finite entropy
taking values in an abelian group $G$, such that
$$\Eta\{X+Y_i\} \le \Eta\{X\} + \log K_i$$
for all $1\le i\le m$ and some scalars $K_1,\ldots,K_m\ge 1$. Then
$$\Eta\{X+Y_1+\cdots+Y_m\} \le \Eta\{X\} + \log(K_1\cdots K_m).$$

\proof We prove the claim by induction on $m$. If $m=1$, then we are done by hypothesis. Now
suppose that $\Eta\{X+Y_1+\cdots+Y_{m-1}\} \le \Eta\{X\} + \log(K_1\cdots K_{m-1})$. Then by
the previous lemma, the induction hypothesis, and the hypothesis on $\Eta\{X+Y_m\}$, we have
$$\eqalign{
\Eta\{Y_1 + \cdots + Y_{m-1} + X + Y_m\}
&\le \Eta\{Y_1 + \cdots + Y_{m-1} + X\} \cr
&\qquad\qquad\qquad\qquad+ \Eta\{X+Y_m\} - \Eta\{X\}\cr
&\le \Eta\{X\} + \log(K_1\cdots K_{m-1}) + \log K_m\cr
&\le \Eta\{X\} + \log(K_1\cdots K_m),\cr
}$$
which is what we sought to prove.\slug

We can make this look bit more like the version of the Pl\"unnecke--Ruzsa inequality above
by using the triangle inequality.

\parenproclaim Corollary {\advthm} (Entropic Plunnecke--Ruzsa inequality). Let $X$ and $Y$ be random variables
with $\Eta\{X+Y\} \le \Eta\{X\} + \log K$. Then for any $r,s\ge 0$ not both zero, we have
$$\Eta\{ Y_1 + \cdots + Y_r - Z_1 - \cdots - Z_s \} \le \Eta\{X\} + (r+s)\log K,$$
where $Y_1,\ldots,Y_r,Z_1,\ldots,Z_s$ are independent copies of $Y$.

\proof By the entropic Ruzsa triangle inequality, we have
$$\eqalign{
&\Eta\{ Y_1 + \cdots + Y_r - Z_1 - \cdots - Z_s \} \le \cr
&\qquad\qquad\Eta\{Y_1 + \cdots + Y_r + X\} + \Eta\{-X - Z_1 - \cdots - Z_s\} - \Eta\{-X\}. \cr
}$$
The values of $-X$ are in bijection with values of $X$, and the values of $-X-Z_1-\cdots-Z_s$
are in bijection with the values of $X+Z_1+\cdots+Z_s$ (with the same probabilities in both cases),
so by the invariance axiom, we have
$$\eqalign{
&\Eta\{ Y_1 + \cdots + Y_r - Z_1 - \cdots - Z_s \} \le \cr
&\qquad\qquad\Eta\{X+Y_1 + \cdots + Y_r\} + \Eta\{X+ Z_1 + \cdots + Z_s\} - \Eta\{X\},\cr
}$$
and we can apply the the previous theorem twice to get
$$\eqalign{
\Eta\{ Y_1 + \cdots + Y_r - Z_1 - \cdots - Z_s \}  &\le \Eta\{X\} + \log (K^r) + \log (K^s),\cr
&= \Eta\{X\} + (r+s)\log K.\noskipslug\cr
}$$

We conclude this section by recording two consequences of Lemma~{\lemaone} and will
be of use in the proof of the polynomial Freiman--Ruzsa theorem.

\edef\lemfivetwo{\the\sectcount.\the\thmcount}
\parenproclaim Lemma {\advthm} ({\rm\ref{pfr2023},} Lemma 5.2).
Let $X$, $Y$, and $Z$ be random variables taking values in an abelian group,
with $Y$ and $Z$ independent. Then
$$\eqalign{
\dd\{X, Y-Z\} - \dd\{X, Y\} &\le {\Eta\{Y-Z\} - \Eta\{Y\}\over 2} \cr
&= {1\over 2} \dd\{Y, Z\} + {1\over 4}\Eta\{Z\} - {1\over 4}\Eta\{Y\}\cr
}$$
and
$$\eqalign{
\dd\{X; Y\given Y-Z\} - \dd\{X, Y\} &\le {\Eta\{Y-Z\} - \Eta\{Z\}\over 2} \cr
&= {1\over 2} \dd\{Y, Z\} + {1\over 4}\Eta\{Y\} - {1\over 4}\Eta\{Z\}.\cr
}$$

\proof For the first inequality, let $X'$ be a copy of $X$ that is independent of $(Y,Z)$, so that
$$\eqalign{
\dd\{X, Y-Z\} - \dd\{X, Y\}
&= \Eta\{X'-Y+Z\} - {1\over 2}\Eta\{X'\} - {1\over 2} \Eta\{Y-Z\} \cr
&\hskip60pt-\Eta\{X'-Y\} + {1\over 2}\Eta\{X'\} + {1\over 2}\Eta\{Y\}. \cr
&= \Eta\{X'-Y+Z\} - {1\over 2} \Eta\{Y-Z\} \cr
&\hskip110pt-\Eta\{X'-Y\} + {1\over 2}\Eta\{Y\}.\cr
}$$
Lemma~{\lemaone} with $X'$ in place of $X$ and $-Y$ in place of $Y$ yields
$$\Eta\{X'-Y+Z\} - \Eta\{X'-Y\} \le \Eta\{Y-Z\} -\Eta\{Y\},$$
so
$$\dd\{X, Y-Z\} - \dd\{X, Y\} \le {1\over 2} \Eta\{Y-Z\} - {1\over 2}\Eta\{Y\},$$
as desired. The alternate version of the statement follows directly from the definition of Ruzsa distance,
since $Y$ and $Z$ are independent.

For the other inequality, we apply Lemma~{\lemfiveone} with $W = Y-Z$ (and the variable $Z$ in that lemma
set to anything that is independent of $X$) to obtain
$$\eqalign{
\dd\{X; Y\given Y-Z\} - \dd\{X,Y\} &\le {1\over 2} \II\{Y:Y-Z\} \cr
&= {1\over 2}\bigl(\Eta\{Y\} + \Eta\{Y-Z\} - \Eta\{Y, Y-Z\}\bigr) \cr
&= {1\over 2}\bigl(\Eta\{Y\} + \Eta\{Y-Z\} - \Eta\{Y, Z\}\bigr) \cr
&= {1\over 2}\bigl(\Eta\{Y-Z\} - \Eta\{Z\}\bigr),\cr
}$$
where in the last line we used independence of $Y$ and $Z$. Independence also gives the alternative version
of the right-hand-side expression.\slug

Changing variables in the first inequality and then adding the second inequality gives us the following.

\edef\lemsevenone{\the\sectcount.\the\thmcount}
\parenproclaim Lemma {\advthm} ({\rm\ref{pfr2023},} Lemma 7.1).
Let $X$, $Y$, $Z$, and $W$ be random variables taking values in the same abelian group, with $Y$, $Z$,
and $W$ independent. Then
$$\eqalign{
\dd\{&X; Y-Z\given Y-Z-W\} - \dd\{X,Y\} \cr
&\le {1\over 2} \bigl( \Eta\{Y-Z-W\} + \Eta\{Y-Z\} - \Eta\{Y\} - \Eta\{W\}\bigr) .\cr
}$$

\proof The second inequality of Lemma~{\lemfivetwo} (setting $Y \gets Y-Z$ and $Z\gets W$) yields
$$\dd\{X; Y-Z\given Y-Z-W\} - \dd\{X, Y-Z\} \le {\Eta\{Y-Z-W\} - \Eta\{W\}\over 2},$$
and the first inequality (without any change of variables) is
$$\dd\{X, Y-Z\} - \dd\{X, Y\} \le {\Eta\{Y-Z\} - \Eta\{Y\}\over 2}.$$
Adding these two inequalities proves the lemma.\slug

\advsect The Balog--Szemer\'edi--Gowers theorem

If $A$ and $B$ are subsets of the same abelian group such that $|A+B|$ is much smaller than $|A|\cdot|B|$,
then it stands to reason that there must be a lot of redundancy in $A+B$; that is, many elements of $A+B$ can
be expressed as $a+b$ in lots of different ways. To capture this notion, we can define the
{\it additive energy} between two sets $A$ and $B$ to be
$$E(A,B) = \bigl| \bigl\{ (a,a',b,b') \in A\times A\times B\times B : a + b = a'+b'\bigr\}\bigr|.$$

We now define an entropic version of $E(A,B)$. Let $X$ and $Y$ be discrete random variables taking values
in the same
abelian group. Let $(X_1, Y_1)$ and $(X_2, Y_2)$ be conditionally independent trials of $(X,Y)$ relative to
$X+Y$. These  $X_1 + Y_1 = X_2 + Y_2 = A+B$.
The {\it entropic additive energy} between $X$ and $Y$ is
$$ \ee\{X,Y\} = \Eta\{X_1, Y_1, X_2, Y_2\}.$$
This definition makes clear the analogy between this value and the additive energy of sets, but
by conditional independence and the fact that $(X_1, Y_1, X_2, Y_2)$ determines $X_1+Y_1 = X+Y$,
we can rewrite
$$\eqalign{
\ee\{X,Y\} &= \Eta\{X_1, Y_1, X_2, Y_2, X+Y\} \cr
&= 2\Eta\{X,Y,X+Y\} - \Eta\{X+Y\} \cr
&= 2\Eta\{X,Y\} - \Eta\{X+Y\},\cr
}$$
where in the second equality we applied~\refeq{\eqcondindep}. This formula is something we will use
often, as it makes no direct mention of the variables $(X_1, Y_1)$ and $(X_2,Y_2)$.

Quantifying the idea that small sumset must imply large additive energy, we have the following
proposition.

\edef\propinversebalog{\the\sectcount.\the\thmcount}
\proclaim Proposition \advthm.
Let $A$ and $B$ be finite subsets of an abelian group.
If $|A+B|\le K|A|^{1/2} |B|^{1/2}$ for some constant $K$, then we have
$$E(A,B) \ge {1\over K} |A|^{3/2} |B|^{3/2}.\noskipslug$$

Somewhat surprisingly, if we convert
these statements into their entropic analogues in the na\"ive way, as we've been doing,
the implication goes the other way!
However, we have a weak equivalence (with worse constants in one direction)
under the further assumption that the random variables in
question are not too dependent.

\edef\propnaive{\the\sectcount.\the\thmcount}
\proclaim Proposition \advthm. Let $X$ and $Y$ be discrete random variables taking values in the
same abelian group. If
\global\edef\eqenergybound{\the\eqcount}
$$\ee\{X,Y\} \ge {3\over 2} \Eta\{X\} + {3\over 2}\Eta\{Y\} - \log K,\adveq$$
for some constant $K$, then
\global\edef\eqsumbound{\the\eqcount}
$$\Eta\{ X+Y\} \le {1\over 2} \Eta\{X\} + {1\over 2} \Eta\{Y\} + \log K.\adveq$$
If one adds the further assumption that
$\Eta\{X,Y\} \ge \Eta\{X\} + \Eta\{Y\} - C$,
then~{\rm \refeq{\eqsumbound}} implies~{\rm \refeq{\eqenergybound}} with a worse constant, namely,
we may only conclude
\global\edef\eqworsebound{\the\eqcount}
$$\ee\{X,Y\} \ge {3\over 2} \Eta\{X\} + {3\over 2}\Eta\{Y\} - \log K - 2C.\adveq$$
In particular, if $X$ and $Y$ are independent, then we can recover~{\rm \refeq{\eqenergybound}}
from~{\rm \refeq{\eqsumbound}}.

\proof Assuming the lower bound on the additive energy, we have
$$2\Eta\{X,Y\} - \Eta\{X+Y\} \ge {3\over 2} \Eta\{X\} + {3\over 2} \Eta\{Y\} - \log K,$$
so
$$\eqalign{
\Eta\{X+Y\} &\le 2\Eta\{X,Y\} - {3\over 2} \Eta\{X\} - {3\over 2} \Eta\{Y\} + \log K \cr
&\le 2\Eta\{X\} + 2\Eta\{Y\} - {3\over 2} \Eta\{X\} - {3\over 2} \Eta\{Y\} + \log K \cr
&= {1\over 2} \Eta\{X\} + {1\over 2} \Eta\{Y\} + \log K .\cr
}$$
On the other hand, assuming this upper bound on $\Eta\{X+Y\}$, we have
$$\eqalign{
\ee\{X,Y\} &= 2\Eta\{X,Y\} - \Eta\{X+Y\}  \cr
&\ge 2\Eta\{X,Y\} - {1\over 2} \Eta\{X\} - {1\over 2} \Eta\{Y\} - \log K ,\cr
}$$
and if $2\Eta\{X,Y\} \ge 2 \Eta\{X\} + 2\Eta\{Y\} - 2C$, then~\refeq{\eqworsebound} follows
directly.\slug

The fact that the implication goes the ``wrong'' way may seem somewhat baffling. Let us get
to the bottom of this.
If $A$ and $B$ are subsets of a finite abelian group, we can let $X$ and $Y$ be the uniform
distributions on $A$ and $B$, respectively. We have been operating under the belief that $\Eta\{X+Y\}$
should correspond (up to taking powers or logarithms) to the size of $A+B$. But this is not true, since
$X$ and $Y$ may be given a joint distribution that is not uniform on $A\times B$, even if
its marginals are uniform on $A$ and $B$.

For example, let $A$ and $B$ be subsets of $G$ and consider any regular bipartite graph $H$ on the
vertex set $A\cup B$. Let $(X,Y)$ be defined by sampling an edge from $H$ uniformly at random, letting
$X$ be its endpoint in $A$ and $Y$ be its endpoint in $B$. Since the graph is regular, $X$ is uniform
on $A$ and $Y$ is uniform on $B$, but $X+Y$ can only take values $a+b$ where $(a,b)$ is an edge of $H$.
In other words, $X+Y$ samples from the {\it partial sumset}
$$ A +_H B = \bigl\{ a + b : (a,b)\in E(H)\bigr\},$$
where elements that are represented more times as the sum of edge endpoints are given a greater weight.
The way to properly recover the ordinary sumset $A+B$ is to let $H$ be all of $A\times B$, in which case
$X$ and $Y$ are independent. The extra assumption we added in Proposition~{\propnaive} is analogous to
stipulating that $|H| \ge K |A|\cdot |B|$, so that the resulting $X$ and $Y$ are ``nearly'' independent.

Simply put, in the entropic setting the bound~\refeq{\eqenergybound} is
stronger than~\refeq{\eqsumbound} because the entropy $\Eta\{X,Y\}$ of the joint distribution appears in
the formula for $\ee\{X,Y\}$, whereas~\refeq{\eqsumbound} says nothing whatsoever about this joint distribution.

The converse to Proposition~{\propinversebalog} does not hold in general; that is, large additive energy
does not necessarily imply a small sumset. However, there does exist a partial converse,
which says that if sets $A$ and $B$ have a large additive energy, then there are dense subsets $A'\subseteq A$
and $B'\subseteq B$ such that the sumset $|A+B|$ is small. This is the celebrated
Balog--Szemer\'edi--Gowers theorem.

\parenproclaim Theorem {\advthm} (Balog--Szemer\'edi--Gowers theorem). Let $A$ be a finite
subset of an abelian group with $E(A,B) \ge c |A|^{3/2} |B|^{3/2}$. Then there are subsets
$A'\subseteq A$ and $B'\subseteq B$ with $|A'| \ge c' |A|$ and $|B'|\ge c''|B|$ such that
$$|A'+B'| \le C |A|^{1/2}|B|^{1/2},$$
where $c'$, $c''$, and $C$ depend only on $c$.

In the entropy setting, the operation on random variables that corresponds to taking subsets is
conditioning. (As a sanity check, recall that conditioning never increases entropy, just
as taking subsets never increases cardinality.) The Balog--Szemer\'edi--Gowers theorem gives
us subsets between which we can take a {\it bona fide} sumset, so its entropic analogue
should return conditionings $X'$ and $Y'$ of $X$ and $Y$ relative to some random variable $Z$,
such that
\medskip
\item{i)} $X'$ and $Y'$ are conditionally independent relative to $Z$;
\smallskip
\item{ii)} the entropies $\Eta\{X'\given Z\}$ and
$\Eta\{Y'\given Z\}$ are not too small compared to their unconditioned analogues; and
\smallskip
\item{iii)} $\Eta\{X'+Y'\given Z\}$ is small.
\medskip
In fact, the conditioning we shall perform is exactly the one used to define additive energy.

First, we need a lemma, which we state separately since it will also be used later in the proof of
the polynomial Freiman--Ruzsa theorem.

\edef\lematwo{\the\sectcount.\the\thmcount}
\parenproclaim Lemma {\advthm} ({\rm\ref{pfr2023},} Lemma A.2).
Let $X$ and $Y$ be discrete random variables taking values in the same abelian
group. Let $(X_1, Y_1)$ and $(X_2,Y_2)$ be conditionally
independent trials of $(X,Y)$ relative to $X+Y$. Then we have
$$\max\bigl(\Eta\{X_1- X_2\}, \Eta\{X_1-Y_2\}\bigr) \le \Eta\{X+Y\} + 2\II\{X:Y\}.$$
The right-hand side of this expression can also be written $2\Eta\{X\} + 2\Eta\{Y\} - \ee\{X,Y\}$.

\proof First we perform the proof for $X_1 - Y_2$.
Submodularity gives us
$$\Eta\{X_1, Y_1, X_1-Y_2\} + \Eta\{X_1-Y_2\} \le
\Eta\{ X_1, X_1-Y_2\} + \Eta\{Y_1, X_1-Y_2\}.$$
Since $X_1 + Y_1 = X+Y = X_2 + Y_2$, given $(X_1, Y_1, X_1-Y_2)$ we can recover
the values of $X_2$ and $Y_2$. So $(X_1, Y_1, X_1-Y_2)$ and $(X_1,Y_1,X_2,Y_2)$ determine each
other and hence
$$\Eta\{X_1, Y_1, X_1-Y_2\} = \Eta\{X_1, Y_1, X_2, Y_2\} = 2\Eta\{X,Y\} - \Eta\{X+Y\}.$$
On the other side of the inequality, we have
$$\Eta\{ X_1, X_1-Y_2\} = \Eta\{X_1, Y_2\} \le \Eta\{X\} + \Eta\{Y\},$$
and similarly
$$\Eta\{ Y_1, X_1-Y_2\} = \Eta\{Y_1, X_2-Y_1\} = \Eta\{X_2, Y_1\} \le \Eta\{X\} + \Eta\{Y\}.$$
Therefore,
$$\eqalign{
\Eta\{X_1-Y_2\} &\le \Eta\{X+Y\} - 2\Eta\{X,Y\} + 2\Eta\{X\} + 2\Eta\{Y\} \cr
&= \Eta\{X+Y\} - \II\{X : Y\}.\cr
}$$
The same holds with $Y_2$ replaced by $X_2$.\slug

\parenproclaim Theorem {\advthm} (Entropic Balog--Szemer\'edi--Gowers theorem).
Let $X$ and $Y$ be discrete random variables taking values in the same abelian group, and
suppose that
$$\ee\{X,Y\} \ge {3\over 2} \Eta\{X\} + {3\over 2} \Eta\{Y\} + \log K$$
for some constant $K$.
Then letting $(X_1, Y_1)$ and $(X_2,Y_2)$ be conditionally
independent trials of $(X,Y)$ relative to $X+Y$, we have
$$ \Eta\{ X_1 \given X+Y\} \ge \Eta\{X\} - 2\log K$$
and
$$ \Eta\{ Y_2 \given X+Y\} \ge \Eta\{Y\} - 2\log K.$$
Furthermore, the variables $X_1$ and $Y_2$ are conditionally independent relative to $X+Y$, and
we have
$$\Eta\{ X_1 + Y_2 \given X+Y\} \le {1\over 2}\Eta\{X\} + {1\over 2} \Eta\{Y\} + \log K.$$

\proof Using the coupling $X+Y = X_1+Y_1 = X_2 + Y_2$, we have
$$\eqalign{
\Eta\{X_1\given X + Y\} &= \Eta\{X_1, X_1+Y_1\} - \Eta\{X+Y\} \cr
&= \Eta\{X,Y\} - \Eta\{X+Y\} \cr
&= \ee\{X,Y\} - \Eta\{X,Y\} \cr
&\ge {3\over 2} \Eta\{X\} + {3\over 2} \Eta\{Y\} - \log K - \Eta\{X,Y\} \cr
&\ge {1\over 2} \Eta\{X\} + {1\over 2}\Eta\{Y\} -\log K.\cr
}$$
where in the fourth line we used the hypothesis on $\ee\{X,Y\}$, and in the last line we observed
that $\Eta\{X\} + \Eta\{Y\} - \Eta\{X,Y\} \ge 0$. The bound
$$\Eta\{Y_2, X+Y\} \ge {1\over 2} \Eta\{X\} + {1\over 2}\Eta\{Y\} -\log K$$
is shown in the exact same way; only the first step differs.

Now, taking the sum of both these bounds, we arrive at
$$\Eta\{X_1\given X+Y\} + \Eta\{Y_2 \given X+Y\} \ge \Eta\{X\} + \Eta\{Y\} - 2\log K.$$
From this one deduces
$$\eqalign{
\Eta\{X_1\given X+Y\} &\ge \Eta\{X\} + \Eta\{Y_2\} - \Eta\{Y_2\given X+Y\}  - 2\log K \cr
&\ge \Eta\{X\} - 2\log K.\cr
}$$
The corresponding lower bound on $\Eta\{Y_2\given X+Y\}$ is proved similarly.

It remains to prove the upper bound on $\Eta\{X_1 + Y_2 \given X+Y\}$.
Note that $(X_1, Y_2, X+Y)$ and $(X_1 - X_2, X+Y)$ jointly determine $(X_1, X_2, X+Y)$.
Then given $X_1 - X_2$ and $X+Y$ we can calculate
$$X_1 + Y_2 = X_1 - X_2 + X_2 + Y_2 = (X_1 - X_2) + (X+Y),$$
so $(X_1, Y_2, X+Y)$ and $(X_1 - X_2, X+Y)$
each separately determine $(X_1 + Y_2, X+Y)$. Hence the submodularity inequality yields
$$ \Eta\{X_1, X_2, X+Y\} + \Eta\{X_1+Y_2, X+Y\} \le \Eta\{X_1, Y_2, X+Y\} + \Eta\{X_1- X_2, X+Y\}.$$
From $(X_1, X_2, X+Y)$
we can calculate $Y_1 = X+Y-X_1$ and $Y_2 = X+Y-X_2$, so this triple and the triple
$(X_1, X_2, Y_1, Y_2)$ determine each other. So the first term above is simply the additive energy
between $X$ and $Y$; that is
$$ \Eta\{X_1, X_2, X+Y\} = \Eta\{X_1, X_2, Y_1, Y_2\} = 2\Eta\{X,Y\} - \Eta\{X+Y\}.$$
Now since $X_1$ and $Y_2$ are conditionally independent relative to $X+Y$, we have
$$\eqalign{
\Eta\{X_1, Y_2, X+Y\} &= \Eta\{X_1, X+Y\} + \Eta\{Y_2, X+Y\} - \Eta\{X+Y\} \cr
&= \Eta\{X, X+Y\} + \Eta\{Y, X+Y\} - \Eta\{X+Y\} \cr
&= 2\Eta\{X,Y\} - \Eta\{X+Y\} \cr
}$$
For the last term above we split
$$\Eta\{X_1 - X_2, X+Y\} = \Eta\{X_1 - X_2 \given X+Y\} - \Eta\{X+Y\}.$$
Putting everything together, we obtain
$$\eqalign{
2\Eta\{X,Y\} - \Eta\{X+&Y\} + \Eta\{X_1 + Y_2, X+Y\} \cr
&\le 2\Eta\{X,Y\} + \Eta\{X_1-X_2\given X+Y\} - 2\Eta\{X+Y\},\cr
}$$
so that
$$\Eta\{ X_1 + Y_2\given X+Y\} \le \Eta\{X_1 - X_2\given X+Y\} - 2\Eta\{X+Y\}\le \Eta\{X_1 - X_2\}.$$
The previous lemma then gives
$$\Eta\{ X_1 + Y_2\given X+Y\} \le 2\Eta\{X\} + 2\Eta\{Y\} - \ee\{X,Y\},$$
and from our lower bound on $\ee\{X,Y\}$, we conclude that
$$\Eta\{ X_1 + Y_2\given X+Y\} \le {1\over 2} \Eta\{X\} + {1\over 2} \Eta\{Y\} + \log K,$$
which is what we wanted to show.\slug

Let us also extract the specific consequence of Lemma~{\lematwo} that we need for the polynomial
Freiman--Ruzsa theorem.

\edef\lematwopfr{\the\sectcount.\the\thmcount}
\proclaim Lemma \advthm. Let $X$ and $Y$ be $G$-valued random variables, let $Z = X+Y$, and let $C$
denote the support of $Z$. Then
$$\eqalign{
\sum_{z\in C} \pr\{Z=z\} \dd\bigl\{ &(X\given Z=z); (Y\given Z=z)\bigr\} \cr
&\le 2\II\{X:Y\}+2\Eta\{Z\}-\Eta\{X,Y\}. \cr
}$$

\proof Let $(X_1, Y_1)$ and $(X_2, Y_2)$ be as in the proof of Lemma~{\lematwo}. Note that
$X_1$ and $Y_2$ are conditionally independent copies of $X$ and $Y$ relative to $Z$, so it suffices
to show that
\edef\eqxoneytwoa{\the\eqcount}
$$\eqalign{
\Eta\{X_1 - Y_2\given Z\} - {1\over 2} &\Eta\{X_1\given Z\} - {1\over 2} \Eta\{Y_2\given Z\} \cr
&\le 2\II\{X:Y\}+2\Eta\{Z\}-\Eta\{X,Y\}. \cr
}\adveq$$
Lemma~{\lematwo} tells us that $\Eta\{X_1 - Y_2\} \le \Eta\{Z\} + 2\II\{X:Y\}$, so that
\edef\eqxoneytwob{\the\eqcount}
$$\Eta\{X_1 - Y_2\given Z\} \le \Eta\{Z\} + 2\II\{X:Y\},\adveq$$
since conditioning does not increase entropy. Then
we expand
\edef\eqxoneytwoc{\the\eqcount}
$$\Eta\{X_1\given Z\} = \Eta\{X_1, X_1 + Y_1\} - \Eta\{Z\} = \Eta\{X,Y\} - \Eta\{Z\},\adveq$$
and $\Eta\{Y_2\given Z\}$ is equal to this quantity as well. Subtracting~\refeq{\eqxoneytwoc}
from~\refeq{\eqxoneytwob} gives us~\refeq{\eqxoneytwoa}, which completes the proof.\slug

\advsect The Freiman--Ruzsa theorem

Both Pl\"unnecke's theorem and the Balog--Szemer\'edi--Gowers theorem have to do with the ratio $|A+A|/|A|$
of a finite set $A$. Let us now give this ratio a name. It is called the {\it doubling constant} of $A$.
Pl\"unnecke's theorem says that if the doubling constant is at most $K$,
then the ratio $|rA-sA|/|A|$ is at most $K^{r+s}$. The Balog--Szemer\'edi--Gowers theorem, on the other hand,
says that sets $A$ with large additive energy contain large subsets $A$ and $A''$ such that
$|A' + A''|/|A|$ is bounded from above by some constant.

It is natural to ask whether there is some way to characterise the structure of sets $A$ with small doubling
constant (in some asymptotic sense). One reason why $A$ might have $|A+A|\le K|A|$ is if $A$ is contained
in some subgroup $H$, since subgroups are closed under addition. If $H$ is not much larger than $A$,
then this would explain why the doubling constant of $A$ is small. It turns out that the converse of
this statement holds; that is, if $A$ has small doubling constant, then it has high density as a subset
of a subgroup. This is called the Freiman--Ruzsa theorem.

\parenproclaim Theorem {\advthm} (Freiman--Ruzsa theorem). Let $A\subseteq G = (\ZZ/r\ZZ)^n$ and suppose
that $|A+A|\le K|A|$. Then there is a subgroup $H$ of $G$ such that $A\subseteq H$ and $|H|\le K'|A|$,
where $K'$ depends only on $K$ and $r$.

But subspaces may not be a very efficient way of covering sets with small doubling. One can easily
cook up examples where there is enough linear independence in $A$ so that the smallest subspace
containing $A$ has size exponential in $|A|$, but $A$ still has small doubling.

On the other hand, in the extreme case that the doubling
constant of $A$ is $1$, then the structure of $A$ is completely determined, as shown by the following proposition.

\edef\propdoublingone{\the\sectcount.\the\thmcount}
\proclaim Proposition \advthm. Let $A$ be a finite subset of an abelian group $G$. If $|A+A| = |A|$,
then $A$ is a coset of a subgroup $H$.

\proof First we assume that $A$ contains $0$. Then
$$A = A + \{0\} \subseteq A+A,$$
but since $|A+A| = |A|$ this implies that $A+A = A$. Now let $H = \{h\in G : A + h = A$. The above observation
shows that $A\subseteq H$. But if $h\in H$, then $A = A+h$ contains the element $0+h = h$, so in
fact $H=A$.

If $x,y\in H$, then $A+x = A$ and $A+y=A$, so subtracting $y$ from both sides of the second identity we obtain
$A-y = A$, and adding $x$ to both sides of this, we get
$$A + x-y = A+x = A,$$
so $x-y\in H$.
This along with the fact that $0\in H$ shows that $H$ is a subgroup of $G$.

Thus in the case where $A$ contains $0$, we see that $A$ is actually a subgroup of $G$, and
in the general case we may translate $A$ without changing $|A+A|$, so $A$ is the translate of a subgroup;
that is, $A$ is a coset of $H$.\slug

So in the case that $A$ is a union of not too many cosets, we also expect $A+A$ to be quite small, since
each individual coset does not grow under addition (the only possible growth comes from additions between cosets).

It is believed that the converse of this statement holds and yields a more efficient version
of the Freiman--Ruzsa theorem; that is,
if $A$ has doubling constant bounded above by $K$, then $A$ is contained in a union of cosets,
where the number of cosets needed can be taken to be no more than polynomial in the doubling constant.
This conjecture is due to K.~Marton.

\proclaim Conjecture \advthm. Let $A\subseteq \bigl(\ZZ/r\ZZ\bigr)^n$ have $|A+A|\le K|A|$
for some constant $K$. Then there is a subgroup $H$ with $|H|\le |A|$ such that $A$
is contained in a union of $K^C$ cosets of $H$, where $C$ is a constant that can depend
on $r$ but not on $n$ or $K$.

In 2023, the first special case of this conjecture was proved by W.~T.~Gowers, B.~Green, F.~Manners, and
T.~Tao.

\edef\thmpfr{\the\sectcount.\the\thmcount}
\parenproclaim Theorem {\advthm} (Gowers--Green--Manners--Tao, {\rm 2023}).
There is a constant $C$ such that the following holds.
Let $A\subseteq \FF_2^n$ have $|A+A|\le K|A|$
for some constant $K$. Then there is a subgroup $H$ with $|H|\le |A|$ such that $A$
is contained in a union of $2K^C$ cosets of $H$.

The reason we need the factor of two is that if $A$ is almost all of a subgroup, then $K$ is very close
to $1$ and the largest
subgroup of $\FF_2^n$ has cardinality just slightly above half that of $A$. That creates the possibility
that $K^C$ is much less than $2$, making it impossible to satisfy the statement of the conjecture.
Adding the factor of two solves this issue because if $A\subseteq H_0$ with $|A| \ge (1-\eps)|H_0|$, then
we may take a subgroup $H$ of $H_0$ of index $2$ and cover $A$ by two cosets of $H$.

The proof of this theorem will occupy the
remainder of these notes. As a first step, we shall show that
Theorem~{\thmpfr} follows from an entirely information-theoretic statement.
In this and the rest of the notes, for $A$ finite
we use the notation $U_A$ to denote the random variable that is uniform on $A$.

\edef\thmsubgroup{\the\sectcount.\the\thmcount}
\proclaim Theorem \advthm. There is a constant $C'$ such that the following holds.
Let $G = \FF_2^n$ and let $X_1^*$ and $X_2^*$ be random variables taking values in
$G$. There is some subgroup $H\subseteq G$ such that
$$\dd\{ X_1^*, U_H\} + \dd\{ X_2^*, U_H\} \le C'\dd\{X_1^*, X_2^*\}.$$

In the proof that this theorem implies the previous one, we shall need the Ruzsa covering lemma,
whose proof is short enough that we include it for completeness.

\edef\lemruzsacovering{\the\sectcount.\the\thmcount}
\parenproclaim Lemma {\advthm} (Ruzsa covering lemma). Let $G$ be an abelian group and let $A$ and
$B$ be finite subsets of $G$. Then there is a set $L\subseteq A$ of size at most $|A+B|/|B|$ such that
$A\subseteq L+B-B$.

\proof Let $L = \{a_1,\ldots,a_l\}$ be a maximal subset of $A$ with the property that the sets $a_i+B$
are disjoint. For all $a\in A$ there is $i$ such that $(a+B) \cap (a_i+B) \ne \emptyset$, otherwise we
could add $a$ to $L$ and contradict maximality. Equivalently, we can find $b$ and $b'$ such that
$a+b = a_i + b'$ so $a\in a_i + B - B$. Hence $A\subseteq L+B-B$. Lastly, $|L|\cdot |B| = |K+B| \le |A+B|$.\slug

We now show that if Theorem~{\thmsubgroup} holds for some constant $C'$, then
Theorem~{\thmpfr} holds with $C=C'+1$.

\medskip\noindent{\it Proof of Theorem~{\thmpfr} assuming Theorem~{\thmsubgroup}.}\enspace
Let $A\subseteq \FF_2^n$ and $K\ge 1$
such that $|A+A|\le K|A|$. Let $U_A$ be the uniform distribution on $A$, so that $\Eta\{U_A\} = \lg|A|$.
Letting $U_A'$ and $U_A''$ be two independent copies of $U_A$, the sum $U_A' - U_A''$ belongs to $|A+A|$,
so $\Eta\{U_A' + U_A''\} \le \lg|A+A|$, and we have
$$\dd\{U_A, U_A\} = \Eta\{U_A' + U_A''\} - \Eta\{U_A\} \le \lg|A+A|-\lg|A| \le \lg K,$$
since $U_A' + U_A'' = U_A' - U_A''$ in $\FF_2^n$. By Theorem~{\thmsubgroup}, we can find
$H\subseteq G$ such that
$$2\dd\{ U_A, U_H\} \le C'\lg K,$$
where $U_H$ is taken to be uniform on $H$ independently of $U_A$. Independence and the definition
of Ruzsa distance let us conclude
$$\Eta\{U_A-U_H\} \le {\lg|A| + \lg|H| + C'\lg K\over 2},$$
from which applying Proposition~{\propmaxprob} allows us to procure some $x_0 \in \FF_2^n$ such that
$$\pr\{ U_A - U_H = x_0\} \ge {1\over |A|^{1/2} |H|^{1/2} K^{C'/2}}.$$
Since $U_A$ and $U_H$ are uniformly and independently random on $A$ and $H$ respectively, we rewrite this as
$${\bigl| A\cap (H+x_0)\bigr| \over |A|\cdot|H|}  \ge {1\over |A|^{1/2} |H|^{1/2} K^{C'/2}},$$
whence
$$\bigl| A\cap (H+x_0)\bigr|  \ge {|A|^{1/2} |H|^{1/2} \over K^{C'/2}}.$$
By the Ruzsa covering lemma applied to the sets $A$ and $A\cap (H+x_0)$, there is a set $L\subseteq A$ of size
$$\eqalign{
|L| &\le {\bigl| A + (A\cap (H+x_0))\bigr|\over \bigl| A\cap (H+x_0)\bigr|} \cr
&\le {|A + A|\over \bigl| A\cap (H+x_0)\bigr|} \cr
&\le {K|A|\cdot K^{C'/2} \over |A|^{1/2} |H|^{1/2} } \cr
&= {K^{C'/2+1}|A|^{1/2} \over |H|^{1/2} } \cr
}$$
such that
$$A \subseteq L+\bigl(A\cap (H+x_0)\bigr) - \bigl(A\cap (H+x_0)\bigr) \subseteq L+H.$$
Proposition~{\propabsdist} tells us that
$$\bigl| \lg|H| - \lg|A|\bigr| = \bigl| \Eta\{U_H\} - \Eta\{H_A\}\bigr| \le 2\dd\{ U_A, U_H\} \le C'\lg K,$$
so
$$\max\biggl( {|H|\over |A|}, {|A|\over |H|}\biggr) \le K^{C'}$$
and we have
$$|L| \le K^{C'+1}.$$
If $|H|\le |A|$ then we are done (and the factor of $2$ is unnecessary in the theorem statement),
since $A$ is a subset of $|L|$ cosets of $H$.

If not, then we can pick a
subgroup $H'$ of $H$ with $|A|/2 \le |H'|\le |A|$ by iteratively taking hyperplanes until the condition is met.
The group $H$ may be covered by $|H|/|H'| \le 2|H|/|A|$ translates of $H'$, so $A$ can be covered by
$${2|H|\over |A|}|L| \le {2|H|\over |A|}\cdot {K^{C'/2+1}|A|^{1/2} \over |H|^{1/2} } 
= 2K^{C'/2+1} {|H|^{1/2} \over |A|^{1/2}} \le 2K^{C'+1}$$
translates of $H'$.\slug

\edef\sectcompactness{\the\sectcount}
\advsect A compactness argument

We have reduced the proof of the Freiman--Ruzsa theorem to proving the inform\-ation-theoretic
Theorem~{\thmsubgroup}. In this section, we shall reduce the proof to a different statement via a compactness
argument. First we recall some basic topological properties of probability measures.

Fix a finite set $G$ of size $n$. Ordering the elements of $G$ from $1$ to $n$, one can associate
to each probability distribution $\mu$ on $G$ the vector
$$\bigl( \mu(\{1\}), \mu(\{2\}), \ldots, \mu(\{n\})\bigr)$$
in $\RR^n$. (Starting now we will write $\mu_i$ instead of $\mu(\{i\})$.)
Let $\P(G)$ denote the set of all probability distributions on $G$.
This is a metric space under the {\it total variation
distance} $\dTV$, where if $\mu = (\mu_1,\ldots,\mu_n)$
and $\nu = (\nu_1,\ldots,\nu_n)$ are two probability distributions on $G$, then
$$\dTV(\mu,\nu) = \sum_{i=1}^n |\mu_i - \nu_i|.$$
Regarding the space as a subset of $\RR^n$, this norm is equivalent to the $L_1$ norm; hence
the total variation metric on $\P(G)$ is equivalent to the Euclidean metric on $\RR^n$.
Since probabilities are in $[0,1]$, the space $\P(G)$ is a subset of the closed set $[0,1]^n$,
and since $\P(G)$ is the inverse image of the set $\{1\}$ under the continuous function
$$(x_1,\ldots,x_n) \mapsto x_1 + \cdots + x_n,$$
we conclude that $\P(G)$ is a compact topological space. In passing, we note here that this is
the topology under which the continuity axiom asserts that $\Eta\{X\}$ is continuous.

Next we show that the Ruzsa distance is also continuous with respect to this topology.

\edef\propruzsacontinuous{\the\sectcount.\the\thmcount}
\proclaim Proposition \advthm. Let $G$ be a finite abelian group.
The Ruzsa distance $\dd$, regarded as a functional
from $\P(G)\times \P(G)\to \RR$, is continuous.

Let $X$ and $Y$ be random variables; without loss of generality, we may assume they are independent,
so that
$$\dd\{X,Y\} = \Eta\{X-Y\} - {\Eta\{X\} \over 2} - {\Eta\{Y\}\over 2}.$$
The entropy functional is continuous by axiom, so from here it suffices to show that
the function $f : \P(G)\times \P(G) \to \P(G)$ mapping $(X,Y)\mapsto X-Y$ is continuous.

Since all norms on finite-dimensional spaces are equivalent, we may work with
$\P(G)\times \P(G)$ and $\P(G)$ as normed spaces using the $L_1$ norm on $\RR^{2n}$ and $\RR^n$
respectively.
Let $\mu_g = \pr\{X = g\}$, $\nu_g = \pr\{ Y = g\}$, and $\xi_g = \pr\{X-Y = g\}$ for all $g\in G$.
Given another pair $(X', Y')$ of random variables, define $\mu_g'$, $\nu_g'$, and $\xi_g'$ accordingly.
Now let $\eps > 0$ and suppose that the vectors $(\mu, \nu)$ and $(\mu', \nu')$
have $\bignorm{(\mu,\nu), (\mu',\nu')}_1 < \delta$ for some $\delta>0$ to be specified later; writing
this out in full, we have
$$\sum_{g\in G} \big(|\mu_g - \mu_g'| + |\nu_g - \nu_g|\bigr) < \delta.$$
Observe that
$$\xi_g = \pr\{X-Y= g\} = \sum_{h\in G} \pr\{X = g+h\} \pr\{Y = h\} = \sum_{h\in G} \mu_{g+h}\nu_h$$
and similarly for $\xi_g'$. From this we can expand and bound
$$\eqalign{
\norm{\xi, \xi'}_1 &= \sum_{g\in G} |\xi_g - \xi_g'| \cr
&= \sum_{g\in G} \Bigl| \sum_{h\in G} \mu_{g+h}\nu_h - \sum_{h\in G} \mu_{g+h}\nu_h\Bigr| \cr
&\le \sum_{g\in G} \sum_{h\in G} \bigl( |\mu_{g+h}\nu_h - \mu_{g+h}'\nu_h| +
|\mu_{g+h}'\nu_h - \mu_{g+h}'\nu_h'|\bigl) \cr
&= \sum_{h\in G} \nu_h \sum_{g\in G} |\mu_{g+h} - \mu_{g+h}'|
+ \sum_{g\in G} \mu_g \sum_{h\in G} |\nu_h - \nu_h'| \cr
&= \sum_{g\in G} |\mu_{g} - \mu_{g}'| + \sum_{h\in G} |\nu_h - \nu_h'| \cr
&< \delta,
}$$
so in fact we can set $\delta = \eps$.\slug

So much for topology. Returning to entropies, we now prove an entropic version of
Proposition~{\propdoublingone}.

\edef\propentropydoublingone{\the\sectcount.\the\thmcount}
\proclaim Proposition \advthm. Let $X$ be a random variable taking values in an abelian group $G$.
Then $\dd\{X, -X\} = 0$ if and only if $X$ is uniformly distributed on a coset of a finite subgroup of $G$.

\proof If $X$ is the uniform distribution on a coset $a+H$ of a finite subgroup $H$, then letting $X'$ be an
independent copy of $X$, the random variable $X+X'$ is uniform on the coset $a+a+H$, which has the
same size as $|a+H|$, so $\Eta\{X+X'\} = \Eta\{X\}$, meaning that
$$\dd\{X, -X\} = \Eta\{X+X'\} - {\Eta\{X\}\over 2} - {\Eta\{X'\} \over 2} = 0.$$
On the other hand, suppose that $\Eta\{X+X'\} = 2\Eta\{X\}$, where $X'$ is an independent copy of $X$.
Then
$$\eqalign{
\Eta\{X+X'\} &= 2\Eta\{X\} - \Eta\{X\} \cr
&= \Eta\{X, X'\} - \Eta\{X'\} \cr
&= \Eta\{X+X', X'\} - \Eta\{X'\} \cr
&= \Eta\{X+X'\given X'\},\cr
}$$
so $X+X'$ is independent of $X'$.

Let $A$ denote the support of $X$. We have shown that the distribution
$(X + X' \given X' = x)$ is the same
regardless of our choice of $x\in A$. Then for all $x,y\in A$, the distribution of $X+x$ is the
same as that of $X+y$, so the distribution of
of $X+x-y$ is the same as that of $X$. This implies that $A$ is finite,
$X$ is the uniform distribution on $A$,
and adding an element $h\in A-A$ to the set $A$ simply permutes the set.

As in the proof
of Proposition~{\propdoublingone}, let $H = \{h\in G : A+h = A\}$. We proved back then that $H$ is a subgroup
of $G$, and the conclusion of the previous paragraph
asserts that $A-A \subseteq H$. But letting $h\in H$, the fact that
$h+A = A$ implies that $h\in A-A$. We conclude that
$A-A$ is a finite subgroup of $G$, whence $A$ is a coset of a finite subgroup of $G$.\slug

From this, we are able to prove the special case of Theorem~{\thmsubgroup} in which the Ruzsa distance between
the two given random variables is zero.

\edef\lemzerosubgroup{\the\sectcount.\the\thmcount}
\proclaim Lemma \advthm. Let $X_1$ and $X_2$ be random variables taking values in $\FF_2^n$ with
$\dd\{X_1,X_2\} = 0$. Then there exists a subgroup $H$ of $\FF_2^n$ such that
$$\dd\{X_1, U_H\} = \dd\{X_2,U_H\} = 0.$$

\proof The triangle inequality gives $\dd\{X_1,X_1\} = 0$, and since $-x = x$ in $\FF_2^n$,
we also have $\dd\{X_1, - X_1\} = 0$. By the previous proposition, there is a coset $S$ of a
subgroup $H$ such that
$X_1$ has the same distribution as $U_S$. So
$$\dd\{X_1, U_H\} = \Eta\{U_S - U_H\} - {1\over 2}\Eta\{U_S\} - {1\over 2}\Eta\{U_H\} = 0,$$
since $U_S-U_H$ is uniform on $S$ and $|H| = |S|$. Then by the triangle inequality once again,
$\dd\{X_2, U_H\} = 0$ as well.\slug

Recall that in the previous section, we reduced the proof of the polynomial Freiman--Ruzsa theorem
to a statement involving some random variables $X_1^*$ and $X_2^*$. For the remainder of these notes,
we shall consider these variables to be fixed, and we also fix $\eta = 1/9$ for short.
Defining the functional
$$\tau(X_1, X_2) = \dd\{X_1, X_2\} - \eta\dd\{X_1^*, X_1\} - \eta\dd\{X_2^*, X_2\},$$
we can then reduce the proof of Theorem~{\thmsubgroup} (and consequently the entire proof) to showing the
following proposition.

\edef\propmaintau{\the\sectcount.\the\thmcount}
\proclaim Proposition \advthm. Let $X_1$ and $X_2$ be two $\FF_2^n$-valued random variables with
$\dd\{ X_1, X_2\} > 0$. Then there are $\FF_2^n$-valued random variables $X_1'$ and $X_2'$ such that
$$\tau\{X_1', X_2'\} < \tau\{X_1, X_2\}.$$

If we can prove this proposition, then we have Theorem~{\thmsubgroup} with a certain constant $C'$ which
shall be made explicit during the proof.

\medskip\noindent{\it Proof of Theorem~{\thmsubgroup} assuming Proposition~{\propmaintau}}.\enspace
We proved earlier that $\P(\FF_2^n)$ is a compact topological space, thus so is its Cartesian product with
itself. By Proposition~{\propruzsacontinuous}, the Ruzsa distance functional is continuous on this space,
which implies that $\tau$ is as well. Hence $\tau$ attains its infimum on the space $\P(\FF_2^n)^2$;
let $X_1$ and $X_2$ be two distributions such that $\tau\{X_1, X_2\}$ is minimal. By the contrapositive
of Proposition~{\propmaintau}, we must have $\dd\{X_1, X_2\} = 0$, so by Lemma~{\lemzerosubgroup},
there exists a subgroup $H$ of $\FF_2^n$ such that
$$\dd\{X_1, U_H\} = \dd\{X_2, U_H\} = 0.$$
This means that
$$\dd\{X_1^*, U_H\} \le \dd\{X_1^*, X_1\} + \dd\{ X_1, U_H\} = \dd\{X_1^*, X_1\}$$
and
$$\dd\{X_1^*, X_1\} \le \dd\{X_1^*, U_H\} + \dd\{U_H, X_1\} = \dd\{X_1^*, U_H\},$$
so $\dd\{X_1^*, U_H\} = \dd\{X_1^*, X_1\}$. The same holds for $X_2^*$ and $X_2$.
Then since $\dd\{X_1, X_2\} = 0$ and by minimality of $\tau\{X_1, X_2\}$, we obtain
$$\eqalign{
\eta\dd\{X_1^*, U_H\} + \eta\dd\{X_2^*, U_H\} &= \eta\dd\{X_1, U_H\} + \eta\dd\{X_2, U_H\} \cr
&= \tau\{X_1, X_2\} \cr
&\le \tau\{X_1^*, X_2^*\} \cr
&= \dd\{X_2^*, X_1^*\} + \eta\dd\{X_1^*, X_2^*\} + \eta\dd\{X_2^*, X_1^*\} \cr
&= (1+2\eta) \dd\{X_1^*, X_2^*\}. \cr
}$$
From our choice of $\eta = 1/9$ we can multiply both sides by $9$ to get
$$\dd\{X_1^*, U_H\} + \dd\{X_2^*, U_H\} \le 11\dd\{X_1^*, X_2^*\},$$
and furthermore, since
$$\bigl| \dd\{X_1^*, U_H\} - \dd\{X_2^*, U_H\} \bigr| \le \dd\{X_1^*, X_2^*\}$$
by two invocations of the triangle inequality, neither $\dd\{X_1^*, U_H\}$ nor
$\dd\{X_2^*, U_H\}$ can be greater than $6\dd\{X_1^*, X_2^*\}$.\slug

For posterity, let us restate what we have just proved with the constants plugged in.

\proclaim Theorem~{\thmsubgroup}$\mathbold'$.
Let $X_1^*$ and $X_2^*$ be random variables taking values in
$\FF_2^n$. There is some subgroup $H\subseteq \FF_2^n$ such that
$$\dd\{ X_1^*, U_H\} + \dd\{ X_2^*, U_H\} \le 11\dd\{X_1^*, X_2^*\},$$
and furthermore,
$$\max\bigl( \dd\{X_1^*, U_H\}, \dd\{X_2^*, U_H\}\bigr) \le 6\dd\{X_1^*, X_2^*\}.\noskipslug$$

\advsect Entropy distance under homomorphisms

Our goal has been reduced to showing that if $X_1$ and $X_2$ have $\dd\{X_1, X_2\} > 0$, then
there exist $X_1'$ and $X_2'$ with $\tau\{X_1', X_2'\} < \tau\{X_1, X_2\}$. Remember that
the fixed variables $X_1^*$ and $X_2^*$ still play a r\^ole in this statement, since
they feature in the definition of the functional $\tau$.

We have elected to first prove all the technical lemmas
then stitch them into a complete proof.
We employ this ``bottom-up'' approach not because
it is necessarily more natural, but because the original proof is written
in a rather ``top-down'' style, and the reader may find it enlightening to have both expositions at his
or her disposal.

Having said this, the remainder of this section will be devoted to the study of how entropy
behaves under homomorphism. This appeared as Proposition~1.4
in~\ref{revisited2023}, and reproved in~\ref{pfr2023} with the
explicit error term shown below.

\edef\propfourone{\the\sectcount.\the\thmcount}
\parenproclaim Proposition {\advthm} ({\rm\ref{pfr2023},} Proposition 4.1).
Let $\pi : G\to G'$ be a homomorphism of abelian groups and let
$Z_1$ and $Z_2$ be $G$-valued random variables. Then
$$\dd\{Z_1, Z_2\} \ge \dd\bigl\{ \pi(Z_1), \pi(Z_2)\bigr\}
+ \dd\bigl\{ Z_1\given \pi(Z_1); Z_2\given \pi(Z_2)\bigr\}.$$
If $Z_1$ and $Z_2$ are assumed to be independent, then the two sides differ by
$$\II\bigl\{ Z_1 - Z_2 : \bigl( \pi(Z_1), \pi(Z_2)\bigr) \mathbin{\big|} \pi(Z_1-Z_2)\bigr\}.$$

\proof Let $Z_1'$ and $Z_2'$ be independent copies of $Z_1$ and $Z_2$. Then
$$\eqalign{
\dd\bigl\{ Z_1\given &\pi(Z_1); Z_2\given \pi(Z_2)\bigr\} \cr
&= \Eta\bigl\{Z_1'-Z_2'\given \pi(Z_1'), \pi(Z_2') \bigr\}
 - {1\over 2}\Eta\bigl\{ Z_1' \given \pi(Z_1')\bigr\} - {1\over 2}\Eta\bigl\{ Z_2' \given \pi(Z_2')\bigr\} \cr
&\le \Eta\bigl\{Z_1'-Z_2'\given \pi(Z_2') \bigr\}
 - {1\over 2}\Eta\bigl\{ Z_1' \given \pi(Z_1')\bigr\} - {1\over 2}\Eta\bigl\{ Z_2' \given \pi(Z_2')\bigr\} \cr
&= \Eta\bigl\{Z_1'-Z_2'\given \pi(Z_1'- Z_2') \bigr\}
 - {1\over 2}\Eta\bigl\{ Z_1' \given \pi(Z_1')\bigr\} - {1\over 2}\Eta\bigl\{ Z_2' \given \pi(Z_2')\bigr\} \cr
&= \Eta\{Z_1'-Z_2'\} - \Eta\bigl\{ \pi(Z_1'-Z_2')\bigr\} \cr
&\qquad\qquad
+{1\over 2}\Eta\{Z_1'\} - {1\over 2}\Eta\bigl\{ \pi(Z_1')\bigr\} 
+{1\over 2}\Eta\{Z_2'\} - {1\over 2}\Eta\bigl\{ \pi(Z_2')\bigr\} \cr
&= \dd\{Z_1, Z_2\} - \dd\bigl\{ \pi(Z_1),\pi(Z_2)\bigr\},\cr
}$$
where the inequality follows from submodularity, and in the second-last equality we used (three times)
the identity
$$\Eta\bigl\{X\given \pi(X)\bigr\} = \Eta\{X\} - \Eta\bigl\{\pi(X)\bigr\},$$
which holds for any $G$-valued random variable $X$, since $X$ determines $\pi(X)$.

If $Z_1$ and $Z_2$ are independent, then the difference between the two sides is
$$\eqalign{
\Eta\bigl\{&Z_1-Z_2\given \pi(Z_1-Z_2) \bigr\}-\Eta\bigl\{Z_1-Z_2\given \pi(Z_1), \pi(Z_2) \bigr\} \cr
&= \Eta\bigl\{Z_1-Z_2\given \pi(Z_1-Z_2) \bigr\}-\Eta\bigl\{Z_1-Z_2\given \pi(Z_1), \pi(Z_2), \pi(Z_1-Z_2)
\bigr\}.\cr
}$$
But one of the definitions of conditional mutual information is
$$\II\{X:Y\given Z\} = \Eta\{X\given Z\} - \Eta\{X\given Y, Z\},$$
so letting $X = Z_1-Z_2$, $Y=\bigl(\pi(Z_1), \pi(Z_2)\bigr)$, and $Z=\pi(Z_1-Z_2)$, we
see that the two sides of the inequality differ by exactly the conditional mutual information term
claimed above.\slug

We will use this proposition in the following special case.

\edef\corfourtwo{\the\sectcount.\the\thmcount}
\parenproclaim Corollary {\advthm} ({\rm\ref{pfr2023},} Corollary 4.2).
Let $G$ be an abelian group and let $Y_1$, $Y_2$, $Y_3$, and $Y_4$ be independent
$G$-valued random variables. Then
$$\eqalign{
\dd\{Y_1, Y_2\} + \dd\{Y_3,Y_4\} = \dd\{Y_1 - Y_3;&\ Y_2 - Y_4\}
+ \dd\{ Y_1\given Y_1 - Y_3;\ Y_2\given Y_2-Y_4\}\cr
&+ \II\{Y_1 - Y_2 : Y_2-Y_4 \given Y_1 - Y_2 - Y_3 + Y_4\}. \cr
}$$

\proof We apply this with the subtraction homomorphism $\pi : G\times G\to G$ given by $\pi(x,y) = x-y$.
Then the previous proposition applied to $Z_1 = (Y_1, Y_3)$ and $Z_2 = (Y_2, Y_4)$ yields the equality
$$\eqalign{
\dd\bigl\{&(Y_1, Y_3);\ (Y_2, Y_4)\bigr\} \cr
&= \dd\bigl\{ Y_1-Y_3, Y_2-Y_4\bigr\}
+ \dd\bigl\{ (Y_1, Y_3)\given Y_1-Y_3;\ (Y_2,Y_4)\given Y_2-Y_4\bigr\}\cr
&\hskip40pt+\II\bigl\{ (Y_1-Y_2, Y_3-Y_4) : (Y_1-Y_3, Y_2-Y_4)\mathbin{\big|} Y_1 - Y_2 - Y_3 - Y_4\bigr\} \cr
&= \dd\bigl\{ Y_1-Y_3, Y_2-Y_4\bigr\} + \dd\bigl\{ Y_1\given Y_1-Y_3;\ Y_2\given Y_2-Y_4\bigr\}.\cr
&\hskip40pt+\II\bigl\{ Y_1-Y_2 : Y_2-Y_4 \mathbin{\big|} Y_1-Y_2-Y_3+Y_4\bigr\} \cr
}$$
where in the last line we used the fact that $(Y_1-Y_2, Y_1-Y_2-Y_3+Y_4)$ determines $Y_3-Y_4$ as well
as the fact that $(Y_2-Y_4, Y_1-Y_2-Y_3+Y_4)$ determines $Y_1-Y_3$.
But by independence,
$$\eqalign{
\dd\bigl\{(Y_1, Y_3);\ (Y_2, Y_4)\bigr\}
&= \Eta\{ Y_1-Y_2, Y_3-Y_4\} - {\Eta\{Y_1, Y_3\}\over 2} - {\Eta\{Y_2, Y_4\}\over 2} \cr
&= \dd\{Y_1, Y_2\} + \dd\{Y_3, Y_4\},\cr
}$$
so the corollary is proved.\slug

\advsect Sums and fibres

In Section~{\sectcompactness}, we showed that to prove the polynomial Freiman--Ruzsa theorem,
it suffices to produce, for every pair $(X_1, X_2)$ of random variables taking values in $G = \FF_2^n$
with $\dd\{X_1, X_2\}>0$,
a pair $(X_1',X_2')$ with $\tau\{X_1', X_2'\} < \tau\{X_1, X_2\}$. (This was Proposition~{\propmaintau}.)
Recall that the definition of $\tau$ involves the ``global'' variables $X_1^*$ and $X_2^*$, as well as the global
constant $\eta = 1/9$.

To prove this statement, it is somewhat more convenient to work with its contrapositive. That is,
we shall assume that $(X_1, X_2)$ has $\dd\{X_1, X_2\} = k$ and that
this pair minimises $\tau$; that is, $\tau\{X_1', X_2'\} \ge \tau\{X_1,X_2\}$
for all $(X_1',X_2')$.
Without loss of generality we may further assume that $X_1$ and $X_2$ are independent.
Our aim is to show that $k=0$.
The first lemma we prove amounts to little more than expansion of definitions, but is worth writing
explicitly nonetheless.

\edef\lemionehelper{\the\sectcount.\the\thmcount}
\proclaim Lemma \advthm. Suppose $(X_1, X_2)\in \P(G)^2$
is a minimiser of $\tau$ with $\dd\{X_1, X_2\} = k$.
Then for all $(X_1',X_2')\in \P(G)^2$,
$$\dd\{X_1', X_2'\} \ge k-\eta\bigl(\dd\{X_1^*, X_1'\} - \dd\{X_1^*, X_1\}
+ \dd\{X_2^*, X_2'\} - \dd\{X_2^*, X_2\}\bigr),$$
and for all $(X_1',X_2')\in \P(G)^2$ and any discrete random variables $Y_1$ and $Y_2$, we have
$$\eqalign{
\dd\{&X_1'\given Y_1; X_2'\given Y_2\} \cr
&\ge k-\eta\bigl(\dd\{X_1^*; X_1'\given Y_1\} - \dd\{X_1^*, X_1\}
+ \dd\{X_2^*; X_2'\given Y_2\} - \dd\{X_2^*, X_2\} \bigr). \cr
}$$

\proof The first inequality follows directly from $\tau\{X_1',X_2'\} \ge \tau\{X_1, X_2\}$ and
the definition of $\tau$. For the second inequality, suppose that $Y_1$ has support $A_1$ and
$Y_2$ has support $A_2$ and sum the inequalities
$\tau\{X_1\given Y_1 = y_1;\ X_2\given Y_2 = y_2\}\ge \tau\{X_1,X_2\}$,
weighted by $\pr\{Y_1 = y_1, Y_2 = y_2\}$ for all $(y_1, y_2)\in A_1\times A_2$.\slug

The task now is to investigate specific
choices of $(X_1',X_2')$ and see what information we can glean about a minimising pair $(X_1, X_2)$.
The next lemma proves our first inequality in this direction.

\edef\lemionebound{\the\sectcount.\the\thmcount}
\proclaim Lemma \advthm. Let $G = \FF_2^n$
and suppose $(X_1, X_2)\in \P(G)^2$ is a minimiser of $\tau$ with $\dd\{X_1, X_2\} = k$.
Then letting
$$I_1 = \II\{ X_1 + X_2 : \bar X_1 + X_2 \given X_1 + X_2 + \bar X_1 + \bar X_2\},$$
where
$X_1$ and $\bar X_1$ be copies of $X_1$ and $X_2$ and $\bar X_2$ be copies of $X_2$ such that $X_1$,
$\bar X_1$, $X_2$, and $\bar X_2$ are all independent,
we have
$$I_1 \le 2\eta k.$$
Furthermore, we have
$$\Eta\{X_1 + X_2 + \bar X_1 + \bar X_2\} \le {1\over 2} \Eta\{X_1\} + {1\over 2} \Eta\{X_2\}+(2+\eta)k - I_1.$$

\proof
Since
$$ k = \dd\{X_1, X_2\} = \dd\{X_1, \bar X_2\} = \dd\{X_2, \bar X_1\},$$
applying Lemma~{\lemfivetwo} four times gives us the inequalities
$$\eqalign{
\dd\{X_1^*; X_1 + \bar X_2\}-\dd\{X_1^*, X_1\}&\le{1\over 2}k + {1\over 4}\Eta\{X_2\}-{1\over 4}\Eta\{X_1\},\cr
\dd\{X_2^*; X_2 + \bar X_1\}-\dd\{X_2^*, X_2\}&\le{1\over 2}k + {1\over 4}\Eta\{X_1\}-{1\over 4}\Eta\{X_2\},\cr
\dd\{X_1^*; X_1\given X_1-\bar X_2\}-\dd\{X_1^*,X_1\}
  &\le{1\over 2}k + {1\over 4}\Eta\{X_1\} - {1\over 4}\Eta\{X_2\},\cr
}$$
and
$$\dd\{X_2^*; X_2\given X_2-\bar X_1\}-\dd\{X_2^*,X_2\}
  \le{1\over 2}k + {1\over 4}\Eta\{X_2\} - {1\over 4}\Eta\{X_1\}.$$
By summing these four inequalites, we obtain
\edef\eqionefoursum{\the\eqcount}
$$\eqalign{
2k &\ge \dd\{X_1^*; X_1 + \bar X_2\} + \dd\{X_2^*; X_2 + \bar X_1\} \cr
&\qquad\qquad+\dd\{X_1^*; X_1\given X_1 - \bar X_2\} + \dd\{X_2^*; X_2\given X_2 - \bar X_1\} \cr
&\qquad\qquad-2\dd\{X_1^*, X_1\} - 2\dd\{X_2^*, X_2\} .\cr
}\adveq$$
On the other hand,
Corollary~{\corfourtwo} with $(Y_1, Y_2, Y_3, Y_4)$ set to $(X_1, X_2, \bar X_2, \bar X_1)$
gives us
$$\eqalign{
\dd\{X_1, X_2\} + \dd\{\bar X_2,\bar X_1\} = \dd\{&X_1 - \bar X_2;\ X_2 - \bar X_1\} \cr
&+ \dd\{ X_1\given X_1 - \bar X_2;\ X_2\given X_2-\bar X_1\}\cr
&+ \II\{X_1 - X_2 : X_2-\bar X_1 \given X_1 - X_2 - \bar X_2 + \bar X_1\}. \cr
}$$
which can be rewritten
\edef\eqioneidentity{\the\eqcount}
$$\eqalign{
2k &=
\dd\{X_1 + \bar X_2;\ X_2 + \bar X_1\} +\dd\{ X_1\given X_1 + \bar X_2;\ X_2\given X_2+\bar X_1\}\cr
&\qquad\qquad+ \II\{X_1 + X_2 : X_2+\bar X_1 \given X_1 + X_2 + \bar X_1 + \bar X_2\}\cr
&=  \dd\{X_1 + \bar X_2;\ X_2 + \bar X_1\} + \dd\{ X_1\given X_1 + \bar X_2;\ X_2\given X_2+\bar X_1\}+I_1\cr
}\adveq$$
since $\dd\{X_1,X_2\} = k$ and we are working in $G = \FF_2^n$.
By Lemma~{\lemionehelper}, we have
$$\eqalign{
\dd\{X_1 + \bar X_2; X_2 + \bar X_1\} &\ge k-\eta\bigl(\dd\{X_1^*, X_1 + \bar X_2\}-\dd\{X_1^*, X_1\}\cr
&\qquad + \dd\{X_2^*, X_2 + \bar X_1\}- \dd\{X_2^*, X_2\}\bigr) \cr
}\adveq$$
and
\edef\eqioneconditional{\the\eqcount}
$$\eqalign{
\dd\{X_1\given X_1 + \bar X_2;X_2\given X_2 + \bar X_1\}
&\ge k-\eta\bigl(\dd\{X_1^*; X_1\given  X_1 + \bar X_2\} - \dd\{X_1^*, X_1\}\cr
&\qquad+ \dd\{X_2^*, X_2\given X_2 + \bar X_1\}- \dd\{X_2^*, X_2\} \bigr).\cr
}\adveq$$
Substituting these two inequalities into~\refeq{\eqioneidentity} yields
$$\eqalign{
2k &\ge I_1 + 2k - \eta\bigl( \dd\{X_1^*, X_1 + \bar X_2\} + \dd\{X_1^*, X_1\given X_1+\bar X_2\} \cr
&\qquad\qquad\qquad\qquad+ \dd\{X_2^*, X_2 + \bar X_1\} + \dd\{X_2^*, X_2\given X_2+\bar X_2\},\cr
&\qquad\qquad\qquad\qquad -2\dd\{X_1^*, X_1\} - 2\dd\{X_2^*, X_2\} \bigl)\cr
}$$
whence
$$\eqalign{
I_1&\le \eta\bigl( \dd\{X_1^*, X_1 + \bar X_2\} + \dd\{X_1^*, X_1\given X_1+\bar X_2\} \cr
&\qquad\qquad\qquad\qquad+ \dd\{X_2^*, X_2 + \bar X_1\} + \dd\{X_2^*, X_2\given X_2+\bar X_2\} ,\cr
&\qquad\qquad\qquad\qquad -2\dd\{X_1^*, X_1\} - 2\{X_2^*, X_2\} \bigl)\cr
}$$
and we can substitute the inequality~\refeq{\eqionefoursum} to get $I_1\le 2\eta k$.

To prove the other claim, we substitute~\refeq{\eqioneconditional} into~\refeq{\eqioneidentity}
to obtain
$$\eqalign{
k&\ge I_1 + \dd\{X_1 + \bar X_2 ; X_2 + \bar X_1\}
  -\eta\bigl(\dd\{X_1^*; X_1\given  X_1 + \bar X_2\} - \dd\{X_1^*, X_1\}\cr
&\hskip150pt+ \dd\{X_2^*; X_2\given X_2 + \bar X_1\}- \dd\{X_2^*, X_2\} \bigr) \cr
&\ge I_1 + \dd\{X_1 + \bar X_2; X_2 + \bar X_1\} -\eta k, \cr
}$$
where in the second line we have used two of the four inequalities begotten by Lemma~{\lemfivetwo}.
So $(1+\eta)k - I_1 \ge \dd\{X_1 + \bar X_2; X_2 + \bar X_1\}$, which we expand to
$$\eqalign{
(1+\eta)k-I_1 &\ge \Eta\{X_1 + X_2 + \bar X_1 + \bar X_2\} - {1\over 2}\Eta\{X_1 + \bar X_2\} - 
{1\over 2} \Eta\{X_2+\bar X_1\} \cr
&=  \Eta\{X_1 + X_2 + \bar X_1 + \bar X_2\}-\dd\{X_1, X_2\} - {1\over 2} \Eta\{X_1\}-{1\over 2}\Eta\{X_2\}\cr
}$$
by independence and the fact that subtraction is addition in $G$. Rearranging terms completes
the proof.\slug

We can interpret this lemma as saying that if setting $X_1' = X_1 + \bar X_2$ and $X_2' = X_2 + \bar X_1$
gives the inequality $\tau\{X_1',X_2'\} \ge \tau\{X_1,X_2\}$, and furthermore if the same holds when
setting $(X_1',X_2')$ to any pair
$$\bigl(X_1 \given X_1+\bar X_2 = v_1, X_2\given X_2+\bar X_1 = v_2\bigr)$$
of ``fibres'', where $(v_1, v_2)\in G^2$,
then we have a bound on a certain mutual information quantity $I_1$.

Now we perform a similar analysis, where instead of taking sums $X_1+\bar X_2$ and
$X_2+\bar X_1$ across variables, now we sum copies of the {\it same} variables.

\edef\lemitwobound{\the\sectcount.\the\thmcount}
\proclaim Lemma \advthm. Let $G = \FF_2^n$
and suppose $(X_1, X_2)\in \P(G)^2$ is a minimiser of $\tau$ with $\dd\{X_1, X_2\} = k$.
Then letting
$$I_2 = \II\{ X_1 + X_2 : X_1 + \bar X_1 \given X_1 + X_2 + \bar X_1 + \bar X_2\},$$
where
$X_1$ and $\bar X_1$ be copies of $X_1$ and $X_2$ and $\bar X_2$ be copies of $X_2$ such that $X_1$,
$\bar X_1$, $X_2$, and $\bar X_2$ are all independent,
we have
$$I_2 \le 2\eta k + {2\eta(2\eta k- I_1)\over 1-\eta}.$$

\proof
Applying Lemma~{\lemfivetwo} four times gives us the inequalities
\edef\eqitwofourineq{\the\eqcount}
$$\eqalign{
\dd\{X_1^*; X_1 + \bar X_1\}-\dd\{X_1^*, X_1\}&\le{1\over 2}\dd\{X_1, X_1\},\cr
\dd\{X_2^*; X_2 + \bar X_2\}-\dd\{X_2^*, X_2\}&\le{1\over 2}\dd\{X_2, X_2\},\cr
\dd\{X_1^*; X_1\given X_1-\bar X_1\}-\dd\{X_1^*,X_1\} &\le {1\over 2}\dd\{X_1, X_1\},\cr
}\adveq$$
and
$$\dd\{X_2^*; X_2\given X_2-\bar X_2\}-\dd\{X_2^*,X_2\} \le{1\over 2}\dd\{X_2,X_2\}.\adveq$$
These inequalities and Lemma~{\lemionehelper} together give
\edef\eqitwounconditional{\the\eqcount}
$$\eqalign{
\dd\{X_1 + \bar X_1; X_2 + \bar X_2\} &\ge k-\eta\bigl(\dd\{X_1^*, X_1 + \bar X_1\}-\dd\{X_1^*, X_1\}\cr
&\qquad + \dd\{X_2^*, X_2 + \bar X_2\}- \dd\{X_2^*, X_2\}\bigr) \cr
&\ge k-{\eta\over 2}\dd\{X_1, X_1\} -{\eta\over 2} \dd\{X_2, X_2\} \cr
}\adveq$$
and
\edef\eqitwoconditional{\the\eqcount}
$$\eqalign{
\dd\{X_1\given X_1 + \bar X_1;X_2\given X_2 + \bar X_2\}
&\ge k-\eta\bigl(\dd\{X_1^*; X_1\given  X_1 + \bar X_1\} - \dd\{X_1^*, X_1\}\cr
&\qquad+ \dd\{X_2^*, X_2\given X_2 + \bar X_2\}- \dd\{X_2^*, X_2\} \bigr)\cr
&\ge k-{\eta\over 2}\dd\{X_1, X_1\} -{\eta\over 2} \dd\{X_2, X_2\}. \cr
}\adveq$$
Corollary~{\corfourtwo}, with $(Y_1, Y_2, Y_3, Y_4)$ set to $(X_2, X_1, \bar X_2, \bar X_1)$ this time,
yields
\edef\eqitwoidentity{\the\eqcount}
$$\eqalign{
2k &=
\dd\{X_2 + \bar X_2;\ X_1 + \bar X_1\} +\dd\{ X_2\given X_2 + \bar X_2;\ X_1\given X_1+\bar X_1\}\cr
&\qquad\qquad+ \II\{X_1 + X_2 : X_1+\bar X_1 \given X_1 + X_2 + \bar X_1 + \bar X_2\}\cr
&=  \dd\{X_1 + \bar X_1;\ X_2 + \bar X_2\} + \dd\{ X_1\given X_1 + \bar X_1;\ X_2\given X_2+\bar X_2\}+I_2,\cr
}\adveq$$
into which we substitute~\refeq{\eqitwounconditional} and~\refeq{\eqitwoconditional} to obtain
\edef\eqitwoalmostdone{\the\eqcount}
$$ I_2 \le \eta\bigl( \dd\{X_1, X_1\} + \dd\{X_2,X_2\}\bigr).$$
It remains to bound $\dd\{X_1, X_1\} + \dd\{X_2,X_2\}$.

Since $\Eta\{X_1 + \bar X_1\}= \Eta\{X_1\} + \dd\{X_1, X_1\}$ and similarly for $X_2$, we may expand
$$\eqalign{
\dd\{X_1 + \bar X_1; X_2 + \bar X_2\} &= \Eta\{X_1 + \bar X_1 + X_2 + \bar X_2\} \cr
&\qquad\qquad-{1\over 2} \Eta\{X_1+\bar X_1\} - {1\over 2} \Eta\{X_2+\bar X_2\} \cr
&= \Eta\{X_1 + \bar X_1 + X_2 + \bar X_2\} - {1\over 2}\Eta\{X_1\}
-{1\over 2} \Eta\{X_2\} \cr
&\qquad\qquad-{1\over 2} \dd\{X_1,X_1\} - {1\over 2} \Eta\{X_2,X_2\}.\cr
}$$
But recall that the second conclusion of Lemma~{\lemionebound} was
$$\Eta\{X_1 + X_2 + \bar X_1 + \bar X_2\} \le {1\over 2} \Eta\{X_1\} + {1\over 2} \Eta\{X_2\}+(2+\eta)k - I_1,$$
meaning that
$$\dd\{X_1+\bar X_1; X_2+\bar X_2\}\le (2+\eta)k - {1\over 2}\dd\{X_1, X_1\} -{1\over 2}\dd\{X_2,X_2\}-I_1.$$
Chaining this with~\refeq{\eqitwounconditional} yields
$$ k-{\eta\over 2}\dd\{X_1,X_1\} - {\eta\over 2}\dd\{X_2,X_2\} \le
(2+\eta)k-{1\over 2}\dd\{X_1, X_1\} -{1\over 2}\dd\{X_2,X_2\}-I_1,$$
which simplifies to
$$\dd\{X_1,X_1\} + \dd\{X_2,X_2\} \le {2k +2\eta k - 2I_1\over 1-\eta} = 2k + {2(2\eta k -I_1)\over 1-\eta} $$
and hence
$$I_2 \le 2\eta k + {2\eta(2\eta k -I_1)\over 1-\eta}.\noskipslug$$

\advsect Endgame

We now approach the phase of the proof which the authors of~\ref{pfr2023} theatrically call the `endgame.'
There is just one more lemma we need before we can give a full proof of Proposition~{\propmaintau}.

\edef\lemseventwo{\the\sectcount.\the\thmcount}
\proclaim Lemma~\advthm. Let $X_1$ and $Y_1$ be any $\FF_2^n$-valued random variables,
and let $T_1$, $T_2$, and $T_3$ be $\FF_2^n$-valued random variables
such that $T_1 + T_2 + T_3 = 0$ holds identically. Putting
$$\delta = \II\{T_1 : T_2\} + \II\{T_1:T_3\} + \II\{T_2 : T_3\}$$
and letting $\psi$ be the functional given by
$$\psi\{T_1', T_2'\}
= \dd\{T_1', T_2'\} + \eta\bigl( \dd\{X_1^*,T_1'\}-\dd\{X_1^*,X_1\}+\dd\{X_2^*,T_2'\}-\dd\{X_2^*, X_2\}\bigr),$$
there exist random variables $T_1'$ and $T_2'$ such that
$$\psi\{T_1', T_2'\} \le \biggl(1+{\eta\over 3}\biggr)\delta
+ {\eta\over 3}\sum_{i=1}^2\sum_{j=1}^3 \bigl(\dd\{X_i^*, T_j\}-\dd\{X_i^*, X_i\}\bigr)$$

\proof From the fact that any two of the $T_j$ determine the full triple $(T_1, T_2, T_3)$,
we have
$$\Eta\{T_1, T_2\} = \Eta\{T_1, T_3\} = \Eta\{T_2,T_3\} = \Eta\{T_1, T_2, T_3\}.$$
Then since $T_1 + T_2 = T_3$ we may apply Lemma~{\lematwopfr} with
$(X,Y,Z) = (T_1, T_2, T_3)$ to bound
$$\eqalign{
\sum_{t_3\in \FF_2^n} \pr\{T_3 &= t_3\} \dd\bigl\{ (T_1\given T_3 = t_3);
(T_2\given T_3 = t_3)\bigr\} \cr
&\le 2\II\{T_1:T_2\} + 2\Eta\{T_3\} - \Eta\{T_1,T_2\} \cr
&= 2\Eta\{T_1\} + 2\Eta\{T_2\} + 2\Eta\{T_3\} - 3\Eta\{T_1,T_2\} \cr
&= \II\{T_1:T_2\} + \II\{T_1:T_3\} + \II\{T_2:T_3\} \cr
&= \delta.
}$$
Letting $Z$ be a random variable independent of both $X_1^*$ and $X_2^*$, we apply Lemma~{\lemfiveone}
with $(X,Z,Y,W) = (X_1^*, Z, T_1, T_3)$ to obtain
$$\eqalign{
\sum_{t_3\in\FF_2^n}\pr\{T_3 = t_3\}\bigl(\dd\bigl\{X_1^*;(T_1\given T_3&=t_3)\bigr\}-\dd\{X_1^*, X_1\}\bigr) \cr
&= \dd\{ X_1^*, T_1\given T_3\} - \dd\{X_1^*, X_1\} \cr
&\le \dd\{X_1^*, T_1\} + {1\over 2}\II\{T_1:T_3\} - \dd\{X_1^*, X_1\}, \cr
}$$
and applying the same lemma with $(X,Z,Y,W) = (X_2^*, Z, T_2, T_3)$ yields
$$\eqalign{
\sum_{t_3\in\FF_2^n}\pr\{T_3 = t_3\}\bigl(\dd\bigl\{X_2^*;(T_2\given T_3&=t_3)\bigr\}-\dd\{X_2^*, X_2\}\bigr) \cr
&\le \dd\{X_2^*, T_2\} + {1\over 2}\II\{T_2:T_3\} - \dd\{X_2^*, X_2\}. \cr
}$$
Putting the three observations together, we have
$$\eqalign{
&\sum_{t_3\in\FF_2^n}\pr\{T_3 = t_3\}\psi\bigl\{(T_1\given T_3=t_3);(T_2\given T_3=t_3)\bigr\} \cr
&\hskip10pt = \sum_{t_3\in\FF_2^n}\pr\{T_3 = t_3\}\Bigl(\dd\bigl\{(T_1\given T_3=t_3);(T_2\given T_3=t_3)\bigr\} \cr
&\hskip103pt + \eta\dd\bigl\{X_1^*;(T_1\given T_3=t_3)\bigr\}+ \eta\dd\bigl\{X_2^*;(T_2\given T_3=t_3)\bigr\} \cr
&\hskip192pt - \eta\dd\{X_1^*, X_1\} - \eta\dd\{X_2^*, X_2\} \Bigr) \cr
&\hskip10pt \le \delta + \eta\bigl( \dd\{X_1^*, T_1\} - \dd\{X_1^*, X_1\}+\dd\{X_2^*, T_2\}-\dd\{X_2^*, X_2\}\cr
&\hskip180pt 
+ {1\over 2} \II\{T_1 : T_3\} + {1\over 2}\II\{T_2:T_3\} \bigr), \cr
}$$
Choosing some $t_3$ in the support of $T_3$ such that the value of $\psi$ inside
the sum is minimal and then setting $T_{1,3}' = (T_1\given T_3 = t_3)$ and
$T_{2,3}' = (T_2\given T_3 = t_3)$, we have
$$\eqalign{
\psi\bigl\{T_{1,3}', T_{2,3}'\bigr\} \le \delta + \eta\bigl( \dd\{X_1^*, T_1\} - \dd\{X_1^*, X_1\}
&+ \dd\{X_2^*, T_2\} - \dd\{X_2^*, X_2\} \cr
& + {1\over 2} \II\{T_1 : T_3\} + {1\over 2}\II\{T_2:T_3\} \bigr)\cr
}$$
We can now repeat this for all six permutations $(\alpha, \beta, \gamma)$ of $(1,2,3)$ and average the
corresponding bounds for $(T_\alpha, T_\beta, T_\gamma)$ to obtain
$$\eqalign{
{1\over 6} \sum_{(\alpha,\beta,\gamma)\in {\frak S}_3}
\psi\bigl\{T_{\alpha,\gamma}', T_{\beta,\gamma}'\bigr\}
\le \delta &- \eta\dd\{X_1^*, X_1\} - \eta\dd\{X_2^*, X_2\} \cr
&+ {\eta\over 6}\sum_{(\alpha,\beta,\gamma)\in {\frak S}_3}
\bigl( \dd\{X_1^*, T_\alpha\} + \dd\{X_2^*, T_\beta\} \cr
&\hskip68pt+ {1\over 2} \II\{T_\alpha : T_\gamma\} + {1\over 2}\II\{T_\beta:T_\gamma\} \bigr).\cr
}$$
Since each of $\{1,2,3\}$ appears twice as $\alpha$ and twice as $\beta$, we have
$$\sum_{(\alpha,\beta,\gamma)\in {\frak S}_3} \bigl(\dd\{X_1^*, T_\alpha\} + \dd\{X_2^*, T_\beta\}\bigr)
= 2\sum_{i=1}^2\sum_{j=1}^3 \dd\{X_i^*, T_j\}$$
and
$$\sum_{(\alpha,\beta,\gamma)\in {\frak S}_3}
\biggl({1\over 2} \II\{T_\alpha : T_\gamma\} + {1\over 2}\II\{T_\beta:T_\gamma\}\biggr) = 2\delta.$$
Consequently, the average of
$\psi\bigl\{T_{\alpha,\gamma}', T_{\beta,\gamma}'\bigr\}$
over all permutations $(\alpha,\beta,\gamma)$ is bounded above by
$$\biggl(1+{\eta\over 3}\biggr)\delta
- \eta\dd\{X_1^*, X_1\} -\eta\dd\{X_2^*, X_2\} + {\eta\over 3}\sum_{i=1}^2\sum_{j=1}^3 \dd\{X_i^*, T_j\},$$
so the result follows by letting $(T_1', T_2')$ equal the pair $(T_{\alpha,\gamma}', T_{\beta,\gamma}')$
for the choice of $(\alpha,\beta,\gamma)$ that minimises
$\psi\bigl\{T_{\alpha,\gamma}', T_{\beta,\gamma}'\bigr\}$.\slug

We are now able to prove Proposition~{\propmaintau}, which we shall restate in the contrapositive.
Just as in previous sections, the functional $\tau$ depends on the fixed random variables $X_1^*$ and $X_2^*$
as well as the choice of constant $\eta = 1/9$.

\proclaim Proposition~{\propmaintau}$\mathbold'$. Let $X_1$ and $X_2$ be $\FF_2^n$-valued random variables
with the property that $\tau\{X_1', X_2'\} \ge \tau\{X_1, X_2\}$ for all random variables
$X_1'$ and $X_2'$ on $\FF_2^n$. Then $\dd\{X_1, X_2\} = 0$.

\proof Let $k=\dd\{X_1, X_2\}$. Let $X_1$ and $\bar X_1$ be copies of $X_1$ and $X_2$ and $\bar X_2$ copies
of $X_2$ such that $X_1$, $\bar X_1$, $X_2$, and $\bar X_2$ are all independent. Let $I_1$ and $I_2$
be as in the previous section, and let
$$I_3 = \II\{\bar X_1 + X_2 : X_1 + \bar X_1\given X_1 + X_2 + \bar X_1 + \bar X_2\},$$
so that $I_3 = I_2$ (which follows from interchanging the r\^oles of $X_1$ and $\bar X_1$).
For brevity of notation, let
$$U = X_1+X_2,\qquad V=\bar X_1 + X_2,\qquad W = X_1 + \bar X_1,$$
and
$$S = X_1 + X_2 + \bar X_1 + \bar X_2.$$
Then
$$I_1 = \II\{ U : V\given S\},\qquad I_2 = \II\{W:U\given S\},\qquad\hbox{and}\qquad I_3 = \II\{V:W\given S\}.$$
Lemma~{\lemionebound} gave us
$$I_3 = I_2 \le 2\eta k + {2\eta(2\eta k-I_1)\over 1-\eta},$$
so setting
$$\bar\delta = \II\{U:V\given S\} + \II\{W:U\given S\} + \II\{V:W\given S\},$$
we have
\edef\eqbardeltabound{\the\eqcount}
$$\eqalign{
\bar \delta &\le I_1+4\eta k+{4\eta(2\eta k - I_1)\over 1-\eta} \cr
&= {I_1 - \eta I_n + 4\eta k - 4\eta^2 k + 8\eta^2 k - 4\eta I_1\over 1-\eta} \cr
&= 6\eta k + {(1-5\eta)I_1 + 4\eta k + 4\eta^2 k - 6\eta k + 6\eta^2 k \over 1-\eta} \cr
&= 6\eta k + {(1-5\eta)I_1 - 2\eta k + 10\eta^2 k \over 1-\eta} \cr
&= 6\eta k - {1-5\eta \over 1-\eta}(2\eta k - I_1) \cr
}\adveq$$

Now we perform invocations of Lemma~{\lemsevenone} to obtain bounds on various Ruzsa distances. Setting
$(X,Y,Z,W) = (X_1^*, X_1, X_2, \bar X_1+\bar X_2)$, gives
$$\eqalign{
\dd\{X_1^*; U\given S\} - \dd\{X_1^*, X_1\}
&\le {1\over 2}\bigl( \Eta\{S\} + \Eta\{U\} - \Eta\{X_1\} - \Eta\{\bar X_1+\bar X_2\}\bigr)\cr
&= {1\over 2} \bigl( \Eta\{S\} - \Eta\{X_1\}\bigr),
}$$
where in the second line we used the fact that
$$\Eta\{U\} = \Eta\{X_1 + X_2\} = \Eta\{\bar X_1 + \bar X_2\}.$$
Similarly, we have
$$\eqalign{
\dd\{X_2^*; U\given S\} - \dd\{X_2^*, X_2\} &\le {1\over 2} \bigl( \Eta\{S\} - \Eta\{X_2\}\bigr), \cr
\dd\{X_1^*; V\given S\} - \dd\{X_1^*, X_1\} &\le {1\over 2} \bigl( \Eta\{S\} - \Eta\{X_1\}\bigr), \cr
\dd\{X_2^*; V\given S\} - \dd\{X_2^*, X_2\} &\le {1\over 2} \bigl( \Eta\{S\} - \Eta\{X_2\}\bigr), \cr
}$$
and
$$\dd\{X_1^*, W\given S\} - \dd\{X_1^*, X_1\}
\le {1\over 2} \bigl( \Eta\{S\} + \Eta\{W\} - \Eta\{X_1\} - \Eta\{W'\}\bigr),$$
where $W' = X_2 + \bar X_2$. To address the asymmetry in the last bound, we note that for any fixed value
$s$ taken by $S$, we have $W' = W + s$, so $\dd\{X_2^*; W\given S\} = \dd\{X_2^*; W'\given S\}$. Now
applying Lemma~{\lemsevenone} to $W'$ yields
$$\dd\{X_2^*; W\given S\} - \dd\{X_2^*, X_2\}
\le {1\over 2} \bigl( \Eta\{S\} + \Eta\{W\} - \Eta\{X_2\} - \Eta\{W'\}\bigr).$$
The sum of these six inequalities is
\edef\eqsixsum{\the\eqcount}
$$\sum_{i=1}^2 \sum_{A\in \{U,V,W\}} \bigl( \dd\{X_i^*; A\given S\} - \dd\{X_i^*, X_i\}\bigr)
\le 3\Eta\{S\} - {3\over 2} \Eta\{X_1\} - {3\over 2} \Eta\{X_2\}.\adveq$$
The second part of Lemma~{\lemionebound} states that
$$\Eta\{S\} \le {1\over 2} \Eta\{X_1\} + {1\over 2} \Eta\{X_2\} + (2+\eta)k - I_1,$$
which can be plugged into~\refeq{\eqsixsum} to give
\edef\eqdistancebounds{\the\eqcount}
$$\eqalign{
\sum_{i=1}^2 \sum_{A\in \{U,V,W\}} \bigl( \dd\{X_i^*; A\given S\} - \dd\{X_i^*, X_i\}\bigr)
&\le 3(2+\eta)k - 3I_1 \cr
&\le 6k -3\eta k + 6\eta k - 3I_1 \cr
&= (6-3\eta)k + 3(2\eta k - I_1).\cr
}\adveq$$

Let $\psi$ be defined as in Lemma~{\lemseventwo}. By Lemma~{\lemionehelper}, $k\le \psi\{T_1', T_2'\}$
for all random variables $T_1'$ and $T_2'$, so
for any random variables $(T_1, T_2, T_3)$, the pair $(T_1',T_2')$ furnished by Lemma~{\lemseventwo}
satisfies
$$k\le \psi\{T_1', T_2'\} \le \biggl(1+{\eta\over 3}\biggr)\delta
+ {\eta\over 3}\sum_{i=1}^2\sum_{j=1}^3 \bigl(\dd\{X_i^*, T_j\}-\dd\{X_i^*, X_i\}\bigr).$$
In particular, since $(U+V+W\given S=s)$ is identically zero for all possible values $s$ of $S$,
we can set
$$T_1 = (U\given S=s),\qquad T_2 = (V\given S=s), \qquad\hbox{and}\qquad T_3 = (W\given S=s)$$
and then average the above inequality over all $s\in \FF_2^n$, weighted by $\pr\{S=s\}$ to obtain
$$k\le \biggl(1+{\eta\over 3}\biggr)\bar\delta
+ {\eta\over 3}\sum_{i=1}^2\sum_{A\in\{U,V,W\}} \bigl(\dd\{X_i^*, A\given S\}-\dd\{X_i^*, X_i\}\bigr).$$
We now substitute the upper bounds~\refeq{\eqbardeltabound} and~\refeq{\eqdistancebounds} to get
$$\eqalign{
k &\le \biggl(1+{\eta\over 3}\biggr)\biggl(6\eta k - {1-5\eta \over 1-\eta}(2\eta k - I_1)\biggr)
+ {\eta\over 3}\biggl( (6-3\eta)k + 3(2\eta k - I_1)\biggr) \cr
&= (8\eta - \eta^2)k + \biggl(\eta-\Bigl(1+{\eta\over 3}\Bigr){1-5\eta\over 1-\eta} \biggr)
(2\eta k - I_1).\cr
}$$
With the choice $\eta = 1/9$,
$$\eta-\Bigl(1+{\eta\over 3}\Bigr){1-5\eta\over 1-\eta} = -{11\over 27} \le 0$$
and $2\eta k - I_1\ge 0$ by Lemma~{\lemionebound}, hence one concludes that
$$ k \le (8\eta + \eta^2)k = {73\over 81}k,$$
which is nonsense unless $k=0$.\slug

\section References

\bye