\documentclass[12pt]{article}
\usepackage{amsmath,amssymb,amsthm,amscd,amsfonts}
\usepackage{tikz}
\usepackage{url}
\usepackage{fullpage}
\usepackage{mathtools}
\usepackage{float}
%\usepackage{rotating}
\usepackage{pdflscape}
\usepackage[colorlinks=true,
linkcolor=webgreen,
filecolor=webbrown,
citecolor=webgreen]{hyperref}
\definecolor{webgreen}{rgb}{0,.5,0}
\definecolor{webbrown}{rgb}{.6,0,0}
\usepackage{color}
\usepackage{microtype}
\DeclarePairedDelimiter\abs{\lvert}{\rvert}
\DeclarePairedDelimiter\paren{\lparen}{\rparen}
\def\subw{\mathrel{\triangleleft}}
\DeclareMathOperator\supe{sup}
\DeclareMathOperator\subb{sub}
\theoremstyle{plain}
\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
%\newtheorem{test}{Test}
\theoremstyle{definition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{conjecture}[theorem]{Conjecture}
%\theoremstyle{remark}
%\newtheorem{remark}[theorem]{Remark}
\newtheorem{problem}[theorem]{Problem}
\newcommand{\0}{\mathtt{0}}
\newcommand{\1}{\mathtt{1}}
\newcommand{\2}{\mathtt{2}}
\newcommand{\3}{\mathtt{3}}
\newcommand{\4}{\mathtt{4}}
\newcommand{\5}{\mathtt{5}}
\newcommand{\6}{\mathtt{6}}
\newcommand{\7}{\mathtt{7}}
\newcommand{\8}{\mathtt{8}}
\newcommand{\9}{\mathtt{9}}
\newcommand{\A}{\mathtt{A}}
\newcommand{\B}{\mathtt{B}}
\newcommand{\union}{\cup}
\newcommand{\set}[2]{\{\,#1{}:{}#2\,\}}
\renewcommand{\a}{@}
% Obfuscate @ sign to help avoid spam
\usepackage{auto-pst-pdf,pst-text}
\renewcommand{\a}{\begin{pspicture}\pscharpath[fillstyle=solid,fillcolor=black,linestyle=none]{@}\end{pspicture}}
\newcommand{\seqnum}[1]{\href{http://oeis.org/#1}{\underline{#1}}}
\newcommand{\updated}[1]{{\color{red}#1}}
\renewcommand{\updated}[1]{#1}
\title{Minimal Elements for the Prime Numbers}
\author{Curtis Bright\\
School of Computer Science\\
University of Waterloo\\
Waterloo, ON N2L 3G1\\
Canada\\
{\tt cbright\a uwaterloo.ca} \\
\ \\
Raymond Devillers\\
D\'epartement d'Informatique, CP 212\\
Universit\'e Libre de Bruxelles \\
B-1050 Bruxelles\\
Belgium\\
{\tt rdevil\a ulb.ac.be} \\
\ \\
Jeffrey Shallit \\
School of Computer Science\\
University of Waterloo\\
Waterloo, ON N2L 3G1\\
Canada \\
{\tt shallit\a cs.uwaterloo.ca}}
\begin{document}
\maketitle
\begin{abstract}
We say a string of symbols $s$ is \textit{minimal} for a language $L$
if $s$ is a member of $L$, and it is not possible to obtain another
member of $L$ by striking out one or more symbols from $s$. Although
the set $M(L)$ of minimal strings is necessarily finite, determining
it explicitly for a given $L$ can be a difficult computational problem.
We use some number-theoretic heuristics to compute $M(L)$, where $L$
is the language of base-$b$ representations of the prime numbers,
for $2 \leq b \leq 30$.
\end{abstract}
\section{Introduction}
Problems about the digits of prime numbers have a long history, and many
of them are still unsolved. For example, are there infinitely many
primes, all of whose base-$10$ digits are $\1$? Currently,
there are only five such ``repunits''
known \cite{WD},
corresponding to $(10^p-1)/9$ for $p \in \{ 2, 19, 23, 317, 1031 \}$.
It seems likely that four more are given by
$p \in \{ 49081, 86453, 109297, 270343\}$, but this has not yet been
rigorously proven.
Another problem on the digits of primes was introduced by the
third author \cite{Sh00}. To describe it, we need some definitions.
We say that a string $x$ is a \textit{subword} of a string $y$, and
we write $x \subw y$, if
one can strike out zero or more symbols of $y$ to get $x$.
For example, {\tt string} is a subword of
{\tt Meistersinger}.
(In the literature, this concept is sometimes called a ``scattered
subword'' or ``substring'' or ``subsequence''.)
A \textit{language} is a set of strings. A string $s$ is \textit{minimal}
for $L$ if (a) $s \in L$ and (b) if $x \in L$ and $x \subw s$, then
$x = s$. The set of all minimal strings of $L$ is denoted $M(L)$.
In this paper, we describe a heuristic
technique for determining $M(L_b)$ in the
case where $L_b$ consists of the representations, in base $b$, of the
the prime numbers $\lbrace 2, 3, 5, \dotsc \rbrace$. We obtain
a complete characterization of $M(L_b)$ for bases
$2 \leq b \leq 16$ and \updated{$b = 18$, $20$, $22$, $23$, $24$, and $30$. For the remaining
bases $b = 17$, $19$, $21$, and $25\leq b\leq 29$,}
we obtain results that allow us to ``almost'' completely characterize this set.
The same technique can also be applied to find minimal sets for subsets of prime numbers.
For example, we were able to determine the minimal set for primes of the form $4n+1$
represented in base 10 (and similarly for those of the form $4n+3$).
This successfully completes the sequences
\seqnum{A111055} and \seqnum{A111056} in the
\textit{Encyclopedia of Integer Sequences}~\cite{oeis},
which had been incomplete
since their introduction to the Encyclopedia in 2005.
Finally, we also show that determining the minimal set for
the composite numbers represented in base $b$ is a significantly easier problem,
and explicitly compute the minimal set for all $2\leq b\leq 30$.
\subsection{Notation}
In what follows, if $x$ is a string of symbols over the alphabet
$\Sigma_b \coloneqq \lbrace \0, \1, \dotsc, b-1 \rbrace$ we let
$[x]_b$ denote the evaluation of $x$ in base $b$ (starting with the
most significant digit)\updated{, and $[\epsilon]_b\coloneqq0$ where $\epsilon$
is the empty string}. This is extended to languages as follows:
$[L]_b \coloneqq \set{ [x]_b }{ x \in L }$.
We use the convention that $\A \coloneqq 10$, $\B \coloneqq 11$, and so forth,
to conveniently represent strings of symbols in base $b > 10$.
We let $(x)_b$ be the canonical representation
of $x$ in base-$b$, that is, the representation without leading zeroes.
%The set of all canonical representations in base $b$ is denoted
%$C_b$.
Finally, as usual, for a language $L$ we let
$L^n \coloneqq \overbrace{LL\dotsm L}^n$ and $L^* \coloneqq \bigcup_{i \geq 0} L^i$.
\section{Why minimal sets are interesting}
One reason why
the minimal set $M(L)$ of a language $L$ is interesting is because it
allows us to compute two natural and related languages,
defined as follows:
\begin{align*}
\subb(L) &\coloneqq \set{x \in \Sigma^*}{\text{there exists $y \in L$ such that $x \subw y$}} ; \\
\supe(L) &\coloneqq \set{x \in \Sigma^*}{\text{there exists $y \in L$ such that $y \subw x$}} .
\end{align*}
An amazing fact is that $\subb(L)$ and $\supe(L)$ are always regular.
This follows from
the following classical theorem due to Higman \cite{Hi52} and
Haines \cite{Ha69}.
\begin{theorem}
For every language\/ $L$, there are only finitely many minimal strings.
\end{theorem}
Indeed, we have $\supe(L) = \supe(M(L))$
and $\Sigma^* - \subb(L) = \supe(M(\Sigma^* - \subb(L)))$, and
the superword language of a finite language is regular, since
\[ \supe\paren[\big]{\{w_1,\dotsc,w_n\}} = \bigcup_{i=1}^n \Sigma^* w_{i,1} \Sigma^* \dotsm \Sigma^* w_{i,\abs{w_i}} \Sigma^* \]
where $w_i=w_{i,1}\dotsm w_{i,\abs{w_i}}$ with $w_{i,j}\in\Sigma$.
\section{Why the problem is hard}
Determining $M(L)$ for arbitrary $L$
is in general unsolvable, and can be difficult even when $L$
is relatively simple~\cite{GHK07,GHK09}.
The following is a ``semi-algorithm'' that
is guaranteed to produce $M(L)$, but it is not so easy to implement:
\begin{figure}[H]
(1) $ M \coloneqq \emptyset$ \\
(2) while ($L \not= \emptyset$) do \\
\hphantom{} \qquad (3) choose $x$, a shortest string in $L$ \\
\hphantom{} \qquad (4) $ M \coloneqq M \union \lbrace x \rbrace$ \\
\hphantom{} \qquad (5) $ L \coloneqq L - \supe(\lbrace x \rbrace) $
\end{figure}
In practice, for arbitrary $L$,
we cannot feasibly carry out step (5). Instead, we work
with $L'$, some regular overapproximation to $L$, until we can show $L' =
\emptyset$ (which implies $L = \emptyset$). In practice,
$L'$ is usually chosen to be a finite union of sets of the form
$L_1 L_2^* L_3$, where each of $L_1$, $L_2$, $L_3$ is finite.
In the case we consider in this paper,
we then have to determine whether such a language contains
a prime or not.
However, it is not even known if the following simpler decision problem is
recursively solvable:
\begin{problem}
Given strings $x$, $y$, $z$, and a base $b$, does there exist a prime
number whose base-$b$ expansion is of the form $x \overbrace{yy\cdots y}^n z$
for some $n \geq 0$?
\end{problem}
An algorithm to solve this problem, for example, would allow us to decide
if there are any additional Fermat primes (of the form $2^{2^n}+1$)
other than the known ones (corresponding to $n = 0$, $1$, $2$, $3$, $4$).
To see this, take $b\coloneqq2$, $x\coloneqq\1$, $y\coloneqq\0$, and $z\coloneqq\0^{16}\1$.
Since if $2^n+1$ is prime then $n$ must be a power of two, a prime of the
form $[xy^*z]_b$ must be a new Fermat prime.
Therefore, in practice, we are forced to try to rule out prime
representations based on heuristics such as
modular techniques and factorizations.
This is discussed in the next section.
\section{Some useful lemmas}\label{seclemmas}
It will be necessary for our algorithm to determine if families of the
form $[xL^*z]_b$ contain a prime or not. We use two different
heuristic strategies
to show that such families contain no primes.
In the first strategy, we mimic the well-known technique of ``covering
congruences'' \cite{Choi},
by finding some finite set $S$ of integers $N > 1$ such that
every number in a given family is divisible by some element of $S$.
In the second strategy, we attempt to find a
difference-of-squares or difference-of-cubes factorization.
\subsection{The first strategy}\label{subsecfirststrat}
We start with the simplest version of the idea: to find
an $N > 1$ that divides each element of the family $[xL^*z]_b$.
At first glance, this would require
checking that $N$ divides $xL^nz$ for $n=0$, $1$, $2$, $\dotsc$.
However, the following lemma shows that it is only necessary to check
the two cases $n=0$ and $1$. Although divisibility based on digital
considerations has a long history (e.g., \cite[Chap.~XII]{Dick}),
we could not find these kinds of results in the literature.
\begin{lemma}\label{lemone}
Let\/ $x$, $z\in \Sigma^*_b$, and let\/ $L\subseteq\Sigma^*_b$.
Then\/ $N$ divides all numbers of the form\/ $[xL^*z]_b$
if and only if\/ $N$ divides\/ $[xz]_b$ and all numbers of the form\/ $[xLz]_b$.
\end{lemma}
\begin{proof}%($\Rightarrow$)
Let $y=y_1\dotsm y_n\in L^*$, where $y_1$, $\dotsc$, $y_n\in L$.
By telescoping we have
\[ [xyz]_b - [xz]_b = \sum_{i=1}^{n}([xy_{i}y_{i+1}\dotsm y_n z]_b-[xy_{i+1}\dotsm y_n z]_b) . \]
Cancelling the final $\abs{y_{i+1}\dotsm y_nz}$ base-$b$ digits in the summand difference ---
which are identical --- this becomes
\[ [xyz]_b = [xz]_b + \sum_{i=1}^{n}b^{\lvert{y_{i+1}\dotsm y_n z}\rvert}([xy_i]_b-[x]_b) . \]
But $b^{\lvert z\rvert}([xy_i]_b-[x]_b)=[xy_iz]_b-[xz]_b$ by
adding and subtracting $[z]_b$, so we have
\[ [xyz]_b = [xz]_b + \sum_{i=1}^{n}b^{\vert{y_{i+1}\dotsm y_n}\rvert}([xy_iz]_b-[xz]_b) . \]
Since $N\mid[xz]_b$ and $N\mid[xy_iz]_b$ for each $1\leq i\leq n$,
it follows that $N\mid[xyz]_b$.
The other direction is clear, since $[xz]_b$ and numbers of the
form $[xLz]_b$ are both of the form $[xL^*z]_b$.
\end{proof}
In practice, our algorithm employs this lemma with
$L\coloneqq\{y_1,\dotsc,y_n\}\subseteq\Sigma_b$, and all numbers of the form
$[xL^*z]_b$ are shown to be composite with the following corollary.
\begin{corollary}\label{corone}
If\/ $1<\gcd([xz]_b,[xy_1z]_b,\dotsc,[xy_nz]_b)<[xz]_b$
then all numbers of the form\/ $[x\{y_1,\dotsc,y_n\}^*z]_b$ are composite.
\end{corollary}
\begin{proof}
By Lemma~\ref{lemone}, we know
that $N\coloneqq\gcd([xz]_b,[xy_1z]_b,\dotsc,[xy_nz]_b)>1$
divides all numbers of the form $[x\{y_1,\dotsc,y_n\}^*z]_b$.
By the size condition $N$ is strictly less than each such number, and
so is a nontrivial divisor.
\end{proof}
\begin{example}
Since $\gcd(49, 469)=7$, every number with base-$10$ representation
of the form $\4\6^*\9$ is
divisible by $7$.
Since $1 < 7 < 49$, each such number is composite.
\end{example}
We also generalize this to the following corollary in the case where
a single divisor does not divide each number in the family.
\begin{corollary}\label{cortwo}
Let\/ $L \coloneqq \lbrace y_1, y_2, \ldots, y_n \rbrace$.
If \[N_0\coloneqq\gcd \paren[\big]{ \{ [xz]_b \} \union [xL^2 z]_b} \]
and
\[ N_1\coloneqq \gcd \paren[\big]{[xLz]_b \union [xL^3z]_b} \]
lie strictly between\/ $1$ and\/ $[xz]_b$, then all numbers of the form\/
$[xL^*z]_b$ are composite.
\end{corollary}
\begin{proof}
By Lemma~\ref{lemone} applied
to $[x(L^2)^*z]_b$, we know that $N_0$ divides all numbers of the
form $[x L^* z]_b$ in which an even number of $y_i$ appear.
By Lemma~\ref{lemone} on $[xy_i(L^2)^*z]_b$
for each $1\leq i\leq n$, we know that $N_1$
divides all numbers of the form $[x L^* z]_b$ for which an
odd number of $y_i$ appear.
By the size conditions, $N_0$ and $N_1$ are nontrivial divisors.
\end{proof}
\begin{example}
Since $\gcd([\6]_9,[\6\1\1]_9)=2$, every number with
base-$9$ representation of the form $\6\1^*$ of
odd length is divisible by $2$.
Since $\gcd([\6\1]_9,[\6\1\1\1]_9)=5$,
every number with base-$9$ representation of the form $\6\1^*$
of even length is divisible by $5$. Since these numbers lie
strictly between $1$ and $6$, every number with base-$9$
representation of the form $\6\1^*$ is
composite.
\end{example}
We also note that it is simple to generalize Corollary~\ref{cortwo}
to apply to
check if there are divisors $N_0$, $N_1$, $\dotsc$, $N_{k-1}$ such that
$N_i$ divides all numbers of the form $[x\{y_1,\dotsc,y_n\}^*z]_b$ in which
the number of $y_i$ appearing is congruent to $i\bmod k$.
\begin{example}
Let $b \coloneqq 16$.
Then
$7$ divides $[\8\A\0\1]_b$ and $[\8\A\0\A\A\A\1]_b$.
Furthermore, $13$ divides
$[\8\A\0\A\1]_b$ and $[\8\A\0\A\A\A\A\1]_b$,
and $3$ divides $[\8\A\0\A\A\1]_b$ and
$[\8\A\0\A\A\A\A\A\1]_b$. Thus all numbers with base-$16$
representation of the form $\8\A\0\A^*\1$ are
divisible by either $7$, $13$, or $3$, depending on their
length mod $3$.
\end{example}
A version of Lemma~\ref{lemone} which applies to the most general kind
of family we need to consider ($x_1L_1^*\dotsm x_mL_m^*$, where we allow the case $L_m^*=\emptyset$)
is formulated in Lemma~\ref{lemtwo}.
\begin{lemma}\label{lemtwo}
Let\/ $x_1$, $\dotsc$, $x_m\in \Sigma^*_b$, and\/ $L_1$, $\dotsc$,
$L_m\subseteq\Sigma^*_b$. Then\/
$N$ divides all numbers of the form\/ $[x_1L_1^*x_2L_2^*\dotsm x_mL_m^*]_b$
if and only if\/
$N$ divides\/ $[x_1\dotsm x_m]_b$ and all numbers of the form\/
$[x_1L_1x_2x_3\dotsm x_m]_b$, $\dotsc$, $[x_1\dotsm x_{m-1}x_mL_m]_b$.
\end{lemma}
\begin{proof}
Say $w\in x_1L_1^*x_2L_2^*\dotsm x_mL_m^*$; then there exist
$y_{i,1}$, $\dotsc$, $y_{i,n_i}\in L_i$ such that
\[ w = x_1y_{1,1}\dotsm y_{1,n_1}x_2y_{2,1}\dotsm y_{2,n_2}\dotsm x_m y_{m,1}\dotsm y_{m,n_m} \]
for $1\leq i\leq m$.
As in the proof of Lemma~\ref{lemone}, we have that
\[ [w]_b = [x_1\dotsm x_m]_b + \sum_{i=1}^m\sum_{j=1}^{n_i} b^{\abs{y_{i,j+1}\dotsm y_{m,n_m}}}([x_1\dotsm x_i y_{i,j} x_{i+1}\dotsm x_m]_b-[x_1\dotsm x_m]_b) \]
from which the claim follows.
\end{proof}
As in Lemma~\ref{lemone}, we typically apply this lemma in the case
where each $L_i\subseteq\Sigma_b$ and show that all numbers of the
form $[x_1L_1^*x_2L_2^*\dotsm x_mL_m^*]_b$ have a divisor.
\begin{example}
Take $(L_1, L_2, L_3) \coloneqq (\{ \0 \},\{\0\},\emptyset)$ and $(x_1,x_2, x_3) \coloneqq (\9, \8, \1)$.
Since $9$ divides $981$, $9081$, and $9801$, it follows that $9$
divides every number with base-$10$ representation
of the form $\9\0^*\8\0^*\1$.
\end{example}
More generally, if a single divisor doesn't work for every number,
Lemma~\ref{lemtwo} can also be applied in the case where all numbers
of the form $[x_1L_1^*\dotsm x_i(L_i^2)^*\dotsm x_mL_m^*]_b$ have one
divisor, and all numbers of the form
$[x_1L_1^*\dotsm x_iL_i(L_i^2)^*\dotsm x_mL_m^*]_b$ have another divisor.
\begin{example}
Let $b \coloneqq 11$.
Since $3$ divides each of $[\4\4\A\1]_b$,
$[\4\4\A\1\1\1]_b$,
$[\4\4\0\A\1]_b$ ,
it follows that
every number of the form $[\4\4\0^*(\1\1)^*\1]_b$ is composite.
Since $2$ divides each of $[\4\4\A\1\1]_b$,
$[\4\4\A\1\1\1\1]_b$, $[\4\4\0\A\1\1]_b$, we know
every number of the form $[\4\4\0^*(\1\1)^*\1\1]_b$ is composite.
It follows that all numbers of the form $[\4\4\0^*\A\1^*\1]_b$ are composite.
\end{example}
Lemma~\ref{lemtwo} can also be applied to the case when all even-length
strings under consideration have one divisor, and all the odd-length
strings have another divisor. One such case is, for example,
if numbers of the form
$[x_1(L_1^2)^*x_2(L_2^2)^*x_3]_b$ and $[x_1 L_1(L_1^2)^*x_2L_2(L_2^2)^*x_3]_b$
have one divisor, and numbers of the form $[x_1L_1(L_1^2)^*x_2(L_2^2)^*x_3]_b$
and $[x_1(L_1^2)^*x_2L_2(L_2^2)^*x_3]_b$ have another divisor.
\begin{example}
Let $b \coloneqq 9$.
Since $2$ divides each of $[\6]_b$,
$[\1\1\6]_b$,
$[\6\1\1]_b$,
$[\1\6\1]_b$,
$[\1\1\1\6\1]_b$,
$[\1\6\1\1\1]_b$, every odd-length string of the form $\1^*\6\1^*$ is
composite.
Since $5$ divides each of $[\1\6]_b$,
$[\1\1\1\6]_b$,
$[\1\6\1\1]_b$,
$[\6\1]_b$,
$[\1\1\6\1]_b$,
$[\6\1\1\1]_b$,
every even-length string of the form $\1^*\6\1^*$ is composite.
\end{example}
\subsection{The second strategy}
A second way of proving that families of the form $xL^*z$ do not contain a
prime is via algebraic factorizations, such as a difference-of-squares
factorization.
\begin{lemma}\label{lemsquares}
Let\/ $x$, $z\in\Sigma^*_b$, \updated{$y\in\Sigma_b$}, and let\/ $g\coloneqq\gcd([y]_b,b-1)$,
$X\coloneqq([y]_b+(b-1)[x]_b)/g$, and\/ $Y\coloneqq(b^{\lvert{z}\rvert}[y]_b-(b-1)[z]_b)/g$.
If\/ $b$, $X$, and\/ $Y$ are all squares and\/
$\sqrt{b^{\lvert z\rvert}X}-\sqrt{Y}>(b-1)/g$, then all numbers of the form\/
$[xy^*z]_b$ are composite.
\end{lemma}
\begin{proof}
Evaluating the base-$b$ expansion of $xy^nz$, we get
\begin{align*}
[xy^nz]_b &= b^{\lvert z\rvert+n}[x]_b + b^{\lvert z\rvert}\frac{b^n-1}{b-1}[y]_b + [z]_b \\
&= \frac{b^{\lvert z\rvert+n}X-Y}{(b-1)/g} .
%= \frac{(\sqrt{Xb^{\lvert z\rvert+n}}+\sqrt{Y})(\sqrt{Xb^{\lvert z\rvert+n}}-\sqrt{Y})}{(b-1)/g}
\end{align*}
Since $b$, $X$, and $Y$ are all squares the numerator factors as a
difference of squares. By the size condition both factors are
strictly larger than the
denominator, and so the factorization is nontrivial.
\end{proof}
\begin{example}
Let $b\coloneqq16$, $x\coloneqq\4$, $y\coloneqq\4$, and $z\coloneqq\1$. Then $g=1$, $X=8^2$, $Y=7^2$, and
\[ [\4\4^n\1]_b = \frac{(4^{n+1}\cdot8+7)(4^{n+1}\cdot8-7)}{15} . \]
Since $4\cdot8-7>15$, this factorization is nontrivial and no number of
the form $[\4\4^*\1]_b$ is prime.
\end{example}
It is also possible to combine Lemma~\ref{lemtwo} with Corollary~\ref{cortwo}
to construct a test which also applies to bases which are not squares.
\begin{corollary}
Using the same setup as in Lemma~\ref{lemtwo}, if\/ $b^{\abs{z}}X$ and\/ $Y$
are squares, $\sqrt{b^{\lvert z\rvert}X}-\sqrt{Y}>(b-1)/g$, and\/
$1<\gcd([xyz]_b,[xy^3z]_b)<[xz]_b$, then all numbers of the form\/ $[xy^*z]_b$
are composite.
\end{corollary}
\begin{proof}
Say $n=2m$ is even. Then from the factorization in Lemma~\ref{lemtwo},
\[ [xy^nz]_b = \frac{(b^m\sqrt{b^{\abs{z}}X}+\sqrt{Y})(b^m\sqrt{b^{\abs{z}}X}-\sqrt{Y})}{(b-1)/g} \]
which is nontrivial by the size condition.
Alternatively, if $n$ is odd then as in Corollary~\ref{cortwo} we have that
$\gcd([xyz]_b,[xy^3z]_b)$ divides $[xy^nz]_b$, and by the size condition this
divisor is nontrivial.
\end{proof}
\begin{example}
Let $b\coloneqq17$, $x\coloneqq\1\9$, $y\coloneqq\9$, and $z\coloneqq\9$. Then $g=1$,
$b^{\abs{z}}X=85^2$, $Y=3^2$, and
\[ [xy^{2n}z]_b = \frac{(17^n\cdot85+3)(17^n\cdot85-3)}{16} . \]
Since $85-3>16$ this factorization is nontrivial. Furthermore, all numbers
of the form $[xy^{2n+1}z]_b$ are even, so all numbers of the form
$[\1\9\9^*\9]_b$ are composite.
\end{example}
Finally, we present a variant of Lemma~\ref{lemsquares} which applies to a
difference-of-cubes factorization.
\begin{lemma}\label{lemcubes}
Let\/ $x$, $z\in\Sigma^*_b$, \updated{$y\in\Sigma_b$}, and let\/ $g\coloneqq\gcd([y]_b,b-1)$,
$X\coloneqq([y]_b+(b-1)[x]_b)/g$, and\/ $Y\coloneqq(b^{\lvert{z}\rvert}[y]_b-(b-1)[z]_b)/g$.
If\/ $b$, $X$, and\/ $Y$ are all cubes and\/
$\sqrt[3]{b^{\lvert z\rvert}X}-\sqrt[3]{Y}>(b-1)/g$, then all numbers of
the form\/
$[xy^*z]_b$ are composite.
\end{lemma}
\begin{proof}
As in Lemma~\ref{lemsquares}, we have
\[ [xy^nz]_b = \frac{\bigl((b^{\abs{z}+n}X)^{1/3}-Y^{1/3}\bigr)\bigl((b^{\abs{z}+n}X)^{2/3}+(b^{\abs{z}+n}XY)^{1/3}+Y^{2/3}\bigr)}{(b-1)/g} . \]
The second factor is at least as large as the first (except in the
single case $b^{\abs{z}+n}X=1$ and $Y=-1$, which is not possible by
construction of $X$ and~$Y$),
so by the size condition both factors are strictly larger than the
denominator, and the factorization is nontrivial.
\end{proof}
\begin{example}
Let $b\coloneqq8$, $x\coloneqq\1$, $y\coloneqq\0$, and $z\coloneqq\1$. Then $g=7$, $X=1$, $Y=-1$, and
\[ [\1\0^n\1]_b = (2^{n+1}+1)(4^{n+1}-2^{n+1}+1) . \]
Since $2-(-1)>1$, this factorization is nontrivial and no number of the
form $[\1\0^*\1]_b$ is prime.
\end{example}
\section{Our heuristic algorithm}\label{secalg}
As previously mentioned, in practice to compute $M(L_b)$ one works with an underapproximation
$M$ of $M(L_b)$ and an overapproximation $L$ of $L_b-\sup(M)$.
One then refines such approximations until $L=\emptyset$ from which it follows that $M=M(L_b)$.
For the initial approximation, note that every minimal prime in base $b$ with at least 4
digits is of the form $x Y^* z$, where $x\in\Sigma_b-\{\0\}$, $z\in\Sigma_b$, and
\[ Y \coloneqq \Sigma_b - \set{y}{\text{$(p)_b\subw xyz$ for some prime $p$}} . \]
Making use of this, our algorithm sets $M$ to be the set of base-$b$ representations
of the minimal primes with at most $3$ digits (which can be found simply by brute force)
and $L$ to be $\bigcup_{x,z}xY^*z$, as described above.
All remaining minimal primes are members of $L$, so to find them we explore the families in~$L$.
During this process, each family will be decomposed into possibly multiple other families.
For example, a simple way of exploring the family $xY^*z$ where $Y\coloneqq\{y_1,\dotsc,y_n\}$ is to decompose it
into the families $xY^*y_1z$, $\dotsc$, $xY^*y_nz$. If the smallest member (say $xy_iz$)
of any such family happens to be prime, it can be added to $M$ and the family
$xY^*y_iz$ removed from consideration. Furthermore, once $M$ has been updated it may be possible
to simplify some families in $L$. In this case, $xY^*y_jz$ (for $j\neq i$) can be simplified to
$x(Y-\{y_i\})^*y_jz$ since no minimal prime contains $xy_iz$ as a proper subword.
Another way of decomposing %families
the family $xY^*z$ is possible if one knows that a digit of $Y$ can only occur a certain number
of times. For example, if $xy_iy_iz$ has a proper prime subword then the digit $y_i$
can occur at most once in any minimal prime of the form $xY^*z$, and we can split $xY^*z$ into
the two families
\[ x(Y-\{y_i\})^*z \qquad\text{and}\qquad x(Y-\{y_i\})^*y_i(Y-\{y_i\})^*z .\]
Lastly, the family $xY^*z$ can be decomposed by considering digits of $Y$ which are mutually incompatible,
i.e., they cannot occur simultaneously in a minimal prime. For example, if $xy_iy_jz$ and $xy_jy_iz$ ($i\neq j$)
both have proper prime subwords then the digits $y_i$ and $y_j$ cannot occur simultaneously in any minimal
prime of the form $xY^*z$, and we can split $xY^*z$ into the two families
\[ x(Y-\{y_i\})^*z \qquad\text{and}\qquad x(Y-\{y_j\})^*z . \]
Sometimes it is not possible to show two digits are mutually incompatible, but it is possible to know that
one digit must appear before the other. For example, if $xy_iy_jz$ has a proper prime subword then the digit
$y_j$ must appear before $y_i$ in any minimal prime of the form $xY^*z$, and we can replace $xY^*z$
with the family
\[ x(Y-\{y_i\})^*(Y-\{y_j\})^*z . \]
Similarly, if $xy_jyy_iz$ has a proper prime subword then we can split $xY^*yY^*z$ into the two families
\[ x(Y-\{y_i\})^*yY^*z \qquad\text{and}\qquad xY^*y(Y-\{y_j\})^*z . \]
We now formulate these arguments for the most general kind of family we need to consider, namely
$x_1L_1^*\dotsm x_mL_m^*$. For simplicity, we only specify the decompositions as applying to
$L_1\coloneqq\{y_1,\dotsc,y_n\}$, but it is straightforward to generalize these decompositions
to also apply to $L_i$ for any $1\leq i\leq m$.
\begin{lemma}\label{lemexplore}
Every minimal prime of the form\/ $x_1L_1^*\dotsm x_mL_m^*$ must also be of the form\/
$x_1x_2L_2^*\dotsm x_mL_m^*$ or\/ $x_1y_iL_1^*x_2L_2^*\dotsm x_mL_m^*$ for some\/ $1\leq i\leq n$.
\end{lemma}
\begin{proof}
Follows from the fact that $L_1^*=\{\epsilon\}\cup\bigcup_{i=1}^n y_iL_1^*$.
\end{proof}
Similarly, one can generalize Lemma~\ref{lemexplore} to apply to adding characters to the right of $L_1$ rather than
to the left.
\begin{example}
The family $\1\0\{\0,\1\}^*\6\1^*\1$ splits into the three families $\1\0\6\1^*\1$, $\1\0\{\0,\1\}^*\0\6\1^*\1$, and $\1\0\{\0,\1\}^*\1\6\1^*\1$
by exploring $\{\0,\1\}^*$ on the right.
\end{example}
\begin{lemma}\label{lemsplit1}
If\/ $x_1y_i^kx_2\dotsm x_m$ contains a prime proper subword (for some\/ $k\geq1$) then every minimal prime of the form\/
$x_1L_1^*\dotsm x_mL_m^*$ is of the form \[x_1(L_1-\{y_i\})^*(y_i(L_1-\{y_i\})^*)^jx_2L_2^*\dotsm x_mL_m^*\]
for some\/ $0\leq j1$ we only used it to sieve the sequence $ab^n+c$ for primes $p\nmid d$, and initialized the list of candidates to not include $n$ for which there is some prime $p\mid d$ for which $p\mid(ab^n+c)/d$.
The program had to be modified slightly to remove a check which would prevent it from running in the case when $a$, $b$, and $c$ were all odd (since then $2\mid ab^n+c$\updated{, but $2$ may not divide $(ab^n+c)/d$}).
Once the numbers with small divisors had been removed, it remained to test the remaining numbers using a probable primality test.
For this we used the software {\tt LLR} by Jean Penn\'e~\cite{llr}.
Although undocumented, it is possible to run this program on numbers of the form $(ab^n+c)/d$ when $d\neq1$, so this program required no modifications.
A script was also written which allowed one to run {\tt srsieve} while
{\tt LLR} was testing the remaining candidates, so that when a divisor
was found by {\tt srsieve} on a number which had not yet been tested by
{\tt LLR} it would be removed from the list of candidates.
In the cases where the elements of $M(L_b)$ could be proven prime rigorously,
we employed {\tt PRIMO}
by Marcel Martin~\cite{primo},
an elliptic curve primality proving implementation.
\begin{landscape}\begin{figure}\begin{tiny}\[ \begin{tikzpicture}
\node{$\1\{\0,\1,\6\}^*\1$}[sibling distance=8cm]
child{node{$\1\0\{\0,\1,\6\}^*\1$}[sibling distance=3.5cm]
child{node{$\1\0\{\0,\1\}^*\1$}[sibling distance=1.75cm]
child{node{$\1\0\{\0,\1\}^*\0\1$}
child{node[label=below:{divisible by 2}]{$\1\0\0^*\0\1$}}
child{node[label=below:{subword $\1\0\1\1$}]{$\1\0\0^*1\0^*\0\1$}}}
child{node[label=below:{prime $\1\0\1\1$}]{$\1\0\{\0,\1\}^*\1\1$}}}
child{node{$\1\0\{\0,\1\}^*\6\{\0,\1\}^*\1$}[sibling distance=1.75cm]
child{node{$\1\0\{\0,\1\}^*\6\1^*\1$}
child{node{$\1\0\{\0,\1\}^*\0\6\1^*\1$}
child{node[label=below:{divisible by 8}]{$\1\0\0^*\0\6\1$}}}
child{node[label=below:{subword $\1\0\1\1$}]{$\1\0\{\0,\1\}^*\1\6\1^*\1$}}
child{node{$\1\0\6\1^*\1$}
child{node[label=below:{not prime}]{$\1\0\6\1$}}}}}}
child{node{$\1\1\{\0,\1,\6\}^*\1$}[sibling distance=4.5cm]
child{node{$\1\1\{\0,\1,\6\}^*\1$}[sibling distance=1.75cm]
child{node{$\1\1\{\0,\1\}^*\1$}
child{node[label=below:{prime $\1\1\0\1$}]{$\1\1\{\0,\1\}^*\0\1$}}
child{node{$\1\1\{\0,\1\}^*\1\1$}
child{node[label=below:{difference of squares}]{$\1\1\1^*\1\1$}}}}}
child{node{$\1\1\{\0,\1\}^*\6\{\0,\1\}^*\1$}[sibling distance=2.25cm]
child{node[label=below:{subword $\1\1\0\1$}]{$\1\1\{\0,\1\}^*\0\6\{\0,\1\}^*\1$}}
child{node{$\1\1\{\0,\1\}^*\1\6\{\0,\1\}^*\1$}
child{node[label=below:{divisible by 2 or 5}]{$\1\1\1^*\1\6\1^*\1$}}}
child{node{$\1\1\6\{\0,\1\}^*\1$}
child{node[label=below:{divisible by 2 or 5}]{$\1\1\6\1^*\1$}}}}}
child{node{$\1\6\{\0,\1,\6\}^*\1$}[sibling distance=5cm]
child{node[label=below:{divisible by 2 or 5}]{$\1\6\1^*\1$}[sibling distance=3cm]}};
\end{tikzpicture} \]\end{tiny}\caption{Tree of decompositions for $\1\{\0,\1,\6\}^*\1$ in base $9$.}\label{decomptree}\end{figure}\end{landscape}
\section{Results}
A summary of the results of our algorithm is presented in
Figure~\ref{resultsfig}; it was able to completely solve all bases up to 30 \updated{except for
17, 19, 21, and those between 25 and 29}.
%bases 26, 28, and the odd bases larger than 16.
The results in base~29 required some additional strategies, as described in Section~\ref{addstrat}.
%For the bases up to~30
\begin{figure}\[\begin{array}{c@{\qquad}c@{\qquad}c@{\qquad}c@{\qquad}c}
b & \lvert M(L_b)\rvert & \max\limits_{x\in M(L_b)}\lvert x\rvert & \parbox{5em}{\centering\# unsolved\\families} & \parbox{5em}{\centering searched height} \\ \hline
2 & 2 & 2 & 0 & - \\
3 & 3 & 3 & 0 & - \\
4 & 3 & 2 & 0 & - \\
5 & 8 & 5 & 0 & - \\
6 & 7 & 5 & 0 & - \\
7 & 9 & 5 & 0 & - \\
8 & 15 & 9 & 0 & - \\
9 & 12 & 4 & 0 & - \\
10 & 26 & 8 & 0 & - \\
11 & 152 & 45 & 0 & - \\
12 & 17 & 8 & 0 & - \\
13\mathrlap{^*} & 228 & 32021 & 0 & - \\
14 & 240 & 86 & 0 & - \\
15 & 100 & 107 & 0 & - \\
16 & 483 & 3545 & 0 & - \\
17\mathrlap{^*} & \geq1279 & \geq111334 & 1 & \updated{967000} \\
18 & 50 & 33 & 0 & - \\
19\mathrlap{^*} & \geq3462 & \geq110986 & 1 & \updated{686000} \\
20 & 651 & 449 & 0 & - \\
21\mathrlap{^*} & \geq2600 & \geq479150 & 1 & \updated{475000} \\
22 & 1242 & 764 & 0 & - \\
23\mathrlap{^*} & \updated{6021} & \updated{800874} & \updated{0} & \updated{-} \\
24 & 306 & 100 & 0 & - \\
25\mathrlap{^*} & \geq17597 & \geq136967 & 12 & \updated{292000} \\
26 & \geq5662 & \geq8773 & 2 & \updated{461000} \\
27\mathrlap{^*} & \geq17210 & \geq109006 & 5 & \updated{355000} \\
28\mathrlap{^*} & \geq5783 & \geq94538 & 1 & \updated{524000} \\
29\mathrlap{^*} & \updated{\geq57283} & \updated{\geq174240} & \updated{14} & \updated{233000} \\
30 & 220 & 1024 & 0 & -
\end{array}\]
\begin{center}$^*$Data based on results of probable primality tests.\end{center}
\caption{Summary of results for the prime numbers for each base $b$.}
\label{resultsfig}
\end{figure}
For every solved base, we give the size $|M(L_b)|$ and the ``width'' $\max_{x \in M(L_b)} |x|$ of
the corresponding family.
For the unsolved bases, we give a lower bound on the size and width of $M(L_b)$,
along with the number of families of the form
$xy^*z$ for which no prime member could be found, nor could the family be
ruled out as only containing composites. Since such simple families can contain at
most one minimal prime, an upper bound on $\abs{M(L_b)}$ is given by the
sum of the lower bound for $\abs{M(L_b)}$ and the number of unsolved families.
Additionally, we give the height to which the simple families were searched
for primes; if there are any more primes in $M(L_b)$ they must have at least
this many digits in base $b$.
The family $\8\0^*\1$ in base~23, corresponding to the
generalized Proth numbers $8\cdot23^n+1$, was already known to be prime
for minimal $n=119215$ in the process
of solving the generalized Sierpi\'nski conjecture in base 23~\cite{crus}.
\updated{The largest probable prime we found was the number $\mathtt{9E}^{800873}$
(expressed as a base~23 string)
or $(106\cdot23^{800873}-7)/11$ in standard notation.
It contains 1090573 decimal digits and at the time of discovery was the tenth
largest known probable prime according to Henri and Renaud Lifchitz's ranking~\cite{lifc}.}
\subsection{Unsolved families}
\updated{There were 37 families for which we were unable to determine if they contain a prime or not.
They are explicitly described in Figure~\ref{unsolved} in both their ``base $b$'' string representation
$xy^*z$ and ``algebraic'' form $(ab^n+c)/d=[xy^nz]_b$. The numbers $a$ and $c$ are given in their factorized form
so as to help spot algebraic factorizations.}
\begin{figure}%[H]
\updated{\begin{center}\begin{tabular}{ccc}
Base & Family & Algebraic form \\ \hline
17 & $\mathtt{F19^*}$ & $(5\cdot821\cdot17^n-3^2)/16$ \\
19 & $\mathtt{EE16^*}$ & $(2^2\cdot13\cdot307\cdot19^n-1)/3$ \\
%21 & $\mathtt{CF^*0K}$ & $(3\cdot17\cdot21^{n+2}-11\cdot113)/4$ \\
21 & $\mathtt{G0^*FK}$ & $2^4\cdot21^{n+2}+5\cdot67$ \\
%23 & $\mathtt{9E^*}$ & $(2\cdot53\cdot23^n-7)/11$ \\
25 & $\mathtt{6MF^*9}$ & $(1381\cdot25^{n+1}-53)/8$ \\
& $\mathtt{CM1^*}$ & $(59\cdot131\cdot25^n-1)/24$ \\
& $\mathtt{EE1^*}$ & $(8737\cdot25^n-1)/24$ \\
& $\mathtt{E1^*E}$ & $(337\cdot25^{n+1}+311)/24$ \\
& $\mathtt{EFO^*}$ & $2\cdot3\cdot61\cdot25^n-1$ \\
& $\mathtt{F1^*F1}$ & $(19^2\cdot25^{n+2}+37\cdot227)/24$ \\
& $\mathtt{F0^*KO}$ & $3\cdot5\cdot25^{n+2}+2^2\cdot131$ \\
& $\mathtt{F0K^*O}$ & $(5\cdot11\cdot41\cdot25^{n+1}+19)/6$ \\
& $\mathtt{LOL^*8}$ & $(53\cdot83\cdot25^{n+1}-3\cdot37)/8$ \\
& $\mathtt{M1^*F1}$ & $(23^2\cdot25^{n+2}+37\cdot227)/24$ \\
& $\mathtt{M10^*8}$ & $19\cdot29\cdot25^{n+1}+2^3$ \\
& $\mathtt{OL^*8}$ & $(199\cdot25^{n+1}-3\cdot37)/8$ \\
26 & $\mathtt{A^*6F}$ & $(2\cdot26^{n+2}-7\cdot71)/5$ \\
& $\mathtt{I^*GL}$ & $(2\cdot3^2\cdot26^{n+2}-11\cdot113)/25$ \\
27 & $\mathtt{80^*9A}$ & $2^3\cdot27^{n+2}+11\cdot23$ \\
& $\mathtt{999G^*}$ & $(101\cdot877\cdot27^n-2^3)/13$ \\
& $\mathtt{CL^*E}$ & $(3^2\cdot37\cdot27^{n+1}-7\cdot29)/26$ \\
& $\mathtt{EI^*F8}$ & $(191\cdot27^{n+2}-2^3\cdot149)/13$ \\
& $\mathtt{F^*9FM}$ & $(3\cdot5\cdot27^{n+3}-113557)/26$ \\
28 & $\mathtt{OA^*F}$ & $(2\cdot7\cdot47\cdot28^{n+1}+5^3)/27$ \\
29 & $\mathtt{1A^*}$ & $(19\cdot29^n-5)/14$ \\
& $\mathtt{68L0^*6}$ & $7\cdot757\cdot29^{n+1}+2\cdot3$ \\
& $\mathtt{AMP^*}$ & $(8761\cdot29^n-5^2)/28$ \\
& $\mathtt{C^*FK}$ & $(3\cdot29^{n+2}+2\cdot331)/7$ \\
& $\mathtt{F^*OPF}$ & $(3\cdot5\cdot29^{n+3}+139\cdot1583)/28$ \\
& $\mathtt{FKI^*}$ & $(6379\cdot29^n-3^2)/14$ \\
& $\mathtt{F^*OP}$ & $(3\cdot5\cdot29^{n+2}+7573)/28$ \\
& $\mathtt{LP09^*}$ & $(31\cdot16607\cdot29^n-3^2)/28$ \\
& $\mathtt{OOPS^*A}$ & $2\cdot10453\cdot29^{n+1}-19$ \\
%& $\mathtt{O0^*FPL}$ & $2^3\cdot3\cdot29^{n+3}+31\cdot431$ \\
& $\mathtt{PC^*}$ & $(2\cdot89\cdot29^n-3)/7$ \\
& $\mathtt{PPPL^*O}$ & $(87103\cdot29^{n+1}+3^2)/4$ \\
& $\mathtt{Q^*GL}$ & $(13\cdot29^{n+2}-3\cdot1381)/14$ \\
& $\mathtt{Q^*LO}$ & $(13\cdot29^{n+2}-19\cdot109)/14$ \\
& $\mathtt{RM^*G}$ & $(389\cdot29^{n+1}-5\cdot19)/14$
\end{tabular}\end{center}
\caption{The unsolved families listed in their ``base $b$'' and ``algebraic'' representations.}}
\label{unsolved}
\end{figure}
\subsection{\boldmath Primes of the form $4n+1$ and $4n+3$}
The heuristic algorithm described in Section~\ref{secalg} may also be easily modified to apply to recursive subsets of prime numbers,
e.g., those congruent to $1\bmod4$ (alternatively, $3\bmod4$). The modifications necessary are to the initialization
of $M$ in line~1, and to update the check ``$w$ represents a minimal prime'' to also check that $w$ is in the subset
under consideration. \updated{When searching for minimal primes of the form $4n+1$, it was necessary to remove families only containing
numbers of the form $4n+3$, and vice versa. A modified Lemma~\ref{lemone} can be used to check this, e.g.,
$[x(L^0\cup L)z]_b$ are all congruent to $c\bmod d$ implies that $[xL^*z]_b$ are all congruent to $c\bmod d$.}
The lemmas of Section~\ref{seclemmas} can be used without modification, since a family which contains only composites
of course does not contain any primes of a special form, either. For simplicity the lemmas of Section~\ref{secalg} are stated
to apply when the minimal set consists of primes, but actually apply to general minimal sets and also need no modification.
When run on the primes of the form $4n+1$ represented in base 10, our implementation successfully computes the minimal set
consisting of 146 elements, the largest of which contains 79 digits. The
\textit{Encyclopedia of Integer Sequences}~\cite{oeis}
contains the elements for this minimal set in entry \seqnum{A111055}. Prior to our work, the longest 11 elements were missing
from this listing.
When run on the primes of the form $4n+3$ represented in base 10, our implementation successfully computes the minimal set
consisting of 113 elements, the largest of which contains 19153 digits. \updated{Because of its size
this number is not easily proven prime, but Fran\c{c}ois Morain successfully produced a primality certificate for it~\cite{mora}.}
The second-largest number has 50 digits, so the remaining elements
are easily proven prime. The \textit{Encyclopedia of Integer Sequences}~\cite{oeis}
contains the elements for this minimal set in entry \seqnum{A111056}. Prior to our work, the longest 10 elements were missing
from this listing.
\section{Some additional strategies}\label{addstrat}
The strategies discussed so far suffice to restrict the possible forms of minimal primes to a finite number of simple families in all bases $2\leq b\leq 28$.
However, as $b$ increased, in addition to the calculations becoming more costly,
it was found to be necessary to use increasingly complicated strategies.
We now describe some additional strategies which we found sufficient to solve all non-simple families in base~29.
%We now describe some additional strategies which we found sufficient to solve the problem in base~29 (up to a finite number of simple families).
%our implementation finds $55{,}817$ minimal primes and $2{,}565$ unsolved families).
%We now describe some additional strategies which we used in base~29 to find $57{,}252$ minimal primes
%and 45 unsolved families (with no primes with less than $10{,}000$ digits).
\begin{lemma}
If every number of the form\/ $x_1(L_1-\{y_i\})^*x_2L_2^*\dotsm x_mL_m^*$ is composite, then
every minimal prime of the form\/ $x_1L_1^*\dotsm x_mL_m^*$ must also be of the form\/
$x_1L_1^*y_iL_1^*x_2L_2^*\dotsm x_mL_m^*$.
\end{lemma}
\begin{proof}
If $w\in x_1L_1^*\dotsm x_mL_m^*$ then $w\in x_1yx_2L_2^*\dotsm x_mL_m^*$ for some $y\in L_1^*$. If $y$ does not contain a $y_i$
then $w$ is composite by assumption. Therefore if $w$ is a prime then $y$ contains a $y_i$, i.e., $y\in L_1^*y_iL_1^*$, from which the
result follows.
(Note that one could improve this result via $y\in(L_1-\{y_i\})^*y_iL_1^*\cup L_1^*y_i(L_1-\{y_i\})^*$, but this was found to be unnecessary for our purposes.)
\end{proof}
\begin{example}
The numbers represented by the family $\mathtt{F}\{\0,\9,\mathtt{F}\}^*\mathtt{F}$ in base~29 are divisible by $3$,
so every minimal prime of the form $\mathtt{F}\{\0,\9,\mathtt{F},\mathtt{P}\}^*\mathtt{F}$ must also be of the form
$\mathtt{F}\{\0,\9,\mathtt{F},\mathtt{P}\}^*\mathtt{P}\{\0,\9,\mathtt{F},\mathtt{P}\}^*\mathtt{F}$.
\end{example}
\begin{lemma}
If\/ $x_1y_iy_jy_ix_2\dotsm x_m$ contains a prime proper subword (where\/ $i\neq j$) then every minimal prime of the form\/
$x_1L_1^*\dotsm x_mL_m^*$ is of the form \[x_1(L_1-\{y_i\})^*(L_1-\{y_j\})^*(L_1-\{y_i\})^*x_2L_2^*\dotsm x_mL_m^* . \]
\end{lemma}
\begin{proof}
If $w\in x_1L_1^*\dotsm x_mL_m^*$ then $w\in x_1yx_2L_2^*\dotsm x_mL_m^*$ for some $y\in L_1^*$.
If $y$ contains $y_iy_jy_i$ then by assumption it follows that $w$ contains a proper prime subword,
and therefore is not a minimal prime. So if $w$ is a minimal prime then either $y$ does not contain
a $y_j$, or $y$ contains a $y_j$ and all $y_i$s in $y$ either come before or after the $y_j$.
In each case, $y$ is of the form $(L-\{y_i\})^*(L-\{y_j\})^*(L-\{y_i\})^*$, from which the claim follows.
\end{proof}
\begin{example}
The string $\mathtt{QLQ}$ represents a prime in base~29, and is a proper subword of $\mathtt{LQLQL}$.
It follows that the family $\mathtt{L}\{\mathtt{L},\mathtt{Q}\}^*\mathtt{L}$ splits into the family $\mathtt{L}\mathtt{L}^*\mathtt{Q}^*\mathtt{L}^*\mathtt{L}$ in base~29.
\end{example}
This rule may also be generalized to apply to the case when $x_1y_iy_jy_iy_jx_2\dotsm x_m$ contains a prime proper subword (where $i\neq j$).
\begin{example}
The string $\mathtt{LL9L9LQL}$ represents a prime in base~29. It follows that the family $\mathtt{LL\{\mathtt{9},\mathtt{L}\}^*\mathtt{Q}^*\mathtt{QL}}$ splits into the family $\mathtt{LL\mathtt{L}^*\mathtt{9}^*\mathtt{L}^*\mathtt{9}^*\mathtt{Q}^*\mathtt{QL}}$ in base~29.
\end{example}
In Section~\ref{subsecfirststrat} we described a number of strategies for determining if every member of a family has a divisor,
but for some families divisors exist which will not be found using those tests. The following lemma can help one discover when
this is the case; in particular when every member of a family is divisible by some small prime. For a language $L$ we use the
notation $[L]_b\bmod N$ to denote the finite set of residues $\set{[x]_b\bmod N}{x\in L}$.
\begin{lemma}\label{lemextdiv}
If\/ $1<\gcd(k,N)$ for every\/ $k\in[L]_b\bmod N$ and\/ $p\notin[L]_b$ for every prime\/ $p$ which divides\/ $N$, then all numbers of the form\/ $[L]_b$ are composite.
\end{lemma}
\begin{proof}
Let $x$ be an arbitrary member of $L$. Then $\gcd([x]_b,N)=\gcd([x]_b\bmod N,N)>1$ by assumption, so $[x]_b$ cannot be prime unless $[x]_b=\gcd([x]_b,N)$. But in that case $[x]_b$ would be a prime which divides $N$, and so $[x]_b\notin[L]_b$ by the second assumption, in contradiction to $x\in L$.
\end{proof}
To make use of this lemma, we need to be able to compute $[x_1L_1^*\dotsm x_mL_m^*]_b\bmod N$. To do this, we can make use of the following relations:
\begin{align*}
[Lx]_b \bmod N &= (b\cdot([L]_b\bmod N)+[x]_b)\bmod N \\
[L\{y_1,\dotsc,y_n\}]_b \bmod N &= \bigcup_{i=1}^n[Ly_i]_b\bmod N \\
[L\{y_1,\dotsc,y_n\}^*]_b \bmod N &= \bigcup_{i=0}^\infty[L\{y_1,\dotsc,y_n\}^i]_b\bmod N
\end{align*}
In the final case, the union may be taken to be finite over $i1$. Since $2$, $3$, $5\notin[\mathtt{L}\1^*\6\1^*\mathtt{LK}^*\mathtt{K}]_b$, by Lemma~\ref{lemextdiv} all numbers of the form $\mathtt{L}\1^*\6\1^*\mathtt{LK}^*\mathtt{K}$ are composite.
\end{example}
%The next result will allow to progress in the exploration of the repetitions of a language, %RD
%since it will decompose a repetition in a part that is not interesting to be explored further, until no other rule may be used, and a part which may lead to reductions of the families to be exploited.
%In the following, we shall denote by $strip(L)$ the string obtain from $L$ by deleting all the repetitions in it.
%\begin{lemma}
%Let $L=L1\{w_1,w_2,\ldots,w_n\}^*L2$ be some language of the kind we are interested in.
%Let us decompose $\{w_1,w_2,\ldots,w_n\}$ in two disjoint sets:
%$N$ (for no present interest to develop) and
%$F$ (for to be further explored) - this is like a partition but $N$ or $F$ may be empty.
%$F$ is the subset of $\{w_1,w_2,\ldots,w_n\}$ such that, for any $w_i\in F$,
%for some $m$ in $M$, the longest prefix of m which is a subword of $strip(L_1)$ is smaller than
%the longest prefix of $m$ which is a subword of $strip(L_1) w_i$; $N$ is the complement of $F$.
%If $N\neq \emptyset$, one may explore the languages $L_1 N^* L_2$ and
%$L_1 N^*w_i\{w_1,w_2,\ldots,w_n\}^* L_2$ for each $w_i\in F$.
%\end{lemma}
%This property is in some sense trivial; its true interest is that it is not useful to further explore $N^*$ until $M$ is enriched, or no other rule allows to simplify the handling.
%The rule here presented allows to progress from left to right in the handling of repetitions,
% but it is also possible to derive a similar decomposition for progressing from right to left.
\section{Composite numbers}
One can also consider the companion problem of determining the
minimal elements for the composite numbers
$\lbrace 4,6,8,9,10, 12, \ldots \rbrace $.
Here, in contrast with the primes, we have
\begin{theorem}
The following decision problem is recursively solvable: given
a base\/ $b$ and a DFA\/ $M$ accepting a language\/ $L$ of
strings in a base-$b$ canonical representation,
does\/ $M$ accept the base-$b$ representation of a composite
number?
\end{theorem}
This follows immediately from
\begin{theorem}
Suppose\/ $L$ is a regular language, accepted by a
deterministic finite automaton\/ $M$ of\/ $n$ states. Then
if\/ $M$ accepts a composite number expressed in base\/ $b$, it must
accept one whose base-$b$ representation has at most\/
$n(b^{2n} + 1)$ digits.
\end{theorem}
\begin{proof}
If $L$ is finite, then the longest string accepted by $M$ has
at most $n-1$ digits, and $n-1 < n(b^{2n} + 1)$.
Otherwise $L$ is infinite. Then $M$ accepts a string of length
$\ell$ with $n < \ell \leq 2n$ that can be pumped (as in the
pumping lemma). That is, there exists $x \in L$ such that
$x = uvw$ with $|uv| \leq n$. Then a classical proof of the
non-regularity of the prime numbers
(e.g., \cite[Example 3.2, p.\ 57]{HU79})
shows that either $x$ is the representation of a composite
number (in which case $|x| \leq 2n \leq n(b^{2n} + 1)$)
or $u v^p w$ is composite, where $p = [x]_b$.
Our bound now follows.
\end{proof}
Write $S_b \coloneqq \set{(n)_b}{\text{$n \geq 4$ is composite}}$.
For our purposes, the following theorem gives a better bound.
\begin{theorem}
Every minimal element of\/ $S_b$ is of length at most\/ $b+2$.
\label{devi}
\end{theorem}
\begin{proof}
Consider any word $w$ of $S_b$ of length $\geq b+3$. Since there are only
$b$ distinct digits, some digit $d$ is repeated at least twice,
so that $dd \subw w$. If $d > \1$, the number $[dd]_b$ is composite,
as it is divisible by $[\1\1]_b$ but not equal to it. If $d = \0$,
then some nonzero digit $c$ precedes it in $w$, so $c\0\0 \subw w$
and $[c\0\0]_b$ is divisible by $b^2$, which is composite.
Finally, if no digit other than $\1$ is repeated, then
$\1\1\1\1 \subw w$, and $[\1\1\1\1]_b = [\1\1]_b \cdot [\1\0\1]_b$, and hence
is composite.
\end{proof}
Theorem~\ref{devi} turns the computation of the minimal elements for
$S_b$ into a finite search. Our results are given in Figure~\ref{ttwo}.
\begin{figure}\[\begin{array}{c@{\qquad}c@{\qquad}c}
b & \lvert M(S_b)\rvert & \max\limits_{x\in M(S_b)}\lvert x\rvert \\ \hline
2 & 3 & 4 \\
3 & 4 & 3 \\
4 & 9 & 3 \\
5 & 10 & 3 \\
6 & 19 & 4 \\
7 & 18 & 3\\
8 & 26 & 3 \\
9 & 28 & 2 \\
10 & 32 & 3 \\
11 & 32 & 3\\
12 & 46 & 4 \\
13 & 43 & 3 \\
14 & 52 & 3 \\
15 & 54 & 2 \\
16 & 60 & 3 \\
17 & 60 & 3 \\
18 & 95 & 4 \\
19 & 77 & 3 \\
20 & 87 & 3 \\
21 & 90 & 2 \\
22 & 94 & 3 \\
23 & 97 & 3 \\
24 & 137 & 4 \\
25 & 117 & 2 \\
26 & 111 & 3 \\
27 & 115 & 2 \\
28 & 131 & 3 \\
29 & 123 & 3 \\
30 & 207 & 4
\end{array}\]
\caption{Summary of results for the composite numbers for each base $b$.}
\label{ttwo}
\end{figure}
\section{Conclusion and perspectives}
We have (sometimes partially) solved the problem of finding the minimal elements for the prime numbers
represented with the first bases. However, the problem rapidly becomes very complex: the number of members
increases rapidly, some bases need sophisticated strategies,
and some members are very long.
Hence, to complete the cases where some (simple) families resisted our analysis, and to go further in the bases,
we need better exploration strategies, and better primality tests for numbers expressed by a simple family in some base.
Another problem left open is to delineate the asymptotic behaviour of the size and width of those sets $M(L_b)$ when $b$ increases.
A rough estimation is that the size increases exponentially, but this increase is lower when the base has many different factors.
To illustrate once more the difficulty of computing $M(L)$, we recall
an open problem from \cite{Sh00}:
\begin{problem}
Let $L \coloneqq \lbrace {\tt 1, 2, 4, 8, 16, 32, 64, } \ldots \rbrace$, the
base-$10$ representation of the powers of $2$.
Is it the case that
$$ M(L) = \lbrace \1, \2, \4, \8, \6\5\5\3\6 \rbrace ? $$
\end{problem}
This would follow, for example, if we could prove that every power
of $16$ greater than $65536$ contained at least one of the digits
$\lbrace \1, \2, \4, \8 \rbrace$. But this seems beyond current capabilities.
\section*{Acknowledgments}
We are very grateful to
Fran\c{c}ois Morain for proving the primality of
$2 \cdot (10^{19153} - 1)/9 + 77$, which enabled us to complete the
classification of the minimal elements of primes of the form $4n+3$
in base 10.
\bibliographystyle{plain}
\begin{thebibliography}{10}
\raggedright
\bibitem{crus} G. Barnes.
\newblock Sierpinski conjectures and proofs, \newline
\url{www.noprimeleftbehind.net/crus/Sierp-conjectures.htm}.
\bibitem{code} C. Bright.
\newblock MEPN implementation,
\url{github.com/curtisbright/mepn}.
\bibitem{Choi} S. L. G. Choi.
\newblock Covering the set of integers by congruence
classes of distinct moduli.
\newblock \textit{Math. Comp.} {\bf 25} (1971), 885--895.
\updated{\bibitem{devillers} R. Devillers.
\newblock Results in searching for minimal primes wrt subword order,
\url{github.com/RaymondDevillers/primes}.}
\bibitem{Dick} L. E. Dickson.
\newblock \textit{History of the Theory of Numbers}.
\newblock Volume I: Divisibility and Primality.
\newblock Chelsea Publishing Company, New York, 1952.
\bibitem{gmp} T. Granlund et al.
\newblock The GNU multiple precision arithmetic library,
\url{gmplib.org}.
\bibitem{GHK07} H. Gruber, M. Holzer, and M. Kutrib.
\newblock The size of Higman-Haines sets.
\newblock \textit{Theoret. Comput. Sci.} {\bf 387} (2007), 167--176.
\bibitem{GHK09} H. Gruber, M. Holzer, and M. Kutrib.
\newblock More on the size of Higman-Haines sets: effective constructions.
\newblock \textit{Fundam. Inform.} {\bf 91} (2009), 105--121.
\bibitem{Ha69} L. H. Haines.
\newblock On free monoids partially ordered by embedding.
\newblock \textit{J. Combinatorial Theory} {\bf 6} (1969), 94--98.
\bibitem{Hi52} G. Higman.
\newblock Ordering by divisibility in abstract algebras.
\newblock \textit{Proc. London Math. Soc.} (3) {\bf 2} (1952), 326--336.
\bibitem{HU79} J. E. Hopcroft and J. D. Ullman.
\newblock \textit{Introduction to Automata Theory, Languages, and Computation},
Addison-Wesley, 1979.
\updated{\bibitem{lifc} H. Lifchitz and R. Lifchitz.
\newblock PRP records,
\url{www.primenumbers.net/prptop/prptop.php}}
\bibitem{primo} M. Martin.
\newblock Primo for Linux,
\url{www.ellipsa.eu/public/primo/primo.html}.
\updated{\bibitem{mora} F. Morain.
\newblock Some primes proven by my programs, \newline
\url{www.lix.polytechnique.fr/Labo/Francois.Morain/Primes/myprimes.html}.}
\bibitem{oeis} OEIS Foundation Inc.
\newblock The On-Line Encyclopedia of Integer Sequences,
\url{oeis.org}.
\bibitem{llr} J. Penn\'e.
\newblock LLR version 3.8.15,
\url{jpenne.free.fr/index2.html}.
\bibitem{srsieve} G. Reynolds.
\newblock Sierpinski/Riesel sieve version 0.6.17, \newline
\url{sites.google.com/site/geoffreywalterreynolds/programs/srsieve}.
\updated{\bibitem{miracl} M. Scott.
\newblock Multiprecision integer and rational arithmetic
cryptographic library,
\url{www.certivox.com/miracl}.}
\bibitem{Sh00} J. Shallit.
\newblock Minimal primes.
\newblock \textit{J. Recreational Math.} {\bf 30} (2) (2001), 113--117.
\bibitem{WD} H. C. Williams and H. Dubner.
\newblock The primality of $R_{1031}$.
\newblock \textit{Math. Comput.} {\bf 47} (1986), 703--711.
\end{thebibliography}
\end{document}