# Downloading the Septuagint

This is a script to download the plain text of the Septuagint from
[Sacred Texts](http://sacred-texts.com/bib/sep/index.htm).

In [1]:
import os,sys,re,collections
from lxml import html
import requests

In [2]:
# Root of the Septuagint section of the site; book and chapter URLs are
# built by appending a relative link to it.
base_url = 'http://sacred-texts.com/bib/sep'
# Index page listing the books.
top_url = base_url + '/index.htm'

# Read the book index page

In [3]:
# Download the index page and parse it into an lxml HTML tree.
page = requests.get(top_url)
# Stop here on an HTTP error rather than parsing an error page, which would
# leave the book list silently empty.
page.raise_for_status()
tree = html.fromstring(page.content)

# Compile the list of books

In [4]:
# Map each book title (the visible link text) to its URL. Links that appear
# before the one labelled 'Genesis' are skipped; every link from 'Genesis'
# onwards is recorded.
books = collections.OrderedDict()
seen_genesis = False
for anchor in tree.iter('a'):
    # Concatenate the .text of the anchor and of every element nested in it.
    label = ''.join(node.text or '' for node in anchor.iter())
    if not seen_genesis:
        if label != 'Genesis':
            continue
        seen_genesis = True
    href = anchor.get('href')
    books[label] = '{}/{}'.format(base_url, href)
print(', '.join(books))

Genesis, Exodus, Leviticus, Numbers, Deuteronomy, Joshua B, Joshua A, Judges B, Judges A, Ruth, 1 Samuel, 2 Samuel, 1 Kings, 2 Kings, 1 Chronicles, 2 Chronicles, 1 Esdras, 2 Esdras, Esther, Judith, Tobit BA, Tobit S, 1 Macabees, 2 Macabees, 3 Macabees, 4 Macabees, Psalms, Odes, Proverbs, Ecclesiastes, Song of Solomon, Job, Wisdom, Sirach, Psalms of Solomon, Hosea, Micah, Amos, Joel, Jonah, Obadiah, Nahum, Habakkuk, Zephaniah, Haggai, Zechariah, Malachi, Isaiah, Jeremiah, Baruch, Epistle of Jeremiah, Lamentations, Ezekiel, Bel and the Dragon, Bel and the Dragon Th, Daniel, Daniel Th, Susanna, Susanna Th


# Get the chapters

In [5]:
# book title -> {chapter number: chapter URL}
chapters = collections.defaultdict(dict)

def getchapters(book):
    """Fetch the page of `book` and record its chapter links in `chapters`.

    Every link inside a <p> element whose text starts with
    '<book> Chapter <number>' is stored as chapters[book][number] = URL.
    A one-line summary is printed afterwards.

    Raises KeyError if `book` is not in `books`, and requests.HTTPError if
    the book page cannot be downloaded.
    """
    book_url = books[book]
    page = requests.get(book_url)
    # Fail loudly on an HTTP error instead of scanning an error page for links.
    page.raise_for_status()
    tree = html.fromstring(page.content)
    # Escape the title so it is matched literally even if it contains
    # characters that are special in a regular expression.
    chfilter = re.compile(re.escape(book) + ' Chapter ([0-9]+)')
    for p in tree.iter('p'):
        for x in p.iter('a'):
            link_text = ''.join(y.text or '' for y in x.iter())
            match = chfilter.match(link_text)
            if match:
                chnum = int(match.group(1))
                link = x.get('href')
                chapters[book][chnum] = '{}/{}'.format(base_url, link)
    # The number reported is the highest chapter number found, which can be
    # larger than the number of chapters collected. default=0 keeps a book
    # with no matching links from raising ValueError.
    print('{}: {} chapters'.format(book, max(chapters[book], default=0)))

# Collect the chapter links of every book found on the index page.
for book in books:
    getchapters(book)

Genesis: 50 chapters
Exodus: 40 chapters
Leviticus: 27 chapters
Numbers: 36 chapters
Deuteronomy: 34 chapters
Joshua B: 24 chapters
Joshua A: 19 chapters
Judges B: 21 chapters
Judges A: 21 chapters
Ruth: 4 chapters
1 Samuel: 31 chapters
2 Samuel: 24 chapters
1 Kings: 22 chapters
2 Kings: 25 chapters
1 Chronicles: 29 chapters
2 Chronicles: 36 chapters
1 Esdras: 9 chapters
2 Esdras: 23 chapters
Esther: 10 chapters
Judith: 16 chapters
Tobit BA: 14 chapters
Tobit S: 14 chapters
1 Macabees: 16 chapters
2 Macabees: 15 chapters
3 Macabees: 7 chapters
4 Macabees: 18 chapters
Psalms: 151 chapters
Odes: 14 chapters
Proverbs: 36 chapters
Ecclesiastes: 12 chapters
Song of Solomon: 8 chapters
Job: 42 chapters
Wisdom: 19 chapters
Sirach: 51 chapters
Psalms of Solomon: 18 chapters
Hosea: 14 chapters
Micah: 7 chapters
Amos: 9 chapters
Joel: 4 chapters
Jonah: 4 chapters
Obadiah: 1 chapters
Nahum: 3 chapters
Habakkuk: 3 chapters
Zephaniah: 3 chapters
Haggai: 2 chapters
Zechariah: 14 chapters
Malachi: 3 

# Get the texts

In [7]:
def getchapter(book, chapter):
    """Download one chapter and return its text as a list of strings.

    The first element is a heading made of the book title and the chapter
    number, wrapped in newlines; it is followed by the text content of each
    <p> element of the chapter page, in document order.

    Raises KeyError if the chapter is not in `chapters`, and
    requests.HTTPError if the page cannot be downloaded.
    """
    url = chapters[book][chapter]
    page = requests.get(url)
    page.raise_for_status()
    # Assigning page.encoding only influences page.text, but the raw bytes in
    # page.content are what gets parsed. To actually decode the page as
    # UTF-8, the encoding has to be given to the parser itself.
    parser = html.HTMLParser(encoding='utf-8')
    tree = html.fromstring(page.content, parser=parser)
    chtext = ['\n{} {}\n'.format(book, chapter)]
    chtext.extend(p.text_content() for p in tree.iter('p'))
    return chtext

# Write every collected chapter of every book into a single text file.
# The encoding is given explicitly so that writing the Greek text does not
# depend on the platform's default encoding, and the 'with' block closes the
# file even if a download fails part-way through.
with open('septuagint.txt', 'w', encoding='utf-8') as sf:
    for book in books:
        sys.stdout.write('writing {} '.format(book))
        sys.stdout.flush()
        for chapter in chapters[book]:
            # One dot per chapter as a progress indicator.
            sys.stdout.write('.')
            sys.stdout.flush()
            sf.write('\n'.join(getchapter(book, chapter)))
        sys.stdout.write('\n')
        sys.stdout.flush()

writing Genesis ..................................................
writing Exodus ........................................
writing Leviticus ...........................
writing Numbers ....................................
writing Deuteronomy ..................................
writing Joshua B ........................
writing Joshua A ...
writing Judges B .....................
writing Judges A .....................
writing Ruth ....
writing 1 Samuel ...............................
writing 2 Samuel ........................
writing 1 Kings ......................
writing 2 Kings .........................
writing 1 Chronicles .............................
writing 2 Chronicles ....................................
writing 1 Esdras .........
writing 2 Esdras .......................
writing Esther ..........
writing Judith ................
writing Tobit BA ..............
writing Tobit S ..............
writing 1 Macabees ................
writing 2 Macabees ...............
writing 3 Macabees ....