# Lexical Dispersion Plot Jupyter Notebook
## Install Dependencies and Import Required Modules

In [None]:
%pip install matplotlib nltk
import glob
import matplotlib.pyplot as plt
from nltk.tokenize import TreebankWordTokenizer
from string import punctuation

### Modify the list of words to be searched for

In [None]:
# Search terms to plot; each entry gets its own row on the y-axis, in this order.
words = [
    'war',
    'love',
    'death',
    'life',
    'marry',
    'fight',
    'king',
    'queen',
]

## Function Definitions

In [None]:
def lexical_dispersion_plot_directory(directory):
    """Draw a lexical dispersion plot for every file matching a glob pattern.

    Each matching file is handed to ``lexical_dispersion_plot_file``, so a
    directory run and a single-file run always share the same tokenizing and
    plotting logic.

    Args:
        directory: Glob pattern selecting the files to plot, e.g.
            ``"books/custom-corpus/*.txt"``. The pattern is expanded with
            ``recursive=True``, so ``**`` may be used to descend into
            sub-directories.

    Returns:
        None. One figure is shown per matching file; a short notice is
        printed when the pattern matches nothing.
    """
    # glob returns paths in arbitrary order; sort so the plots always appear
    # in the same sequence from run to run.
    file_paths = sorted(glob.glob(directory, recursive=True))

    if not file_paths:
        # An empty notebook cell output is easy to mistake for a hang or a
        # plotting failure, so say explicitly that nothing matched.
        print(f"No files match {directory!r}")
        return

    for file_path in file_paths:
        lexical_dispersion_plot_file(file_path)

def lexical_dispersion_plot_file(file_path):
    """Draw a lexical dispersion plot for one text file.

    The file is tokenized with NLTK's ``TreebankWordTokenizer``. Each token is
    lower-cased and stripped of leading/trailing punctuation, and tokens that
    end up empty are dropped. Every remaining token equal to an entry of the
    module-level ``words`` list is drawn as a red "x" at
    (token offset, index of that entry in ``words``).

    Args:
        file_path: Path of the text file to analyse.

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    # Decode explicitly so the result does not depend on the platform's
    # default encoding; undecodable bytes are replaced rather than aborting
    # the whole plot.
    with open(file_path, "r", encoding="utf-8", errors="replace") as file:
        text = file.read()

    # Keep the normalised form of every token so that, for example, "King"
    # can match the search word "king". Tokens that consist only of
    # punctuation become empty strings and are filtered out.
    normalized = (
        token.lower().strip(punctuation)
        for token in TreebankWordTokenizer().tokenize(text)
    )
    tokens = [token for token in normalized if token]

    # One point per (token position, search-word row) match.
    points = [
        (offset, row)
        for offset, token in enumerate(tokens)
        for row, target in enumerate(words)
        if token == target
    ]

    if points:
        x, y = zip(*points)
    else:
        # No occurrences: still draw an empty, labelled plot.
        x = y = ()

    print(f"Lexical Dispersion Plot for {file_path}")

    plt.figure(figsize=(8, 6))
    plt.plot(x, y, "rx")
    plt.yticks(range(len(words)), words)
    # Leave half a row of padding above and below the outermost words.
    plt.ylim(-1, len(words))
    plt.title(f"Lexical Dispersion Plot for {file_path}")
    plt.xlabel("Word Offset")
    plt.show()

## Generating Lexical Dispersion Plot for Shakespeare & Marlowe Corpora
### One file example

In [None]:
# Example run: plot the search words for a single text file from the Shakespeare corpus directory.
lexical_dispersion_plot_file("books/Shakespeare-corpus/Ado Much Ado About Nothing.txt")

### The whole corpus
The calls below are commented out to keep the page tidy, since the corpora contain many text files.

In [None]:
# lexical_dispersion_plot_directory("books/Shakespeare-corpus/*.txt")
# lexical_dispersion_plot_directory("books/Marlowe-corpus/*.txt")

## Custom Corpus Analysis
### Lexical Dispersion Plot for a single file

In [None]:
# Plot the search words for one file from the custom corpus directory; change the path to analyse another file.
lexical_dispersion_plot_file("books/custom-corpus/1HVI-MIT (CL).txt")

### Alternatively, you may analyze a whole directory

In [None]:
# Plot the search words for every .txt file matched in the custom corpus directory (one figure per file).
lexical_dispersion_plot_directory("books/custom-corpus/*.txt")