{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# A Medical History of British India\n",
    "\n",
    "This example is based on the dataset ['A Medical History of British India'](https://data.nls.uk/data/digitised-collections/a-medical-history-of-british-india/) provided by the [Data Foundry](https://data.nls.uk). It uses the trial data version of the dataset (15.5 MB compressed). This dataset forms the first half of the Medical History of British India collection, which itself is part of the broader India Papers collection held by the Library."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All imports live in this cell so a fresh kernel can run the notebook top to bottom.\n",
    "import matplotlib.pyplot as plt\n",
    "import nltk\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.probability import FreqDist\n",
    "from nltk.tokenize import word_tokenize\n",
    "\n",
    "# Download the NLTK resources used below: the 'punkt' tokenizer data for\n",
    "# word_tokenize and the 'stopwords' corpus for stop-word filtering.\n",
    "nltk.download('punkt')\n",
    "nltk.download('stopwords')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the whole document; the context manager closes the file handle afterwards.\n",
    "# NOTE(review): encoding assumed to be UTF-8 -- confirm against the dataset.\n",
    "with open(\"nls-text-indiaPapers/74457530.txt\", \"r\", encoding=\"utf-8\") as f:\n",
    "    text = f.read()\n",
    "\n",
    "text_tokens = word_tokenize(text)\n",
    "\n",
    "# Build the stop-word set once; evaluating stopwords.words('english') inside the\n",
    "# comprehension would rebuild the list for every single token.\n",
    "english_stopwords = set(stopwords.words('english'))\n",
    "\n",
    "# The stop-word list is lowercase, so compare on the lowercased token to also\n",
    "# drop capitalised stop words such as 'The' or 'And'. Tokens keep their original case.\n",
    "filtered_words = [word for word in text_tokens if word.lower() not in english_stopwords]\n",
    "\n",
    "# Print a short preview instead of dumping every token of the document.\n",
    "print(filtered_words[:50])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Count how often each remaining token occurs.\n",
    "fdist = FreqDist(filtered_words)\n",
    "print(fdist)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The two most frequent tokens together with their counts.\n",
    "fdist.most_common(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Frequency Distribution Plot (30 most frequent tokens)\n",
    "fdist.plot(30)\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }