{ "cells": [ { "cell_type": "markdown", "id": "fluid-administration", "metadata": { "papermill": { "duration": 0.029757, "end_time": "2021-03-27T13:54:27.009269", "exception": false, "start_time": "2021-03-27T13:54:26.979512", "status": "completed" }, "tags": [] }, "source": [ "# Telegram Mining (Notebook Identifizierung von Autoren)\n", "\n", "**Master-Thesis: Social Media & Text Mining am Beispiel von Telegram**\n", "\n", "Maximilian Bundscherer\n", "\n", "Informatik Master\n", "\n", "**Hinweis**: Die Abschnitte ``Arbeitsumgebung initialisieren`` und ``Chats laden und aufbereiten`` werden im Notebook ``Telegram.ipynb`` bereits ausführlich beschrieben und werden daher hier übersprungen. " ] }, { "cell_type": "markdown", "id": "statutory-kernel", "metadata": { "papermill": { "duration": 0.029034, "end_time": "2021-03-27T13:54:27.067097", "exception": false, "start_time": "2021-03-27T13:54:27.038063", "status": "completed" }, "tags": [] }, "source": [ "## Arbeitsumgebung initialisieren\n", "\n", "Siehe Beschreibung im Notebook ``Telegram.ipynb``" ] }, { "cell_type": "code", "execution_count": 1, "id": "brave-fusion", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:27.131379Z", "iopub.status.busy": "2021-03-27T13:54:27.130784Z", "iopub.status.idle": "2021-03-27T13:54:29.526676Z", "shell.execute_reply": "2021-03-27T13:54:29.526077Z" }, "papermill": { "duration": 2.43178, "end_time": "2021-03-27T13:54:29.526840", "exception": false, "start_time": "2021-03-27T13:54:27.095060", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Import default libs\n", "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import time\n", "import re\n", "import os\n", "import sys\n", "import demjson\n", "import requests\n", "import networkx as nx\n", "import warnings\n", "from pprint import pprint\n", "from urllib.parse import urlparse\n", "from collections import Counter\n", "from 
pathlib import Path\n", "from lxml.html import fromstring" ] }, { "cell_type": "code", "execution_count": 2, "id": "stunning-washer", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:29.590348Z", "iopub.status.busy": "2021-03-27T13:54:29.589757Z", "iopub.status.idle": "2021-03-27T13:54:29.591751Z", "shell.execute_reply": "2021-03-27T13:54:29.592227Z" }, "papermill": { "duration": 0.035123, "end_time": "2021-03-27T13:54:29.592370", "exception": false, "start_time": "2021-03-27T13:54:29.557247", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Hide DeprecationWarning\n", "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)" ] }, { "cell_type": "code", "execution_count": 3, "id": "respected-trail", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:29.655017Z", "iopub.status.busy": "2021-03-27T13:54:29.654434Z", "iopub.status.idle": "2021-03-27T13:54:32.440981Z", "shell.execute_reply": "2021-03-27T13:54:32.440486Z" }, "papermill": { "duration": 2.819082, "end_time": "2021-03-27T13:54:32.441124", "exception": false, "start_time": "2021-03-27T13:54:29.622042", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: demoji in /opt/conda/lib/python3.8/site-packages (0.4.0)\n", "Requirement already satisfied: colorama in /opt/conda/lib/python3.8/site-packages (from demoji) (0.4.4)\n", "Requirement already satisfied: requests<3.0.0 in /opt/conda/lib/python3.8/site-packages (from demoji) (2.25.1)\n", "Requirement already satisfied: chardet<5,>=3.0.2 in /opt/conda/lib/python3.8/site-packages (from requests<3.0.0->demoji) (4.0.0)\n", "Requirement already satisfied: idna<3,>=2.5 in /opt/conda/lib/python3.8/site-packages (from requests<3.0.0->demoji) (2.10)\n", "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /opt/conda/lib/python3.8/site-packages (from requests<3.0.0->demoji) (1.26.4)\n", "Requirement already 
satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.8/site-packages (from requests<3.0.0->demoji) (2020.12.5)\n" ] } ], "source": [ "!{sys.executable} -m pip install demoji" ] }, { "cell_type": "code", "execution_count": 4, "id": "governing-great", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:32.504889Z", "iopub.status.busy": "2021-03-27T13:54:32.504295Z", "iopub.status.idle": "2021-03-27T13:54:33.424056Z", "shell.execute_reply": "2021-03-27T13:54:33.423491Z" }, "papermill": { "duration": 0.953384, "end_time": "2021-03-27T13:54:33.424200", "exception": false, "start_time": "2021-03-27T13:54:32.470816", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import nltk\n", "import demoji\n", "\n", "# Sklearn\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.feature_extraction.text import CountVectorizer\n", "from sklearn.feature_extraction.text import TfidfTransformer\n", "\n", "from sklearn.naive_bayes import MultinomialNB\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.svm import LinearSVC\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.neural_network import MLPClassifier\n", "from sklearn.dummy import DummyClassifier\n", "\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.metrics import confusion_matrix" ] }, { "cell_type": "code", "execution_count": 5, "id": "coral-jungle", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:33.489176Z", "iopub.status.busy": "2021-03-27T13:54:33.488597Z", "iopub.status.idle": "2021-03-27T13:54:33.491123Z", "shell.execute_reply": "2021-03-27T13:54:33.490657Z" }, "papermill": { "duration": 0.036719, "end_time": "2021-03-27T13:54:33.491252", "exception": false, "start_time": "2021-03-27T13:54:33.454533", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "dictGloStopwatches = dict()\n", "\n", "# Start timer (for 
reporting)\n", "def gloStartStopwatch(key):\n", " print(\"[Stopwatch started >>\" + str(key) + \"<<]\")\n", " dictGloStopwatches[key] = time.time()\n", "\n", "# Stop timer (for reporting)\n", "def gloStopStopwatch(key):\n", " endTime = time.time()\n", " startTime = dictGloStopwatches[key]\n", " print(\"[Stopwatch stopped >>\" + str(key) + \"<< (\" + '{:5.3f}s'.format(endTime-startTime) + \")]\")" ] }, { "cell_type": "code", "execution_count": 6, "id": "studied-huntington", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:33.559226Z", "iopub.status.busy": "2021-03-27T13:54:33.558648Z", "iopub.status.idle": "2021-03-27T13:54:33.596784Z", "shell.execute_reply": "2021-03-27T13:54:33.597250Z" }, "papermill": { "duration": 0.075189, "end_time": "2021-03-27T13:54:33.597441", "exception": false, "start_time": "2021-03-27T13:54:33.522252", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...\n", "[nltk_data] Package stopwords is already up-to-date!\n" ] }, { "data": { "text/plain": [ "True" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "nltk.download(\"stopwords\")" ] }, { "cell_type": "code", "execution_count": 7, "id": "fixed-repeat", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:33.661427Z", "iopub.status.busy": "2021-03-27T13:54:33.660832Z", "iopub.status.idle": "2021-03-27T13:54:34.373409Z", "shell.execute_reply": "2021-03-27T13:54:34.372734Z" }, "papermill": { "duration": 0.745828, "end_time": "2021-03-27T13:54:34.373557", "exception": false, "start_time": "2021-03-27T13:54:33.627729", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Downloading emoji data ...\n", "... OK (Got response in 0.40 seconds)\n", "Writing emoji data to /home/jovyan/.demoji/codes.json ...\n", "... 
OK\n" ] } ], "source": [ "demoji.download_codes()" ] }, { "cell_type": "code", "execution_count": 8, "id": "north-transfer", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:34.445541Z", "iopub.status.busy": "2021-03-27T13:54:34.444967Z", "iopub.status.idle": "2021-03-27T13:54:34.447030Z", "shell.execute_reply": "2021-03-27T13:54:34.447444Z" }, "papermill": { "duration": 0.038755, "end_time": "2021-03-27T13:54:34.447592", "exception": false, "start_time": "2021-03-27T13:54:34.408837", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Show all columns (pandas hides columns by default)\n", "pd.set_option('display.max_columns', None)\n", "\n", "# Set plot style\n", "plt.style.use('ggplot')\n", "\n", "font = {'size' : 13}\n", "\n", "plt.rc('font', **font)" ] }, { "cell_type": "code", "execution_count": 9, "id": "supported-drove", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:34.514493Z", "iopub.status.busy": "2021-03-27T13:54:34.513943Z", "iopub.status.idle": "2021-03-27T13:54:34.516387Z", "shell.execute_reply": "2021-03-27T13:54:34.515907Z" }, "papermill": { "duration": 0.037112, "end_time": "2021-03-27T13:54:34.516510", "exception": false, "start_time": "2021-03-27T13:54:34.479398", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "dir_var = \"./work/notebooks/\"\n", "dir_var_output = dir_var + \"output/\"\n", "dir_var_pandas_cache = dir_var + \"cache/pandas/\"" ] }, { "cell_type": "code", "execution_count": 10, "id": "continued-pierce", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:34.585068Z", "iopub.status.busy": "2021-03-27T13:54:34.584499Z", "iopub.status.idle": "2021-03-27T13:54:34.586503Z", "shell.execute_reply": "2021-03-27T13:54:34.586919Z" }, "papermill": { "duration": 0.038848, "end_time": "2021-03-27T13:54:34.587070", "exception": false, "start_time": "2021-03-27T13:54:34.548222", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def 
gloReplaceGermanChars(inputText):\n", "\n", " inputText = inputText.replace(\"ö\", \"oe\")\n", " inputText = inputText.replace(\"ü\", \"ue\")\n", " inputText = inputText.replace(\"ä\", \"ae\")\n", "\n", " inputText = inputText.replace(\"Ö\", \"Oe\")\n", " inputText = inputText.replace(\"Ü\", \"Ue\")\n", " inputText = inputText.replace(\"Ä\", \"Ae\")\n", "\n", " inputText = inputText.replace(\"ß\", \"ss\")\n", " \n", " return inputText" ] }, { "cell_type": "code", "execution_count": 11, "id": "korean-import", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:34.655056Z", "iopub.status.busy": "2021-03-27T13:54:34.654496Z", "iopub.status.idle": "2021-03-27T13:54:34.656960Z", "shell.execute_reply": "2021-03-27T13:54:34.656512Z" }, "papermill": { "duration": 0.038417, "end_time": "2021-03-27T13:54:34.657087", "exception": false, "start_time": "2021-03-27T13:54:34.618670", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Rm unsafe chars\n", "def gloConvertToSafeString(text):\n", " text = demoji.replace(text, \"\")\n", " text = gloReplaceGermanChars(text)\n", " text = re.sub(r'[^a-zA-Z0-9\\s]', '', text)\n", " return text\n", "\n", "# Generate unique chat name\n", "def gloConvertToSafeChatName(chatName):\n", " chatName = gloConvertToSafeString(chatName)\n", " return chatName[:30]" ] }, { "cell_type": "code", "execution_count": 12, "id": "rapid-recipient", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:34.725770Z", "iopub.status.busy": "2021-03-27T13:54:34.725180Z", "iopub.status.idle": "2021-03-27T13:54:34.727237Z", "shell.execute_reply": "2021-03-27T13:54:34.727651Z" }, "papermill": { "duration": 0.03908, "end_time": "2021-03-27T13:54:34.727793", "exception": false, "start_time": "2021-03-27T13:54:34.688713", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "def gloGetStopWordsList(filterList):\n", "\n", " stopwWorldsList = []\n", "\n", " deWordsList = 
nltk.corpus.stopwords.words('german')\n", "\n", " enWordsList = nltk.corpus.stopwords.words('english')\n", "\n", " aStopwords = []\n", " with open(dir_var + \"additionalStopwords.txt\") as file:\n", " for line in file: \n", " line = line.strip()\n", " if(line != \"\"):\n", " aStopwords.append(line)\n", "\n", " for s in filterList:\n", " s = gloReplaceGermanChars(s)\n", " stopwWorldsList.append(s)\n", "\n", " for s in deWordsList:\n", " s = gloReplaceGermanChars(s)\n", " stopwWorldsList.append(s)\n", "\n", " for s in enWordsList:\n", " stopwWorldsList.append(s)\n", "\n", " for s in aStopwords:\n", " s = gloReplaceGermanChars(s)\n", " stopwWorldsList.append(s)\n", "\n", " return stopwWorldsList" ] }, { "cell_type": "markdown", "id": "forty-minutes", "metadata": { "papermill": { "duration": 0.031439, "end_time": "2021-03-27T13:54:34.790729", "exception": false, "start_time": "2021-03-27T13:54:34.759290", "status": "completed" }, "tags": [] }, "source": [ "## Chats laden und aufbereiten\n", "\n", "Siehe Beschreibung im Notebook ``Telegram.ipynb``" ] }, { "cell_type": "markdown", "id": "metric-moisture", "metadata": { "papermill": { "duration": 0.031562, "end_time": "2021-03-27T13:54:34.854006", "exception": false, "start_time": "2021-03-27T13:54:34.822444", "status": "completed" }, "tags": [] }, "source": [ "## Identifizierung von Autoren\n", "\n", "Siehe Beschreibung in ``Thesis.pdf``" ] }, { "cell_type": "markdown", "id": "convenient-ownership", "metadata": { "papermill": { "duration": 0.03143, "end_time": "2021-03-27T13:54:34.917493", "exception": false, "start_time": "2021-03-27T13:54:34.886063", "status": "completed" }, "tags": [] }, "source": [ "### Chats laden" ] }, { "cell_type": "code", "execution_count": 13, "id": "adapted-intervention", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:34.984612Z", "iopub.status.busy": "2021-03-27T13:54:34.984039Z", "iopub.status.idle": "2021-03-27T13:54:34.986028Z", "shell.execute_reply": 
"2021-03-27T13:54:34.986458Z" }, "papermill": { "duration": 0.03746, "end_time": "2021-03-27T13:54:34.986613", "exception": false, "start_time": "2021-03-27T13:54:34.949153", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "C_USE_CACHE_FILE = \"final-run-24-03.pkl\"" ] }, { "cell_type": "markdown", "id": "computational-porcelain", "metadata": { "papermill": { "duration": 0.031923, "end_time": "2021-03-27T13:54:35.050472", "exception": false, "start_time": "2021-03-27T13:54:35.018549", "status": "completed" }, "tags": [] }, "source": [ "#### Von Cache Laden" ] }, { "cell_type": "code", "execution_count": 14, "id": "small-eugene", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:54:35.119008Z", "iopub.status.busy": "2021-03-27T13:54:35.118442Z", "iopub.status.idle": "2021-03-27T13:55:03.717028Z", "shell.execute_reply": "2021-03-27T13:55:03.716445Z" }, "papermill": { "duration": 28.634295, "end_time": "2021-03-27T13:55:03.717171", "exception": false, "start_time": "2021-03-27T13:54:35.082876", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[Stopwatch started >>Cache einlesen<<]\n", "[Stopwatch stopped >>Cache einlesen<< (28.593s)]\n" ] } ], "source": [ "gloStartStopwatch(\"Cache einlesen\")\n", "dfAllDataMessages = pd.read_pickle(dir_var_pandas_cache + C_USE_CACHE_FILE)\n", "gloStopStopwatch(\"Cache einlesen\")" ] }, { "cell_type": "markdown", "id": "exact-convergence", "metadata": { "papermill": { "duration": 0.033022, "end_time": "2021-03-27T13:55:03.783897", "exception": false, "start_time": "2021-03-27T13:55:03.750875", "status": "completed" }, "tags": [] }, "source": [ "#### Filtern und anzeigen" ] }, { "cell_type": "code", "execution_count": 15, "id": "offshore-binary", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:55:03.875946Z", "iopub.status.busy": "2021-03-27T13:55:03.875368Z", "iopub.status.idle": "2021-03-27T13:55:18.787163Z", 
"shell.execute_reply": "2021-03-27T13:55:18.786673Z" }, "papermill": { "duration": 14.970188, "end_time": "2021-03-27T13:55:18.787306", "exception": false, "start_time": "2021-03-27T13:55:03.817118", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "dfAllDataMessages= dfAllDataMessages[dfAllDataMessages['ftFilePath'].isin(\n", " [\n", " \"DS-05-01-2021/ChatExport_2021-01-05-hildmann\",\n", " \"DS-05-01-2021/ChatExport_2021-01-05-janich\",\n", " \"DS-05-01-2021/ChatExport_2021-01-05-xavier\",\n", " \"DS-05-01-2021/ChatExport_2021-01-05-evaherman\"\n", " ]\n", ")]\n", "\n", "dfAllDataMessages = dfAllDataMessages[dfAllDataMessages.ftQrIsValidText == True]\n", "dfAllDataMessages = dfAllDataMessages[dfAllDataMessages.ftTdCleanText != \"\"]\n", "dfAllDataMessages = dfAllDataMessages[dfAllDataMessages.ftTdTextLength > 5]\n", "\n", "dfAllDataMessages[\"from\"] = dfAllDataMessages[\"from\"].apply(gloConvertToSafeChatName)" ] }, { "cell_type": "code", "execution_count": 16, "id": "unauthorized-mongolia", "metadata": { "execution": { "iopub.execute_input": "2021-03-27T13:55:18.859756Z", "iopub.status.busy": "2021-03-27T13:55:18.859194Z", "iopub.status.idle": "2021-03-27T13:55:18.934349Z", "shell.execute_reply": "2021-03-27T13:55:18.933762Z" }, "papermill": { "duration": 0.11306, "end_time": "2021-03-27T13:55:18.934484", "exception": false, "start_time": "2021-03-27T13:55:18.821424", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", " | id | \n", "type | \n", "date | \n", "actor | \n", "actor_id | \n", "action | \n", "title | \n", "text | \n", "from | \n", "from_id | \n", "file | \n", "media_type | \n", "mime_type | \n", "duration_seconds | \n", "edited | \n", "thumbnail | \n", "width | \n", "height | \n", "photo | \n", "forwarded_from | \n", "poll.question | \n", "poll.closed | \n", "poll.total_voters | \n", "poll.answers | \n", "sticker_emoji | \n", "message_id | \n", "reply_to_message_id | \n", "location_information.latitude | \n", "location_information.longitude | \n", "live_location_period_seconds | \n", "via_bot | \n", "performer | \n", "ftFilePath | \n", "ftChatType | \n", "ftIsJsonFormatted | \n", "tmpExtractedTD | \n", "ftTdText | \n", "ftTdUrls | \n", "ftTdHashtags | \n", "ftTdBolds | \n", "ftTdItalics | \n", "ftTdUnderlines | \n", "ftTdEmails | \n", "ftTdCleanText | \n", "ftTdEmojis | \n", "ftTdEmojisDesc | \n", "ftTdSafeText | \n", "ftTdSafeLowerText | \n", "ftTdTextLength | \n", "ftQrIsValidText | \n", "ftQrIsEdited | \n", "ftQrIsForwarded | \n", "ftQrCoPhotos | \n", "ftQrCoFiles | \n", "ftQrCoUrls | \n", "ftQrCoHashtags | \n", "ftQrCoBolds | \n", "ftQrCoItalics | \n", "ftQrCoUnderlines | \n", "ftQrCoEmails | \n", "ftQrCoEmojis | \n", "ftTrNerRoberta | \n", "ftTrNerBert | \n", "ftTrSenBert | \n", "ftSenTb | \n", "author | \n", "saved_from | \n", "members | \n", "game_title | \n", "game_description | \n", "game_link | \n", "game_message_id | \n", "score | \n", "inviter | \n", "contact_vcard | \n", "contact_information.first_name | \n", "contact_information.last_name | \n", "contact_information.phone_number | \n", "place_name | \n", "address | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "2 | \n", "message | \n", "2020-04-28T08:22:29 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Dann schreiben wir mal Geschichte! | \n", "ATTILA HILDMANN | \n", "1.003416e+10 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "DS-05-01-2021/ChatExport_2021-01-05-hildmann | \n", "public_channel | \n", "False | \n", "(Dann schreiben wir mal Geschichte!, [], [], [... | \n", "Dann schreiben wir mal Geschichte! | \n", "[] | \n", "[] | \n", "[] | \n", "[] | \n", "[] | \n", "[] | \n", "Dann schreiben wir mal Geschichte! | \n", "[] | \n", "[] | \n", "Dann schreiben wir mal Geschichte | \n", "dann schreiben wir mal geschichte | \n", "34 | \n", "True | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "{'per': [], 'misc': [], 'org': [], 'loc': []} | \n", "{'per': [], 'misc': [], 'org': [], 'loc': []} | \n", "5.0 | \n", "{'polarity': 0.0, 'subjectivity': 0.0} | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
4 | \n", "5 | \n", "message | \n", "2020-04-28T08:43:27 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "Das Video sollte jeder von euch schauen und ve... | \n", "ATTILA HILDMANN | \n", "1.003416e+10 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "DS-05-01-2021/ChatExport_2021-01-05-hildmann | \n", "public_channel | \n", "False | \n", "(Das Video sollte jeder von euch schauen und v... | \n", "Das Video sollte jeder von euch schauen und ve... | \n", "[] | \n", "[] | \n", "[] | \n", "[] | \n", "[] | \n", "[] | \n", "Das Video sollte jeder von euch schauen und ve... | \n", "[] | \n", "[] | \n", "Das Video sollte jeder von euch schauen und ve... | \n", "das video sollte jeder von euch schauen und ve... | \n", "100 | \n", "True | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "{'per': [], 'misc': [], 'org': [], 'loc': ['▁C... | \n", "{'per': [], 'misc': [], 'org': [], 'loc': []} | \n", "5.0 | \n", "{'polarity': 0.0, 'subjectivity': 0.0} | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
5 | \n", "6 | \n", "message | \n", "2020-04-28T08:43:31 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "[SCHAUT ES EUCH AN! 🆘 , {'type': 'link', 'text... | \n", "ATTILA HILDMANN | \n", "1.003416e+10 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "2020-04-28T08:52:04 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "DS-05-01-2021/ChatExport_2021-01-05-hildmann | \n", "public_channel | \n", "True | \n", "(SCHAUT ES EUCH AN! 🆘 , [https://www.instagram... | \n", "SCHAUT ES EUCH AN! 🆘 | \n", "[https://www.instagram.com/tv/B_f_sYFqfvw/?igs... | \n", "[] | \n", "[] | \n", "[] | \n", "[] | \n", "[] | \n", "SCHAUT ES EUCH AN! | \n", "[🆘] | \n", "[SOS button] | \n", "SCHAUT ES EUCH AN | \n", "schaut es euch an | \n", "21 | \n", "True | \n", "True | \n", "False | \n", "False | \n", "False | \n", "True | \n", "False | \n", "False | \n", "False | \n", "False | \n", "False | \n", "True | \n", "{'per': [], 'misc': [], 'org': [], 'loc': []} | \n", "{'per': [], 'misc': [], 'org': [], 'loc': []} | \n", "5.0 | \n", "{'polarity': 0.0, 'subjectivity': 0.0} | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "