{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "source": [
    "# Shared identifiers in Firefox cookies and local storage\n",
    "\n",
    "Loads `cookies.sqlite` and `webappsstore.sqlite` from a Firefox profile, collects cookie values that show up under more than five base domains, and checks whether those values also appear in local storage.\n",
    "\n",
    "Set the `FIREFOX_PROFILE_DIR` environment variable to analyse a different profile."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "outputs": [],
   "source": [
    "from contextlib import closing\n",
    "import os\n",
    "import sqlite3\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['content-prefs.sqlite',\n",
       " 'places.sqlite',\n",
       " 'kinto.sqlite',\n",
       " 'permissions.sqlite',\n",
       " 'formhistory.sqlite',\n",
       " 'storage-sync.sqlite',\n",
       " 'favicons.sqlite',\n",
       " 'cookies.sqlite',\n",
       " 'storage.sqlite',\n",
       " 'webappsstore.sqlite']"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# The default is the profile this notebook was written against; override it\n",
    "# with the FIREFOX_PROFILE_DIR environment variable instead of editing the cell.\n",
    "firefox_profile_dir = os.environ.get(\n",
    "    'FIREFOX_PROFILE_DIR',\n",
    "    '/home/bird/.mozilla/firefox/old_profiles/iadzfbcv.default/',\n",
    ")\n",
    "[name for name in os.listdir(firefox_profile_dir) if name.endswith('.sqlite')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "outputs": [],
   "source": [
    "# os.path.join avoids the doubled slash that plain string formatting produced\n",
    "# when the profile directory already ends with '/'.\n",
    "storage_file = os.path.join(firefox_profile_dir, 'webappsstore.sqlite')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[('webappsstore2',)]\n"
     ]
    }
   ],
   "source": [
    "# %load '/home/bird/Documents/tracking technologies/notebooks/get_sqlite_tables.py'\n",
    "def list_tables_in_db(db):\n",
    "    \"\"\"Print the names of all tables found in the SQLite connection `db`.\"\"\"\n",
    "    query = \"SELECT name FROM sqlite_master WHERE type='table';\"\n",
    "    print(db.cursor().execute(query).fetchall())\n",
    "\n",
    "\n",
    "# closing() releases the connection as soon as the cell finishes, so the cell\n",
    "# can be re-run on its own and no handle on the profile database is leaked.\n",
    "with closing(sqlite3.connect(storage_file)) as storage_db:\n",
    "    list_tables_in_db(storage_db)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [],
   "source": [
    "with closing(sqlite3.connect(storage_file)) as storage_db:\n",
    "    storage_df = pd.read_sql('SELECT * FROM webappsstore2', storage_db)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "outputs": [],
   "source": [
    "def origin_from_key(origin_key):\n",
    "    \"\"\"Extract the origin from a webappsstore2 `originKey`.\n",
    "\n",
    "    The key is reversed and the piece right after the first ':.' separator is\n",
    "    returned, e.g. 'moc.elpmaxe.:http:80' -> 'example.com'.\n",
    "    Keys that are not strings, or that lack the separator, give None instead\n",
    "    of raising.\n",
    "    \"\"\"\n",
    "    if not isinstance(origin_key, str):\n",
    "        return None\n",
    "    pieces = origin_key[::-1].split(':.')\n",
    "    return pieces[1] if len(pieces) > 1 else None\n",
    "\n",
    "\n",
    "def storage_rows_containing(frame, text):\n",
    "    \"\"\"Return the 'value' and 'origin' columns of the rows of `frame` whose value contains `text`.\n",
    "\n",
    "    `text` is matched literally (regex=False), so identifiers containing regex\n",
    "    metacharacters are safe; rows with a missing value never match (na=False).\n",
    "    \"\"\"\n",
    "    mask = frame['value'].str.contains(text, regex=False, na=False)\n",
    "    return frame.loc[mask, ['value', 'origin']]\n",
    "\n",
    "\n",
    "storage_df['origin'] = storage_df['originKey'].map(origin_from_key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "slideshow": {
     "slide_type": "skip"
    }
   },
   "outputs": [],
   "source": [
    "MIN_SHARED_DOMAINS = 5  # a value must occur under more than this many base domains\n",
    "MIN_ID_LENGTH = 10  # values of this length or shorter are ignored\n",
    "\n",
    "\n",
    "def find_potential_ids(cookies, min_domains=MIN_SHARED_DOMAINS, min_length=MIN_ID_LENGTH):\n",
    "    \"\"\"Return cookie values that occur, as substrings, in cookies of more than `min_domains` base domains.\n",
    "\n",
    "    Only values longer than `min_length` characters that do not contain 'com'\n",
    "    are considered. They are filtered before the substring scan because the\n",
    "    scan costs one pass over the whole table per candidate; the result and its\n",
    "    order are the same as filtering afterwards.\n",
    "    \"\"\"\n",
    "    candidates = [\n",
    "        value for value in cookies['value'].dropna().unique()\n",
    "        if len(value) > min_length and 'com' not in value\n",
    "    ]\n",
    "    shared = []\n",
    "    for value in candidates:\n",
    "        holders = cookies[cookies['value'].str.contains(value, regex=False, na=False)]\n",
    "        if len(holders['baseDomain'].unique()) > min_domains:\n",
    "            shared.append(value)\n",
    "    return shared\n",
    "\n",
    "\n",
    "cookies_file = os.path.join(firefox_profile_dir, 'cookies.sqlite')\n",
    "with closing(sqlite3.connect(cookies_file)) as cookies_db:\n",
    "    cookies_df = pd.read_sql('SELECT * FROM moz_cookies', cookies_db)\n",
    "potential_ids = find_potential_ids(cookies_df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "We have 25 potential ids. Things like:\n"
     ]
    }
   ],
   "source": [
    "# From cookie table\n",
    "print('We have', len(potential_ids), 'potential ids. Things like:')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "67936421072632709762729202117726060613 found in local storage\n",
      "7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc found in local storage\n"
     ]
    }
   ],
   "source": [
    "for potential_id in potential_ids:\n",
    "    if not storage_rows_containing(storage_df, potential_id).empty:\n",
    "        print(potential_id, 'found in local storage')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       " .dataframe tbody tr th:only-of-type {\n",
       " vertical-align: middle;\n",
       " }\n",
       "\n",
       " .dataframe tbody tr th {\n",
       " vertical-align: top;\n",
       " }\n",
       "\n",
       " .dataframe thead th {\n",
       " text-align: right;\n",
       " }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       " <thead>\n",
       " <tr style=\"text-align: right;\">\n",
       " <th></th>\n",
       " <th>value</th>\n",
       " <th>origin</th>\n",
       " </tr>\n",
       " </thead>\n",
       " <tbody>\n",
       " <tr>\n",
       " <th>291</th>\n",
       " <td>{\"email\":null,\"timeIncId\":\"53fdd635-5007-4f53-...</td>\n",
       " <td>people.com</td>\n",
       " </tr>\n",
       " </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       " value origin\n",
       "291 {\"email\":null,\"timeIncId\":\"53fdd635-5007-4f53-... people.com"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "repeated_id = '67936421072632709762729202117726060613'\n",
    "storage_rows_containing(storage_df, repeated_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "slideshow": {
     "slide_type": "slide"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       " .dataframe tbody tr th:only-of-type {\n",
       " vertical-align: middle;\n",
       " }\n",
       "\n",
       " .dataframe tbody tr th {\n",
       " vertical-align: top;\n",
       " }\n",
       "\n",
       " .dataframe thead th {\n",
       " text-align: right;\n",
       " }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       " <thead>\n",
       " <tr style=\"text-align: right;\">\n",
       " <th></th>\n",
       " <th>value</th>\n",
       " <th>origin</th>\n",
       " </tr>\n",
       " </thead>\n",
       " <tbody>\n",
       " <tr>\n",
       " <th>27</th>\n",
       " <td>7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc</td>\n",
       " <td>www.latimes.com</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>227</th>\n",
       " <td>7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc</td>\n",
       " <td>www.merriam-webster.com</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>229</th>\n",
       " <td>v2_bf1ca6db22b70291779ce41eb2e5aee5_7620423c-7...</td>\n",
       " <td>www.merriam-webster.com</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>232</th>\n",
       " <td>v2_bf1ca6db22b70291779ce41eb2e5aee5_7620423c-7...</td>\n",
       " <td>tpc.googlesyndication.com</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>236</th>\n",
       " <td>7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc</td>\n",
       " <td>tpc.googlesyndication.com</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>357</th>\n",
       " <td>v2_9a6c86f52b3f9239067936a17472df9f_7620423c-7...</td>\n",
       " <td>www.huffingtonpost.com</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>359</th>\n",
       " <td>7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc</td>\n",
       " <td>www.huffingtonpost.com</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>407</th>\n",
       " <td>v2_66138b9f6bd0ec8912d19cb714efd912_7620423c-7...</td>\n",
       " <td>www.latimes.com</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>984</th>\n",
       " <td>7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc</td>\n",
       " <td>www.bloomberg.com</td>\n",
       " </tr>\n",
       " <tr>\n",
       " <th>989</th>\n",
       " <td>v2_96ea2f0533c59e2312c1f1112ced8f46_7620423c-7...</td>\n",
       " <td>www.bloomberg.com</td>\n",
       " </tr>\n",
       " </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       " value \\\n",
       "27 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n",
       "227 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n",
       "229 v2_bf1ca6db22b70291779ce41eb2e5aee5_7620423c-7... \n",
       "232 v2_bf1ca6db22b70291779ce41eb2e5aee5_7620423c-7... \n",
       "236 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n",
       "357 v2_9a6c86f52b3f9239067936a17472df9f_7620423c-7... \n",
       "359 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n",
       "407 v2_66138b9f6bd0ec8912d19cb714efd912_7620423c-7... \n",
       "984 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n",
       "989 v2_96ea2f0533c59e2312c1f1112ced8f46_7620423c-7... \n",
       "\n",
       " origin \n",
       "27 www.latimes.com \n",
       "227 www.merriam-webster.com \n",
       "229 www.merriam-webster.com \n",
       "232 tpc.googlesyndication.com \n",
       "236 tpc.googlesyndication.com \n",
       "357 www.huffingtonpost.com \n",
       "359 www.huffingtonpost.com \n",
       "407 www.latimes.com \n",
       "984 www.bloomberg.com \n",
       "989 www.bloomberg.com "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "repeated_id = '7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc'\n",
    "storage_rows_containing(storage_df, repeated_id)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can see this id being shared across multiple origins in local storage as well."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}