{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "import sqlite3" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/plain": [ "['content-prefs.sqlite',\n", " 'places.sqlite',\n", " 'kinto.sqlite',\n", " 'permissions.sqlite',\n", " 'formhistory.sqlite',\n", " 'storage-sync.sqlite',\n", " 'favicons.sqlite',\n", " 'cookies.sqlite',\n", " 'storage.sqlite',\n", " 'webappsstore.sqlite']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "firefox_profile_dir = '/home/bird/.mozilla/firefox/old_profiles/iadzfbcv.default/'\n", "[x for x in os.listdir(firefox_profile_dir) if x.endswith('sqlite')]" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "storage_file = '{}/webappsstore.sqlite'.format(firefox_profile_dir)\n", "storage_db = sqlite3.connect(storage_file)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('webappsstore2',)]\n" ] } ], "source": [ "# %load '/home/bird/Documents/tracking technologies/notebooks/get_sqlite_tables.py'\n", "def list_tables_in_db(db):\n", " print(db.cursor().execute(\"SELECT name FROM sqlite_master WHERE type='table';\").fetchall())\n", "\n", "list_tables_in_db(storage_db)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "storage_df = pd.read_sql('SELECT * FROM webappsstore2', storage_db)\n", "#storage_df.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "storage_df['origin'] = storage_df.originKey.apply(lambda x: x[::-1].split(':.')[1])\n", "#storage_df.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "slideshow": { "slide_type": "skip" } }, "outputs": [], "source": [ "cookies_file = '{}/cookies.sqlite'.format(firefox_profile_dir)\n", "cookies_db = sqlite3.connect(cookies_file)\n", "cookied_df = pd.read_sql('SELECT * FROM moz_cookies', cookies_db)\n", "shared_values = []\n", "for v in cookied_df.value.unique():\n", " matches = cookied_df[cookied_df.value.str.contains(v, regex=False)]\n", " if len(matches.baseDomain.unique()) > 5:\n", " shared_values.append(v)\n", "potential_ids = [x for x in shared_values if(len(x) > 10) & ('com' not in x)] " ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "We have 25 potential ids. Things like:\n" ] } ], "source": [ "# From cookie table\n", "print('We have', len(potential_ids), 'potential ids. Things like:')\n", "# potential_ids[0:5]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "67936421072632709762729202117726060613 found in local storage\n", "7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc found in local storage\n" ] } ], "source": [ "for potential_id in potential_ids:\n", " if len(storage_df[storage_df.value.str.contains(potential_id)]) > 0:\n", " print(potential_id, 'found in local storage')" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
valueorigin
291{\"email\":null,\"timeIncId\":\"53fdd635-5007-4f53-...people.com
\n", "
" ], "text/plain": [ " value origin\n", "291 {\"email\":null,\"timeIncId\":\"53fdd635-5007-4f53-... people.com" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repeated_id = '67936421072632709762729202117726060613'\n", "storage_df[storage_df.value.str.contains(repeated_id)][['value','origin']]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
valueorigin
277620423c-7103-4edc-9aee-099c75141b87-tuct18c03dcwww.latimes.com
2277620423c-7103-4edc-9aee-099c75141b87-tuct18c03dcwww.merriam-webster.com
229v2_bf1ca6db22b70291779ce41eb2e5aee5_7620423c-7...www.merriam-webster.com
232v2_bf1ca6db22b70291779ce41eb2e5aee5_7620423c-7...tpc.googlesyndication.com
2367620423c-7103-4edc-9aee-099c75141b87-tuct18c03dctpc.googlesyndication.com
357v2_9a6c86f52b3f9239067936a17472df9f_7620423c-7...www.huffingtonpost.com
3597620423c-7103-4edc-9aee-099c75141b87-tuct18c03dcwww.huffingtonpost.com
407v2_66138b9f6bd0ec8912d19cb714efd912_7620423c-7...www.latimes.com
9847620423c-7103-4edc-9aee-099c75141b87-tuct18c03dcwww.bloomberg.com
989v2_96ea2f0533c59e2312c1f1112ced8f46_7620423c-7...www.bloomberg.com
\n", "
" ], "text/plain": [ " value \\\n", "27 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n", "227 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n", "229 v2_bf1ca6db22b70291779ce41eb2e5aee5_7620423c-7... \n", "232 v2_bf1ca6db22b70291779ce41eb2e5aee5_7620423c-7... \n", "236 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n", "357 v2_9a6c86f52b3f9239067936a17472df9f_7620423c-7... \n", "359 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n", "407 v2_66138b9f6bd0ec8912d19cb714efd912_7620423c-7... \n", "984 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n", "989 v2_96ea2f0533c59e2312c1f1112ced8f46_7620423c-7... \n", "\n", " origin \n", "27 www.latimes.com \n", "227 www.merriam-webster.com \n", "229 www.merriam-webster.com \n", "232 tpc.googlesyndication.com \n", "236 tpc.googlesyndication.com \n", "357 www.huffingtonpost.com \n", "359 www.huffingtonpost.com \n", "407 www.latimes.com \n", "984 www.bloomberg.com \n", "989 www.bloomberg.com " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "repeated_id = '7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc'\n", "storage_df[storage_df.value.str.contains(repeated_id)][['value', 'origin']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We can see this id being shared across multiple origins in local storage as well." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.6" } }, "nbformat": 4, "nbformat_minor": 4 }