{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"slideshow": {
"slide_type": "skip"
}
},
"outputs": [],
"source": [
"import os\n",
"import pandas as pd\n",
"import sqlite3"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/plain": [
"['content-prefs.sqlite',\n",
" 'places.sqlite',\n",
" 'kinto.sqlite',\n",
" 'permissions.sqlite',\n",
" 'formhistory.sqlite',\n",
" 'storage-sync.sqlite',\n",
" 'favicons.sqlite',\n",
" 'cookies.sqlite',\n",
" 'storage.sqlite',\n",
" 'webappsstore.sqlite']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"firefox_profile_dir = '/home/bird/.mozilla/firefox/old_profiles/iadzfbcv.default/'\n",
"[x for x in os.listdir(firefox_profile_dir) if x.endswith('sqlite')]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"slideshow": {
"slide_type": "skip"
}
},
"outputs": [],
"source": [
"storage_file = '{}/webappsstore.sqlite'.format(firefox_profile_dir)\n",
"storage_db = sqlite3.connect(storage_file)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"slideshow": {
"slide_type": "skip"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('webappsstore2',)]\n"
]
}
],
"source": [
"# %load '/home/bird/Documents/tracking technologies/notebooks/get_sqlite_tables.py'\n",
"def list_tables_in_db(db):\n",
" print(db.cursor().execute(\"SELECT name FROM sqlite_master WHERE type='table';\").fetchall())\n",
"\n",
"list_tables_in_db(storage_db)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [],
"source": [
"storage_df = pd.read_sql('SELECT * FROM webappsstore2', storage_db)\n",
"#storage_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"slideshow": {
"slide_type": "skip"
}
},
"outputs": [],
"source": [
"storage_df['origin'] = storage_df.originKey.apply(lambda x: x[::-1].split(':.')[1])\n",
"#storage_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"slideshow": {
"slide_type": "skip"
}
},
"outputs": [],
"source": [
"cookies_file = '{}/cookies.sqlite'.format(firefox_profile_dir)\n",
"cookies_db = sqlite3.connect(cookies_file)\n",
"cookied_df = pd.read_sql('SELECT * FROM moz_cookies', cookies_db)\n",
"shared_values = []\n",
"for v in cookied_df.value.unique():\n",
" matches = cookied_df[cookied_df.value.str.contains(v, regex=False)]\n",
" if len(matches.baseDomain.unique()) > 5:\n",
" shared_values.append(v)\n",
"potential_ids = [x for x in shared_values if(len(x) > 10) & ('com' not in x)] "
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"We have 25 potential ids. Things like:\n"
]
}
],
"source": [
"# From cookie table\n",
"print('We have', len(potential_ids), 'potential ids. Things like:')\n",
"# potential_ids[0:5]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"67936421072632709762729202117726060613 found in local storage\n",
"7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc found in local storage\n"
]
}
],
"source": [
"for potential_id in potential_ids:\n",
" if len(storage_df[storage_df.value.str.contains(potential_id)]) > 0:\n",
" print(potential_id, 'found in local storage')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" value | \n",
" origin | \n",
"
\n",
" \n",
" \n",
" \n",
" | 291 | \n",
" {\"email\":null,\"timeIncId\":\"53fdd635-5007-4f53-... | \n",
" people.com | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" value origin\n",
"291 {\"email\":null,\"timeIncId\":\"53fdd635-5007-4f53-... people.com"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"repeated_id = '67936421072632709762729202117726060613'\n",
"storage_df[storage_df.value.str.contains(repeated_id)][['value','origin']]"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" value | \n",
" origin | \n",
"
\n",
" \n",
" \n",
" \n",
" | 27 | \n",
" 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc | \n",
" www.latimes.com | \n",
"
\n",
" \n",
" | 227 | \n",
" 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc | \n",
" www.merriam-webster.com | \n",
"
\n",
" \n",
" | 229 | \n",
" v2_bf1ca6db22b70291779ce41eb2e5aee5_7620423c-7... | \n",
" www.merriam-webster.com | \n",
"
\n",
" \n",
" | 232 | \n",
" v2_bf1ca6db22b70291779ce41eb2e5aee5_7620423c-7... | \n",
" tpc.googlesyndication.com | \n",
"
\n",
" \n",
" | 236 | \n",
" 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc | \n",
" tpc.googlesyndication.com | \n",
"
\n",
" \n",
" | 357 | \n",
" v2_9a6c86f52b3f9239067936a17472df9f_7620423c-7... | \n",
" www.huffingtonpost.com | \n",
"
\n",
" \n",
" | 359 | \n",
" 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc | \n",
" www.huffingtonpost.com | \n",
"
\n",
" \n",
" | 407 | \n",
" v2_66138b9f6bd0ec8912d19cb714efd912_7620423c-7... | \n",
" www.latimes.com | \n",
"
\n",
" \n",
" | 984 | \n",
" 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc | \n",
" www.bloomberg.com | \n",
"
\n",
" \n",
" | 989 | \n",
" v2_96ea2f0533c59e2312c1f1112ced8f46_7620423c-7... | \n",
" www.bloomberg.com | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" value \\\n",
"27 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n",
"227 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n",
"229 v2_bf1ca6db22b70291779ce41eb2e5aee5_7620423c-7... \n",
"232 v2_bf1ca6db22b70291779ce41eb2e5aee5_7620423c-7... \n",
"236 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n",
"357 v2_9a6c86f52b3f9239067936a17472df9f_7620423c-7... \n",
"359 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n",
"407 v2_66138b9f6bd0ec8912d19cb714efd912_7620423c-7... \n",
"984 7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc \n",
"989 v2_96ea2f0533c59e2312c1f1112ced8f46_7620423c-7... \n",
"\n",
" origin \n",
"27 www.latimes.com \n",
"227 www.merriam-webster.com \n",
"229 www.merriam-webster.com \n",
"232 tpc.googlesyndication.com \n",
"236 tpc.googlesyndication.com \n",
"357 www.huffingtonpost.com \n",
"359 www.huffingtonpost.com \n",
"407 www.latimes.com \n",
"984 www.bloomberg.com \n",
"989 www.bloomberg.com "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"repeated_id = '7620423c-7103-4edc-9aee-099c75141b87-tuct18c03dc'\n",
"storage_df[storage_df.value.str.contains(repeated_id)][['value', 'origin']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can see this id being shared across multiple origins in local storage as well."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}