{ "cells": [ { "cell_type": "markdown", "id": "039928a3", "metadata": {}, "source": [ "# Scop3P\n", "\n", "A comprehensive database of human phosphosites within their full context. Scop3P integrates sequences (UniProtKB/Swiss-Prot), structures (PDB), and uniformly reprocessed phosphoproteomics data (PRIDE) to annotate all known human phosphosites. \n", "\n", "Scop3P, available at https://iomics.ugent.be/scop3p, presents a unique resource for visualization and analysis of phosphosites and for understanding of phosphosite structure–function relationships.\n", "\n", "Please cite: https://doi.org/10.1021/acs.jproteome.0c00306\n" ] }, { "cell_type": "markdown", "id": "0d2dbe26", "metadata": {}, "source": [ "### Install Dependencies" ] }, { "cell_type": "code", "execution_count": 2, "id": "6fd309ae", "metadata": {}, "outputs": [], "source": [ "%%capture\n", "!jupyter labextension install jupyterlab_3dmol\n", "!jupyter labextension install @jupyter-widgets/jupyterlab-manager\n", "!pip install pandas matplotlib py3Dmol nglview" ] }, { "cell_type": "markdown", "id": "c5aadc48", "metadata": {}, "source": [ "### Import required packages" ] }, { "cell_type": "code", "execution_count": 3, "id": "f63a76a8", "metadata": {}, "outputs": [], "source": [ "%%capture\n", "import requests, tempfile,json,sys\n", "import pandas as pd \n", "from b2bTools import SingleSeq, constants\n", "import py3Dmol\n", "import ipywidgets as widgets" ] }, { "cell_type": "markdown", "id": "ffd05325", "metadata": {}, "source": [ "### Fetch phospho peptides from Scop3P and map onto protein structures\n", "> 1. Enter the protein ID (ex: P07949) and click 'Load'\n", "> 2. The app will let you choose between all peptides ('All rows') and unique spans (the peptide ranges are merged)\n", "> 3. Map all petides on the AF structure using 'Map all' (shows the mass spec coverage of your protein)\n", "> 4. Alternatively click one or multiple peptides on peptide panel to see their structural mapping\n", "> 5. Hint:\n", "> > Explore what the search funtion does!" ] }, { "cell_type": "code", "execution_count": 4, "id": "441b530a", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "d7d8a4d8a4dd447792f089b8da131d47", "version_major": 2, "version_minor": 0 }, "text/plain": [] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "7e80691c8a4143ec846eaf404204898c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HTML(value='Scop3P → AlphaFold → NGLView peptide mapper
Enter accession → Load → (optional) Search →…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "1ef83ddc8e6e425fa92f7bbb947c4275", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(Text(value='', description='ACC_ID:', layout=Layout(width='260px')), Button(button_style='prima…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b98678d730f44e0e97920dc311a99035", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Text(value='', description='Search:', layout=Layout(width='750px'), placeholder='Filter: substring (SSFG), ran…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "852c323cd3ac43e3b79382458267624c", "version_major": 2, "version_minor": 0 }, "text/plain": [ "SelectMultiple(description='Peptides:', layout=Layout(height='240px', width='980px'), options=(), value=())" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "6120007ad19b49998b8b0a04b1910ccc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(Button(button_style='warning', description='Map all (filtered)', style=ButtonStyle()), Checkbox…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a00f5213c5ff4ac0850dae7dadd2e643", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output()" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "import requests\n", "import urllib.request\n", "from urllib.error import HTTPError, URLError\n", "import re\n", "\n", "import ipywidgets as widgets\n", "from IPython.display import display, clear_output\n", "import nglview as nv\n", "\n", "# --- added (export only) ---\n", "from pathlib import Path\n", "import json\n", "\n", "\n", "def scop3p_ngl_mapper_app(default_accession=\"\"):\n", " # -------------------------\n", " # 1) Scop3P API fetch\n", " # -------------------------\n", " def fetch_scop3p_peptides(accession: str) -> pd.DataFrame:\n", " url = f\"https://iomics.ugent.be/scop3p/api/get-peptides-modifications?accession={accession}\"\n", " r = requests.get(url, timeout=30)\n", " r.raise_for_status()\n", " data = r.json()\n", "\n", " df = pd.DataFrame(data.get(\"peptides\", []))\n", " if df.empty:\n", " return df\n", "\n", " for c in [\"peptideStart\", \"peptideEnd\", \"peptideModificationPosition\", \"uniprotPosition\"]:\n", " if c in df.columns:\n", " df[c] = pd.to_numeric(df[c], errors=\"coerce\").astype(\"Int64\")\n", "\n", " df[\"label\"] = df.apply(\n", " lambda x: (\n", " f'{x[\"peptideSequence\"]} ({int(x[\"peptideStart\"])}-{int(x[\"peptideEnd\"])}) '\n", " f'@{x.get(\"modifiedResidue\",\"\")}{int(x[\"uniprotPosition\"])} score={x.get(\"score\",\"\")}'\n", " ),\n", " axis=1\n", " )\n", " return df\n", "\n", " # -------------------------\n", " # 2) AlphaFold download (fallback v6 -> v4)\n", " # -------------------------\n", " def download_alphafold_pdb(accession: str, versions=(\"v6\", \"v4\")) -> str:\n", " base = \"https://alphafold.ebi.ac.uk/files\"\n", " last_err = None\n", "\n", " for v in versions:\n", " pdb_name = f\"AF-{accession}-F1-model_{v}.pdb\"\n", " url = f\"{base}/{pdb_name}\"\n", " out = f\"{accession}.pdb\"\n", " try:\n", " urllib.request.urlretrieve(url, out)\n", "\n", " import os\n", " if os.path.getsize(out) < 1000:\n", " raise RuntimeError(f\"Downloaded file too small from {url}\")\n", "\n", " return out\n", " except (HTTPError, URLError, RuntimeError) as e:\n", " last_err = e\n", "\n", " raise RuntimeError(f\"Could not download AlphaFold PDB for {accession}. Last error: {last_err}\")\n", "\n", " # -------------------------\n", " # 3) NGL helpers\n", " # -------------------------\n", " def positions_to_ranges(pos_list):\n", " if not pos_list:\n", " return []\n", " pos_list = sorted(set(int(p) for p in pos_list))\n", " ranges = []\n", " s = pos_list[0]\n", " prev = pos_list[0]\n", " for x in pos_list[1:]:\n", " if x == prev + 1:\n", " prev = x\n", " else:\n", " ranges.append((s, prev))\n", " s = x\n", " prev = x\n", " ranges.append((s, prev))\n", " return ranges\n", "\n", " def add_cartoon_selection(view, ranges, color, name):\n", " if not ranges:\n", " return\n", " selection = \" or \".join([f\"resi {a}-{b}\" for a, b in ranges])\n", " view.add_representation(\"cartoon\", selection=selection, color=color, name=name)\n", "\n", " def add_positions(view, positions, color, name, repr_type=\"ball+stick\"):\n", " if not positions:\n", " return\n", " selection = \" or \".join([f\"resi {int(p)}\" for p in sorted(set(int(p) for p in positions))])\n", " view.add_representation(repr_type, selection=selection, color=color, name=name)\n", "\n", " # -------------------------\n", " # 4) Filter logic\n", " # -------------------------\n", " def filter_peptides(df: pd.DataFrame, query: str) -> pd.DataFrame:\n", " if df is None or df.empty:\n", " return df\n", " if not query:\n", " return df\n", "\n", " q = query.strip()\n", "\n", " m = re.match(r\"^(\\d+)\\s*-\\s*(\\d+)$\", q)\n", " if m:\n", " a, b = int(m.group(1)), int(m.group(2))\n", " return df[(df[\"peptideStart\"] <= b) & (df[\"peptideEnd\"] >= a)]\n", "\n", " m = re.match(r\"^>=\\s*(\\d+)$\", q)\n", " if m:\n", " p = int(m.group(1))\n", " return df[df[\"peptideEnd\"] >= p]\n", "\n", " m = re.match(r\"^<=\\s*(\\d+)$\", q)\n", " if m:\n", " p = int(m.group(1))\n", " return df[df[\"peptideStart\"] <= p]\n", "\n", " if q.isdigit():\n", " p = int(q)\n", " return df[(df[\"peptideStart\"] <= p) & (df[\"peptideEnd\"] >= p)]\n", "\n", " return df[df[\"peptideSequence\"].astype(str).str.contains(q, case=False, na=False)]\n", "\n", " # -------------------------\n", " # 5) UI\n", " # -------------------------\n", " acc_input = widgets.Text(value=default_accession, description=\"ACC_ID:\", layout=widgets.Layout(width=\"260px\"))\n", " load_btn = widgets.Button(description=\"Load\", button_style=\"primary\")\n", "\n", " mode = widgets.ToggleButtons(\n", " options=[\"Unique peptide spans\", \"All rows\"],\n", " value=\"Unique peptide spans\",\n", " description=\"List:\"\n", " )\n", "\n", " search_box = widgets.Text(\n", " value=\"\",\n", " placeholder=\"Filter: substring (SSFG), range (70-90), >=150, <=300, or single pos (154)\",\n", " description=\"Search:\",\n", " layout=widgets.Layout(width=\"750px\")\n", " )\n", "\n", " peptide_multi = widgets.SelectMultiple(\n", " description=\"Peptides:\",\n", " options=[],\n", " layout=widgets.Layout(width=\"980px\", height=\"240px\")\n", " )\n", "\n", " show_mods_chk = widgets.Checkbox(value=True, description=\"Show modified sites (magenta)\")\n", " show_mods_mode = widgets.ToggleButtons(\n", " options=[\"Selected peptides only\", \"All protein mods\"],\n", " value=\"Selected peptides only\",\n", " description=\"Mods:\"\n", " )\n", "\n", " map_all_btn = widgets.Button(description=\"Map all (filtered)\", button_style=\"warning\")\n", "\n", " # --- added (export only) ---\n", " export_html_btn = widgets.Button(description=\"Export styled HTML\", button_style=\"info\")\n", " # export_png_btn = widgets.Button(description=\"Export PNG (via HTML)\", button_style=\"info\")\n", "\n", " out = widgets.Output()\n", "\n", " display(widgets.HTML(\n", " \"Scop3P → AlphaFold → NGLView peptide mapper
\"\n", " \"Enter accession → Load → (optional) Search → select peptides (auto-renders).\"\n", " ))\n", " display(widgets.HBox([acc_input, load_btn, mode]))\n", " display(search_box)\n", " display(peptide_multi)\n", " display(widgets.HBox([map_all_btn, show_mods_chk, show_mods_mode, export_html_btn])) #export_png_btn\n", " display(out)\n", "\n", " # -------------------------\n", " # 6) State\n", " # -------------------------\n", " STATE = {\n", " \"df\": pd.DataFrame(),\n", " \"df_filtered\": pd.DataFrame(),\n", " \"pdb_path\": None,\n", " \"acc_loaded\": None,\n", " \"suspend_autorender\": False,\n", " \"last_action\": None, # \"map_all\" or \"select\"\n", "\n", " # --- added (export only) ---\n", " \"last_union_ranges\": [],\n", " \"last_inter_pos\": [],\n", " \"last_mod_pos\": [],\n", " \"last_pdb_path\": None,\n", " }\n", "\n", " def build_peptide_options(df: pd.DataFrame, mode_value: str):\n", " if df is None or df.empty:\n", " return []\n", "\n", " if mode_value == \"Unique peptide spans\":\n", " g = (\n", " df.groupby([\"peptideSequence\", \"peptideStart\", \"peptideEnd\"], as_index=False)\n", " .agg(n_mod_sites=(\"uniprotPosition\", \"nunique\"),\n", " max_score=(\"score\", \"max\"))\n", " )\n", " opts = []\n", " for _, row in g.iterrows():\n", " key = (row[\"peptideSequence\"], int(row[\"peptideStart\"]), int(row[\"peptideEnd\"]))\n", " label = f'{key[0]} ({key[1]}-{key[2]}) | modSites={int(row[\"n_mod_sites\"])} maxScore={row[\"max_score\"]}'\n", " opts.append((label, key))\n", " return opts\n", "\n", " return [(r[\"label\"], int(idx)) for idx, r in df.iterrows()]\n", "\n", " def ensure_loaded_assets(acc: str):\n", " if STATE[\"acc_loaded\"] != acc:\n", " STATE[\"pdb_path\"] = None\n", " STATE[\"acc_loaded\"] = acc\n", "\n", " if STATE[\"pdb_path\"] is None:\n", " STATE[\"pdb_path\"] = download_alphafold_pdb(acc)\n", "\n", " return STATE[\"pdb_path\"]\n", "\n", " def refresh_filtered_and_options(keep_selection=True):\n", " df = STATE[\"df\"]\n", " df_filt = filter_peptides(df, search_box.value)\n", " STATE[\"df_filtered\"] = df_filt\n", "\n", " old_sel = set(peptide_multi.value) if keep_selection else set()\n", " peptide_multi.options = build_peptide_options(df_filt, mode.value)\n", "\n", " if keep_selection and old_sel:\n", " valid_vals = [v for _, v in peptide_multi.options]\n", " restored = [v for v in valid_vals if v in old_sel]\n", " STATE[\"suspend_autorender\"] = True\n", " try:\n", " peptide_multi.value = tuple(restored)\n", " finally:\n", " STATE[\"suspend_autorender\"] = False\n", "\n", " def render_current_selection():\n", " with out:\n", " clear_output()\n", "\n", " acc = acc_input.value.strip()\n", " df_all = STATE[\"df\"]\n", " df_filt = STATE[\"df_filtered\"]\n", "\n", " if df_all is None or df_all.empty:\n", " print(\"No data loaded. Click Load.\")\n", " return\n", "\n", " selected = list(peptide_multi.value)\n", " if not selected:\n", " print(\"Select at least one peptide (or click 'Map all (filtered)').\")\n", " return\n", "\n", " try:\n", " pdb_path = ensure_loaded_assets(acc)\n", " except Exception as e:\n", " print(\"AlphaFold download error:\", e)\n", " return\n", "\n", " spans = []\n", " mod_positions = []\n", "\n", " if mode.value == \"Unique peptide spans\":\n", " spans = [(int(s), int(e)) for _, s, e in selected]\n", "\n", " if show_mods_mode.value == \"Selected peptides only\":\n", " for pepSeq, s, e in selected:\n", " sub = df_all[\n", " (df_all[\"peptideSequence\"] == pepSeq)\n", " & (df_all[\"peptideStart\"] == int(s))\n", " & (df_all[\"peptideEnd\"] == int(e))\n", " ]\n", " mod_positions.extend(sub[\"uniprotPosition\"].dropna().astype(int).tolist())\n", " else:\n", " mod_positions = df_all[\"uniprotPosition\"].dropna().astype(int).tolist()\n", "\n", " else:\n", " sub = df_filt.loc[selected].copy()\n", " spans = [(int(r[\"peptideStart\"]), int(r[\"peptideEnd\"])) for _, r in sub.iterrows()]\n", "\n", " if show_mods_mode.value == \"Selected peptides only\":\n", " mod_positions = sub[\"uniprotPosition\"].dropna().astype(int).tolist()\n", " else:\n", " mod_positions = df_all[\"uniprotPosition\"].dropna().astype(int).tolist()\n", "\n", " # union/intersection\n", " pos_lists = [list(range(a, b + 1)) for a, b in spans]\n", " union_pos = sorted(set(p for L in pos_lists for p in L))\n", " inter_pos = sorted(set(pos_lists[0]).intersection(*map(set, pos_lists[1:]))) if len(pos_lists) > 1 else []\n", " union_ranges = positions_to_ranges(union_pos)\n", "\n", " # --- Big NGL panel ---\n", " view = nv.NGLWidget()\n", " view.add_component(pdb_path)\n", " view.clear_representations()\n", " view.add_cartoon(color=\"silver\")\n", "\n", " add_cartoon_selection(view, union_ranges, color=\"blue\", name=\"peptide_union\")\n", "\n", " if inter_pos:\n", " add_positions(view, inter_pos, color=\"red\", name=\"peptide_intersection\", repr_type=\"ball+stick\")\n", "\n", " if show_mods_chk.value and mod_positions:\n", " add_positions(view, mod_positions, color=\"magenta\", name=\"mods\", repr_type=\"ball+stick\")\n", "\n", " view.center()\n", "\n", " # Make structure panel bigger (tweak as you like)\n", " view.layout = widgets.Layout(width=\"1100px\", height=\"700px\")\n", "\n", " display(view)\n", "\n", " # --- added (export only): store last render state ---\n", " STATE[\"last_union_ranges\"] = union_ranges\n", " STATE[\"last_inter_pos\"] = sorted(set(int(x) for x in inter_pos))\n", " STATE[\"last_mod_pos\"] = sorted(set(int(x) for x in mod_positions))\n", " STATE[\"last_pdb_path\"] = pdb_path\n", "\n", " # --- Summary printing tweaks ---\n", " spans_sorted = sorted(spans, key=lambda x: (x[0], x[1]))\n", " first_start = spans_sorted[0][0]\n", " last_end = spans_sorted[-1][1]\n", "\n", " print(f\"\\nACC_ID: {acc}\")\n", " print(f\"AlphaFold model: {pdb_path}\")\n", " print(f\"Selected peptide spans: {len(spans_sorted)}\")\n", "\n", " # When map-all clicked, show compact coverage (also useful generally)\n", " if STATE[\"last_action\"] == \"map_all\":\n", " print(f\"Coverage (first peptide start → last peptide end): {first_start} → {last_end}\")\n", " else:\n", " # For manual selection, still show compact coverage (less spammy)\n", " print(f\"Coverage: {first_start} → {last_end}\")\n", "\n", " if inter_pos:\n", " print(f\"Intersection (red): {len(inter_pos)} residues\")\n", " else:\n", " print(\"Intersection: none (only one peptide)\")\n", "\n", " if show_mods_chk.value:\n", " print(f\"Modified sites (magenta): {len(set(mod_positions))} unique positions\")\n", "\n", " # -------------------------\n", " # --- added (export only): standalone styled HTML writer ---\n", " # -------------------------\n", " def _write_styled_ngl_html(acc, pdb_path, union_ranges, inter_pos, mod_pos, out_html_path, auto_download_png=False):\n", " pdb_text = Path(pdb_path).read_text(errors=\"ignore\")\n", "\n", " payload = {\n", " \"acc\": acc,\n", " \"union_ranges\": union_ranges,\n", " \"intersection\": inter_pos,\n", " \"mods\": mod_pos\n", " }\n", "\n", " # If auto_download_png=True, the HTML will immediately trigger a PNG download via stage.makeImage()\n", " auto_png_js = \"\"\"\n", " // Auto-download PNG snapshot\n", " stage.makeImage({ factor: 2, antialias: true, trim: false }).then(function (blob) {\n", " var a = document.createElement(\"a\");\n", " a.href = URL.createObjectURL(blob);\n", " a.download = payload.acc + \"_snapshot.png\";\n", " document.body.appendChild(a);\n", " a.click();\n", " a.remove();\n", " });\n", " \"\"\" if auto_download_png else \"\"\n", "\n", " html = f\"\"\"\n", "\n", "\n", " \n", " {acc} styled NGL session\n", " \n", " \n", "\n", "\n", "
\n", " {acc}
\n", "
Grey: protein | Blue: peptides | Red: intersection | Magenta: mods
\n", "
union ranges: {len(union_ranges)} | mods: {len(mod_pos)} | intersection: {len(inter_pos)}
\n", "
\n", "
\n", "\n", " \n", "\n", "\n", "\"\"\"\n", " Path(out_html_path).write_text(html, encoding=\"utf-8\")\n", "\n", " # -------------------------\n", " # --- added (export only): export button actions ---\n", " # -------------------------\n", " def on_export_html(_):\n", " with out:\n", " acc = acc_input.value.strip()\n", " if not acc:\n", " print(\"Enter an accession first.\")\n", " return\n", " if not STATE.get(\"last_pdb_path\"):\n", " print(\"Render a selection first (so peptides/mods exist) before exporting.\")\n", " return\n", "\n", " export_dir = Path(\"exports\")\n", " export_dir.mkdir(exist_ok=True)\n", " html_path = export_dir / f\"{acc}_styled_session.html\"\n", "\n", " _write_styled_ngl_html(\n", " acc=acc,\n", " pdb_path=STATE[\"last_pdb_path\"],\n", " union_ranges=STATE[\"last_union_ranges\"],\n", " inter_pos=STATE[\"last_inter_pos\"],\n", " mod_pos=STATE[\"last_mod_pos\"],\n", " out_html_path=html_path,\n", " auto_download_png=False\n", " )\n", "\n", " print(f\"Exported styled HTML to: {html_path.resolve()}\")\n", " print(\"Open it in a browser — peptide/mod colors are preserved.\")\n", "\n", " def on_export_png(_):\n", " with out:\n", " acc = acc_input.value.strip()\n", " if not acc:\n", " print(\"Enter an accession first.\")\n", " return\n", " if not STATE.get(\"last_pdb_path\"):\n", " print(\"Render a selection first (so peptides/mods exist) before exporting.\")\n", " return\n", "\n", " export_dir = Path(\"exports\")\n", " export_dir.mkdir(exist_ok=True)\n", " html_path = export_dir / f\"{acc}_snapshot_downloader.html\"\n", "\n", " _write_styled_ngl_html(\n", " acc=acc,\n", " pdb_path=STATE[\"last_pdb_path\"],\n", " union_ranges=STATE[\"last_union_ranges\"],\n", " inter_pos=STATE[\"last_inter_pos\"],\n", " mod_pos=STATE[\"last_mod_pos\"],\n", " out_html_path=html_path,\n", " auto_download_png=True\n", " )\n", "\n", " print(f\"Created PNG downloader HTML: {html_path.resolve()}\")\n", " print(\"Open this HTML in a browser — it will auto-download the PNG snapshot with styling.\")\n", "\n", " # -------------------------\n", " # 7) Events\n", " # -------------------------\n", " def on_load(_):\n", " with out:\n", " clear_output()\n", " acc = acc_input.value.strip()\n", " if not acc:\n", " print(\"Enter an accession (e.g., O00571) then click Load.\")\n", " return\n", "\n", " try:\n", " df = fetch_scop3p_peptides(acc)\n", " except Exception as e:\n", " print(\"API error:\", e)\n", " STATE[\"df\"] = pd.DataFrame()\n", " STATE[\"df_filtered\"] = pd.DataFrame()\n", " peptide_multi.options = []\n", " return\n", "\n", " if df.empty:\n", " print(f\"No peptides returned for {acc}.\")\n", " STATE[\"df\"] = df\n", " STATE[\"df_filtered\"] = df\n", " peptide_multi.options = []\n", " return\n", "\n", " STATE[\"df\"] = df\n", " STATE[\"acc_loaded\"] = acc\n", " STATE[\"pdb_path\"] = None\n", " STATE[\"last_action\"] = None\n", "\n", " refresh_filtered_and_options(keep_selection=False)\n", "\n", " print(f\"Loaded {len(df)} Scop3P peptide-mod rows for {acc}.\")\n", " print(\"Use Search to filter; select peptides to map (auto-renders).\")\n", "\n", " def on_mode_change(_):\n", " refresh_filtered_and_options(keep_selection=True)\n", " if peptide_multi.value:\n", " STATE[\"last_action\"] = \"select\"\n", " render_current_selection()\n", "\n", " def on_search_change(_):\n", " refresh_filtered_and_options(keep_selection=True)\n", " if peptide_multi.value:\n", " STATE[\"last_action\"] = \"select\"\n", " render_current_selection()\n", "\n", " def on_selection_change(_):\n", " if STATE[\"suspend_autorender\"]:\n", " return\n", " if peptide_multi.value:\n", " STATE[\"last_action\"] = \"select\"\n", " render_current_selection()\n", "\n", " def on_mods_toggle(_):\n", " if peptide_multi.value:\n", " STATE[\"last_action\"] = \"select\"\n", " render_current_selection()\n", "\n", " def on_map_all(_):\n", " STATE[\"last_action\"] = \"map_all\"\n", " STATE[\"suspend_autorender\"] = True\n", " try:\n", " all_vals = [v for (_, v) in peptide_multi.options]\n", " peptide_multi.value = tuple(all_vals)\n", " finally:\n", " STATE[\"suspend_autorender\"] = False\n", " render_current_selection()\n", "\n", " load_btn.on_click(on_load)\n", " mode.observe(on_mode_change, names=\"value\")\n", " search_box.observe(on_search_change, names=\"value\")\n", " peptide_multi.observe(on_selection_change, names=\"value\")\n", " show_mods_chk.observe(on_mods_toggle, names=\"value\")\n", " show_mods_mode.observe(on_mods_toggle, names=\"value\")\n", " map_all_btn.on_click(on_map_all)\n", "\n", " # --- added (export only) ---\n", " export_html_btn.on_click(on_export_html)\n", " # export_png_btn.on_click(on_export_png)\n", "\n", " # No return → avoids printing the dict at cell end\n", " return None\n", "\n", "\n", "# Run the app (empty default forces user to type accession)\n", "scop3p_ngl_mapper_app(\"\")\n" ] }, { "cell_type": "code", "execution_count": null, "id": "034a30ec-6235-4c86-80cf-168a1e65fd97", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "17942105-b541-4513-91e1-d375c8cb4c5c", "metadata": {}, "source": [ "### Upload your own data (Peptide table)\n", "#### Gets a peptide file and visualize the peptides and modifications on protein structures\n", "> 1. Choose peptide file (tsv/tab delimkited text file) and click 'Load file'\n", "> 2. The app will autofill the columns using keywords or enter the column headers\n", ">> (protein ID column name [ACC_ID], peptide seq column name [pep_seq] and the modified position column name [UP_POS]\n", "> 3. Click 'Build mapping'\n", "> 4. Select the protein from list for which you want to map the peptides (note: these proteins are identified in your sample)\n", "> 5. Map all petides on the AF structure using 'Map all' (shows the mass spec coverage of your protein)\n", "> 6. Alternatively click one or multiple peptides on peptide panel to see their structural mapping\n", "> 7. Hint:\n", ">> Explore what the search funtion does!\n" ] }, { "cell_type": "code", "execution_count": 6, "id": "3f8d3ec5-370f-4808-8a15-708dcfee2137", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "ef1eedf39b84475e941f305bf1a5d07b", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HTML(value='Upload peptide table (protein-mapped pep_start/pep_end) → map to AlphaFold with NGLView')" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "61f43bed1afc4450923efc0436ca0e40", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(FileUpload(value=(), accept='.tsv,.txt,.csv', description='Choose file'), Button(button_style='…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "822850f87f1e4256ad9acabfee41081d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(Dropdown(description='Protein ID:', options=(), value=None), Dropdown(description='Pep seq:', o…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "89eb6384c8a2431e98b69fac024f78cc", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(Dropdown(description='Protein:', options=(), value=None), ToggleButtons(description='List:', op…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "25c8eff152444591a759516b1ef53c3d", "version_major": 2, "version_minor": 0 }, "text/plain": [ "SelectMultiple(description='Peptides:', layout=Layout(height='240px', width='980px'), options=(), value=())" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3c9db26465424b87bc76cf88821e10d4", "version_major": 2, "version_minor": 0 }, "text/plain": [ "HBox(children=(Button(button_style='warning', description='Map all peptides', style=ButtonStyle()), Button(but…" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "36509065e036424fa1504a25e0c3091e", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Output()" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "import urllib.request\n", "from urllib.error import HTTPError, URLError\n", "from pathlib import Path\n", "import shutil\n", "import json\n", "\n", "import ipywidgets as widgets\n", "from IPython.display import display, clear_output\n", "import nglview as nv\n", "\n", "\n", "def peptide_upload_ngl_mapper_app():\n", " # -------------------------\n", " # AlphaFold download\n", " # -------------------------\n", " def download_alphafold_pdb(accession: str, versions=(\"v6\", \"v4\")) -> str:\n", " base = \"https://alphafold.ebi.ac.uk/files\"\n", " last_err = None\n", " for v in versions:\n", " pdb_name = f\"AF-{accession}-F1-model_{v}.pdb\"\n", " url = f\"{base}/{pdb_name}\"\n", " out = f\"{accession}.pdb\"\n", " try:\n", " urllib.request.urlretrieve(url, out)\n", " import os\n", " if os.path.getsize(out) < 1000:\n", " raise RuntimeError(\"Downloaded file too small\")\n", " return out\n", " except (HTTPError, URLError, RuntimeError) as e:\n", " last_err = e\n", " raise RuntimeError(f\"No AlphaFold structure for {accession}: {last_err}\")\n", "\n", " # -------------------------\n", " # NGL helpers\n", " # -------------------------\n", " def positions_to_ranges(pos_list):\n", " if not pos_list:\n", " return []\n", " pos_list = sorted(set(int(p) for p in pos_list))\n", " ranges = []\n", " s = pos_list[0]\n", " prev = pos_list[0]\n", " for x in pos_list[1:]:\n", " if x == prev + 1:\n", " prev = x\n", " else:\n", " ranges.append((s, prev))\n", " s = x\n", " prev = x\n", " ranges.append((s, prev))\n", " return ranges\n", "\n", " def add_cartoon_selection(view, ranges, color=\"blue\", name=\"peptides\"):\n", " if not ranges:\n", " return\n", " sel = \" or \".join([f\"resi {a}-{b}\" for a, b in ranges])\n", " view.add_representation(\"cartoon\", selection=sel, color=color, name=name)\n", "\n", " def add_positions(view, positions, color=\"magenta\", name=\"sites\"):\n", " if not positions:\n", " return\n", " sel = \" or \".join([f\"resi {int(p)}\" for p in sorted(set(int(p) for p in positions))])\n", " view.add_representation(\"ball+stick\", selection=sel, color=color, name=name)\n", "\n", " # -------------------------\n", " # Robust upload reader\n", " # -------------------------\n", " def read_uploaded_table(upload_widget):\n", " if not upload_widget.value:\n", " return None, \"No file uploaded.\"\n", "\n", " v = upload_widget.value\n", " if isinstance(v, dict):\n", " item = next(iter(v.values()))\n", " filename = item.get(\"metadata\", {}).get(\"name\", \"uploaded\")\n", " content = item.get(\"content\", None)\n", " elif isinstance(v, (list, tuple)) and len(v) > 0:\n", " item = v[0]\n", " filename = item.get(\"name\", \"uploaded\")\n", " content = item.get(\"content\", None) or item.get(\"data\", None)\n", " else:\n", " return None, \"Unrecognized upload payload.\"\n", "\n", " if content is None:\n", " return None, \"Uploaded file content not found.\"\n", "\n", " bio = pd.io.common.BytesIO(content)\n", " try:\n", " df = pd.read_csv(bio, sep=\"\\t\")\n", " except Exception:\n", " bio = pd.io.common.BytesIO(content)\n", " df = pd.read_csv(bio, sep=\",\")\n", "\n", " return df, filename\n", "\n", " # -------------------------\n", " # UI\n", " # -------------------------\n", " upload = widgets.FileUpload(accept=\".tsv,.txt,.csv\", multiple=False, description=\"Choose file\")\n", " load_file_btn = widgets.Button(description=\"Load file\", button_style=\"primary\")\n", "\n", " protein_col = widgets.Dropdown(description=\"Protein ID:\", options=[])\n", " pepseq_col = widgets.Dropdown(description=\"Pep seq:\", options=[])\n", " pepstart_col = widgets.Dropdown(description=\"pep_start:\", options=[])\n", " pepend_col = widgets.Dropdown(description=\"pep_end:\", options=[])\n", " uppos_col = widgets.Dropdown(description=\"UP_POS:\", options=[])\n", "\n", " build_btn = widgets.Button(description=\"Build mapping\", button_style=\"success\")\n", "\n", " protein_dd = widgets.Dropdown(description=\"Protein:\", options=[])\n", " list_mode = widgets.ToggleButtons(options=[\"Unique peptide spans\", \"All rows\"], value=\"Unique peptide spans\", description=\"List:\")\n", "\n", " peptide_multi = widgets.SelectMultiple(\n", " description=\"Peptides:\",\n", " options=[],\n", " layout=widgets.Layout(width=\"980px\", height=\"240px\")\n", " )\n", "\n", " map_all_btn = widgets.Button(description=\"Map all peptides\", button_style=\"warning\")\n", "\n", " export_pdb_btn = widgets.Button(description=\"Export PDB\", button_style=\"info\")\n", " export_html_btn = widgets.Button(description=\"Export styled HTML session\", button_style=\"info\")\n", " export_sel_btn = widgets.Button(description=\"Export selections (TSV)\", button_style=\"info\")\n", "\n", " show_mods_chk = widgets.Checkbox(value=True, description=\"Show mods (magenta)\")\n", " show_mods_mode = widgets.ToggleButtons(\n", " options=[\"Selected peptides only\", \"All protein mods\"],\n", " value=\"Selected peptides only\",\n", " description=\"Mods:\"\n", " )\n", "\n", " viewer_size = widgets.Dropdown(\n", " options=[(\"Large\", (\"1100px\",\"700px\")), (\"Medium\", (\"900px\",\"550px\")), (\"Huge\", (\"1300px\",\"850px\"))],\n", " value=(\"1100px\",\"700px\"),\n", " description=\"Viewer:\"\n", " )\n", "\n", " out = widgets.Output()\n", "\n", " display(widgets.HTML(\"Upload peptide table (protein-mapped pep_start/pep_end) → map to AlphaFold with NGLView\"))\n", " display(widgets.HBox([upload, load_file_btn]))\n", " display(widgets.HBox([protein_col, pepseq_col, pepstart_col, pepend_col, uppos_col, build_btn]))\n", " display(widgets.HBox([protein_dd, list_mode, viewer_size]))\n", " display(peptide_multi)\n", " display(widgets.HBox([\n", " map_all_btn,\n", " export_pdb_btn,\n", " export_html_btn,\n", " export_sel_btn,\n", " show_mods_chk,\n", " show_mods_mode\n", " ]))\n", " display(out)\n", "\n", " # -------------------------\n", " # State\n", " # -------------------------\n", " STATE = {\n", " \"raw_df\": pd.DataFrame(),\n", " \"map_df\": pd.DataFrame(),\n", " \"suspend\": False,\n", " \"last_action\": None,\n", "\n", " # last render info for exports\n", " \"last_acc\": None,\n", " \"last_pdb_path\": None,\n", " \"last_union_ranges\": [],\n", " \"last_inter_positions\": [],\n", " \"last_mod_positions\": [],\n", " }\n", "\n", " # -------------------------\n", " # Utilities\n", " # -------------------------\n", " def guess_column(cols, candidates):\n", " cols_lower = {c.lower(): c for c in cols}\n", " for cand in candidates:\n", " if cand.lower() in cols_lower:\n", " return cols_lower[cand.lower()]\n", " return None\n", "\n", " def build_mapping_df():\n", " df = STATE[\"raw_df\"]\n", " if df is None or df.empty:\n", " return pd.DataFrame()\n", "\n", " chosen = {\n", " \"ACC_ID\": protein_col.value,\n", " \"Pep_seq\": pepseq_col.value,\n", " \"pep_start\": pepstart_col.value,\n", " \"pep_end\": pepend_col.value,\n", " \"UP_POS\": uppos_col.value,\n", " }\n", " if not all(chosen.values()):\n", " return pd.DataFrame()\n", "\n", " m = df[list(chosen.values())].copy()\n", " m.columns = list(chosen.keys())\n", "\n", " m[\"ACC_ID\"] = m[\"ACC_ID\"].astype(str).str.strip()\n", " m[\"Pep_seq\"] = m[\"Pep_seq\"].astype(str).str.strip()\n", "\n", " for c in [\"pep_start\", \"pep_end\", \"UP_POS\"]:\n", " m[c] = pd.to_numeric(m[c], errors=\"coerce\")\n", "\n", " m = m.dropna(subset=[\"ACC_ID\", \"Pep_seq\", \"pep_start\", \"pep_end\", \"UP_POS\"]).copy()\n", " m[\"pep_start\"] = m[\"pep_start\"].astype(int)\n", " m[\"pep_end\"] = m[\"pep_end\"].astype(int)\n", " m[\"UP_POS\"] = m[\"UP_POS\"].astype(int)\n", "\n", " m = m[(m[\"pep_start\"] >= 1) & (m[\"pep_end\"] >= m[\"pep_start\"])].copy()\n", " return m\n", "\n", " def peptide_options_for_acc(acc):\n", " df = STATE[\"map_df\"]\n", " df_sub = df[df[\"ACC_ID\"] == acc].copy()\n", " if df_sub.empty:\n", " return []\n", "\n", " if list_mode.value == \"Unique peptide spans\":\n", " g = (\n", " df_sub.groupby([\"Pep_seq\", \"pep_start\", \"pep_end\"], as_index=False)\n", " .agg(n_mod_sites=(\"UP_POS\", \"nunique\"), n_rows=(\"UP_POS\", \"size\"))\n", " )\n", " opts = []\n", " for _, r in g.iterrows():\n", " key = (r[\"Pep_seq\"], int(r[\"pep_start\"]), int(r[\"pep_end\"]))\n", " label = f'{key[0]} ({key[1]}-{key[2]}) | modSites={int(r[\"n_mod_sites\"])} rows={int(r[\"n_rows\"])}'\n", " opts.append((label, key))\n", " return opts\n", "\n", " opts = []\n", " for idx, r in df_sub.iterrows():\n", " label = f'{r[\"Pep_seq\"]} ({int(r[\"pep_start\"])}-{int(r[\"pep_end\"])}) @UP_POS={int(r[\"UP_POS\"])}'\n", " opts.append((label, idx))\n", " return opts\n", "\n", " def refresh_peptides(acc, keep_selection=False):\n", " old = set(peptide_multi.value) if keep_selection else set()\n", " opts = peptide_options_for_acc(acc)\n", " peptide_multi.options = opts\n", "\n", " if keep_selection and old:\n", " valid_vals = [v for _, v in opts]\n", " restored = [v for v in valid_vals if v in old]\n", " STATE[\"suspend\"] = True\n", " try:\n", " peptide_multi.value = tuple(restored)\n", " finally:\n", " STATE[\"suspend\"] = False\n", "\n", " # -------------------------\n", " # Renderer\n", " # -------------------------\n", " def render_current():\n", " with out:\n", " clear_output()\n", "\n", " if STATE[\"map_df\"] is None or STATE[\"map_df\"].empty:\n", " print(\"Build the mapping table first.\")\n", " return\n", "\n", " acc = protein_dd.value\n", " if not acc:\n", " print(\"Select a protein.\")\n", " return\n", "\n", " selected = list(peptide_multi.value)\n", " if not selected:\n", " print(\"Select one or more peptides, or click 'Map all peptides'.\")\n", " return\n", "\n", " try:\n", " pdb_path = download_alphafold_pdb(acc)\n", " STATE[\"last_pdb_path\"] = pdb_path\n", " except Exception as e:\n", " print(f\"[AlphaFold not available for {acc}]\")\n", " print(e)\n", " return\n", "\n", " df = STATE[\"map_df\"]\n", " df_sub = df[df[\"ACC_ID\"] == acc].copy()\n", "\n", " spans = []\n", " mod_positions = []\n", "\n", " if list_mode.value == \"Unique peptide spans\":\n", " spans = [(int(s), int(e)) for _, s, e in selected]\n", "\n", " if show_mods_mode.value == \"Selected peptides only\":\n", " for pepSeq, s, e in selected:\n", " sub = df_sub[\n", " (df_sub[\"Pep_seq\"] == pepSeq) &\n", " (df_sub[\"pep_start\"] == int(s)) &\n", " (df_sub[\"pep_end\"] == int(e))\n", " ]\n", " mod_positions.extend(sub[\"UP_POS\"].tolist())\n", " else:\n", " mod_positions = df_sub[\"UP_POS\"].tolist()\n", "\n", " else:\n", " sub = df_sub.loc[selected].copy()\n", " spans = list(zip(sub[\"pep_start\"].astype(int).tolist(), sub[\"pep_end\"].astype(int).tolist()))\n", " if show_mods_mode.value == \"Selected peptides only\":\n", " mod_positions = sub[\"UP_POS\"].astype(int).tolist()\n", " else:\n", " mod_positions = df_sub[\"UP_POS\"].astype(int).tolist()\n", "\n", " pos_lists = [list(range(a, b + 1)) for a, b in spans]\n", " union_pos = sorted(set(p for L in pos_lists for p in L))\n", " inter_pos = sorted(set(pos_lists[0]).intersection(*map(set, pos_lists[1:]))) if len(pos_lists) > 1 else []\n", " union_ranges = positions_to_ranges(union_pos)\n", "\n", " view = nv.NGLWidget()\n", " view.add_component(pdb_path)\n", " view.clear_representations()\n", " view.add_cartoon(color=\"silver\")\n", "\n", " add_cartoon_selection(view, union_ranges, color=\"blue\", name=\"peptide_union\")\n", " if inter_pos:\n", " add_positions(view, inter_pos, color=\"red\", name=\"intersection\")\n", " if show_mods_chk.value and mod_positions:\n", " add_positions(view, mod_positions, color=\"magenta\", name=\"mods\")\n", "\n", " w, h = viewer_size.value\n", " view.layout = widgets.Layout(width=w, height=h)\n", " view.center()\n", " display(view)\n", "\n", " # Save for exports\n", " STATE[\"last_acc\"] = acc\n", " STATE[\"last_union_ranges\"] = union_ranges\n", " STATE[\"last_inter_positions\"] = sorted(set(int(x) for x in inter_pos))\n", " STATE[\"last_mod_positions\"] = sorted(set(int(x) for x in mod_positions))\n", "\n", " print(f\"ACC_ID: {acc}\")\n", " print(f\"AlphaFold: {pdb_path}\")\n", " print(f\"Union ranges: {len(union_ranges)}\")\n", " if show_mods_chk.value:\n", " print(f\"Unique mod sites: {len(set(mod_positions))}\")\n", "\n", " # -------------------------\n", " # Export: raw PDB copy\n", " # -------------------------\n", " def on_export_pdb(_):\n", " with out:\n", " acc = protein_dd.value\n", " if not acc:\n", " print(\"No protein selected to export.\")\n", " return\n", " try:\n", " pdb_path = download_alphafold_pdb(acc)\n", " except Exception as e:\n", " print(f\"Cannot export: AlphaFold not available for {acc}\")\n", " print(e)\n", " return\n", "\n", " export_dir = Path(\"exports\")\n", " export_dir.mkdir(exist_ok=True)\n", " export_path = export_dir / f\"{acc}_alphafold.pdb\"\n", " shutil.copyfile(pdb_path, export_path)\n", " print(f\"Exported raw PDB to: {export_path.resolve()}\")\n", "\n", " # -------------------------\n", " # Export: selections TSV\n", " # -------------------------\n", " def on_export_selections(_):\n", " with out:\n", " acc = STATE.get(\"last_acc\")\n", " if not acc:\n", " print(\"Nothing to export yet — render a protein first.\")\n", " return\n", "\n", " export_dir = Path(\"exports\")\n", " export_dir.mkdir(exist_ok=True)\n", " tsv_path = export_dir / f\"{acc}_mapped_residues.tsv\"\n", "\n", " union_ranges = STATE.get(\"last_union_ranges\", [])\n", " mods = STATE.get(\"last_mod_positions\", [])\n", " inter = STATE.get(\"last_inter_positions\", [])\n", "\n", " rows = []\n", " for a, b in union_ranges:\n", " rows.append({\"type\": \"union_range\", \"start\": a, \"end\": b, \"pos\": \"\"})\n", " for p in mods:\n", " rows.append({\"type\": \"mod_site\", \"start\": \"\", \"end\": \"\", \"pos\": p})\n", " for p in inter:\n", " rows.append({\"type\": \"intersection_site\", \"start\": \"\", \"end\": \"\", \"pos\": p})\n", "\n", " pd.DataFrame(rows).to_csv(tsv_path, sep=\"\\t\", index=False)\n", " print(f\"Exported selections TSV to: {tsv_path.resolve()}\")\n", "\n", " # -------------------------\n", " # Export: TRUE standalone styled NGL HTML\n", " # -------------------------\n", " def on_export_styled_html(_):\n", " with out:\n", " acc = STATE.get(\"last_acc\")\n", " pdb_path = STATE.get(\"last_pdb_path\")\n", " if not acc or not pdb_path:\n", " print(\"Nothing to export yet — render a protein first.\")\n", " return\n", "\n", " union_ranges = STATE.get(\"last_union_ranges\", [])\n", " inter = STATE.get(\"last_inter_positions\", [])\n", " mods = STATE.get(\"last_mod_positions\", [])\n", "\n", " export_dir = Path(\"exports\")\n", " export_dir.mkdir(exist_ok=True)\n", " html_path = export_dir / f\"{acc}_styled_session.html\"\n", "\n", " # Embed PDB text directly into HTML\n", " pdb_text = Path(pdb_path).read_text(errors=\"ignore\")\n", "\n", " payload = {\n", " \"acc\": acc,\n", " \"union_ranges\": union_ranges, # list of [start,end]\n", " \"intersection\": inter, # list of positions\n", " \"mods\": mods # list of positions\n", " }\n", "\n", " # Standalone HTML uses NGL from CDN and applies your same styling\n", " html = f\"\"\"\n", "\n", "\n", " \n", " {acc} styled NGL session\n", " \n", " \n", "\n", "\n", "
\n", " {acc}
\n", "
Grey: protein | Blue: peptides | Red: intersection | Magenta: mods
\n", "
union ranges: {len(union_ranges)} | mods: {len(mods)} | intersection: {len(inter)}
\n", "
\n", "
\n", "\n", " \n", "\n", "\n", "\"\"\"\n", " html_path.write_text(html, encoding=\"utf-8\")\n", " print(f\"Exported TRUE styled session HTML to: {html_path.resolve()}\")\n", " print(\"Open it in a browser — colors & selections are preserved (no Jupyter dependencies).\")\n", "\n", " # -------------------------\n", " # Callbacks\n", " # -------------------------\n", " def on_load_file(_):\n", " with out:\n", " clear_output()\n", "\n", " df, filename = read_uploaded_table(upload)\n", " if df is None:\n", " print(filename)\n", " return\n", "\n", " STATE[\"raw_df\"] = df\n", " cols = list(df.columns)\n", "\n", " for dd in [protein_col, pepseq_col, pepstart_col, pepend_col, uppos_col]:\n", " dd.options = cols\n", "\n", " protein_col.value = guess_column(cols, [\"ACC_ID\", \"Protein\", \"protein\", \"Uniprot\", \"UniProt\", \"accession\"]) or cols[0]\n", " pepseq_col.value = guess_column(cols, [\"Pep_seq\", \"peptideSequence\", \"peptide\", \"Peptide\"]) or cols[0]\n", " pepstart_col.value = guess_column(cols, [\"pep_start\", \"PeptideStart\", \"peptideStart\", \"start\"]) or cols[0]\n", " pepend_col.value = guess_column(cols, [\"pep_end\", \"PeptideEnd\", \"peptideEnd\", \"end\"]) or cols[0]\n", " uppos_col.value = guess_column(cols, [\"UP_POS\", \"uniprotPosition\", \"UniprotPosition\", \"modpos_prot\"]) or cols[0]\n", "\n", " print(f\"Loaded file: {filename}\")\n", " print(f\"Shape: {df.shape[0]} rows × {df.shape[1]} cols\")\n", " print(\"Now choose the 5 columns and click 'Build mapping'.\\n\")\n", " display(df.head(5))\n", "\n", " def on_build_mapping(_):\n", " with out:\n", " clear_output()\n", "\n", " mdf = build_mapping_df()\n", " if mdf.empty:\n", " print(\"Failed to build mapping table. Check that pep_start/pep_end/UP_POS are numeric and columns are correct.\")\n", " return\n", "\n", " STATE[\"map_df\"] = mdf\n", " prots = sorted(mdf[\"ACC_ID\"].unique().tolist())\n", " protein_dd.options = prots\n", " protein_dd.value = prots[0] if prots else None\n", "\n", " refresh_peptides(protein_dd.value, keep_selection=False)\n", " print(f\"Mapping built: {len(mdf)} rows across {len(prots)} proteins.\")\n", " print(\"Select peptides to render (auto-refresh).\")\n", "\n", " def on_protein_change(_):\n", " if STATE[\"map_df\"] is None or STATE[\"map_df\"].empty:\n", " return\n", " acc = protein_dd.value\n", " if not acc:\n", " return\n", " STATE[\"last_action\"] = None\n", " refresh_peptides(acc, keep_selection=False)\n", " with out:\n", " clear_output()\n", " print(f\"Protein switched to {acc}. Peptides refreshed.\")\n", " print(\"Select peptides or click 'Map all peptides'.\")\n", "\n", " def on_list_mode_change(_):\n", " acc = protein_dd.value\n", " if not acc:\n", " return\n", " refresh_peptides(acc, keep_selection=True)\n", " if peptide_multi.value:\n", " STATE[\"last_action\"] = \"select\"\n", " render_current()\n", "\n", " def on_peptide_select(_):\n", " if STATE[\"suspend\"]:\n", " return\n", " if peptide_multi.value:\n", " STATE[\"last_action\"] = \"select\"\n", " render_current()\n", "\n", " def on_map_all(_):\n", " if not peptide_multi.options:\n", " return\n", " STATE[\"last_action\"] = \"map_all\"\n", " STATE[\"suspend\"] = True\n", " try:\n", " all_vals = [v for _, v in peptide_multi.options]\n", " peptide_multi.value = tuple(all_vals)\n", " finally:\n", " STATE[\"suspend\"] = False\n", " render_current()\n", "\n", " def on_any_toggle(_):\n", " if peptide_multi.value:\n", " STATE[\"last_action\"] = \"select\"\n", " render_current()\n", "\n", " # wire up\n", " load_file_btn.on_click(on_load_file)\n", " build_btn.on_click(on_build_mapping)\n", "\n", " protein_dd.observe(on_protein_change, names=\"value\")\n", " list_mode.observe(on_list_mode_change, names=\"value\")\n", " peptide_multi.observe(on_peptide_select, names=\"value\")\n", " map_all_btn.on_click(on_map_all)\n", "\n", " export_pdb_btn.on_click(on_export_pdb)\n", " export_html_btn.on_click(on_export_styled_html)\n", " export_sel_btn.on_click(on_export_selections)\n", "\n", " show_mods_chk.observe(on_any_toggle, names=\"value\")\n", " show_mods_mode.observe(on_any_toggle, names=\"value\")\n", " viewer_size.observe(on_any_toggle, names=\"value\")\n", "\n", " return None\n", "\n", "\n", "# Run it\n", "peptide_upload_ngl_mapper_app()\n" ] }, { "cell_type": "code", "execution_count": null, "id": "44c877e7-8448-44bb-b741-fbb507381bb2", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }