{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# AbNumber Usage Examples\n", "\n", "Worked examples of the `Chain` API from `abnumber`: creating and printing a numbered chain, choosing CDR definitions, renumbering, indexing and slicing, alignment, CDR grafting, and different numbering schemes." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from abnumber import Chain\n", "import pandas as pd\n", "\n", "pd.options.display.max_columns = 200" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Creating a chain" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "EVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYSEDDERGHYCLDYWGQGTTLTVSS\n", " ^^^^^^^^ ^^^^^^^^ ^^^^^^^^^^^^^^^^^ " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "seq = 'EVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYSEDDERGHYCLDYWGQGTTLTVSSAKTTAPSVYPLA'\n", "chain = Chain(seq, scheme='imgt')\n", "chain" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 1 2 3 4 5 6 7 8 9 10 11 12 \n", "1234567891234567890123456789056789012345678901234567892345678901245678901234567890123456789012345678901112223456789012345678\n", " ABBA \n", "EVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYSEDDERGHYCLDYWGQGTTLTVSS\n", " ^^^^^^^^ ^^^^^^^^ ^^^^^^^^^^^^^^^^^ \n" ] } ], "source": [ "chain.print(numbering=True)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'EVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYSEDDERGHYCLDYWGQGTTLTVSS'" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain.seq" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'AKTTAPSVYPLA'" ] }, "execution_count": 5, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "chain.tail" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'ARYYSEDDERGHYCLDY'" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain.cdr3_seq" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " H1 E CDR1 H27 G CDR2 H56 I H84 S CDR3 H109 S\n", " H2 V CDR1 H28 Y CDR2 H57 N H85 S CDR3 H110 E\n", " H3 Q CDR1 H29 T CDR2 H58 P H86 T CDR3 H111 D\n", " H4 L CDR1 H30 F CDR2 H59 S H87 A CDR3 H111A D\n", " H5 Q CDR1 H35 T CDR2 H62 R H88 Y CDR3 H111B E\n", " H6 Q CDR1 H36 R CDR2 H63 G H89 M CDR3 H112B R\n", " H7 S CDR1 H37 Y CDR2 H64 Y H90 Q CDR3 H112A G\n", " H8 G CDR1 H38 T CDR2 H65 T H91 L CDR3 H112 H\n", " H9 A H39 M H66 N H92 S CDR3 H113 Y\n", " H11 E H40 H H67 Y H93 S CDR3 H114 C\n", " H12 L H41 W H68 N H94 L CDR3 H115 L\n", " H13 A H42 V H69 Q H95 T CDR3 H116 D\n", " H14 R H43 K H70 K H96 S CDR3 H117 Y\n", " H15 P H44 Q H71 F H97 E H118 W\n", " H16 G H45 R H72 K H98 D H119 G\n", " H17 A H46 P H74 D H99 S H120 Q\n", " H18 S H47 G H75 K H100 A H121 G\n", " H19 V H48 Q H76 A H101 V H122 T\n", " H20 K H49 G H77 T H102 Y H123 T\n", " H21 M H50 L H78 L H103 Y H124 L\n", " H22 S H51 E H79 T H104 C H125 T\n", " H23 C H52 W H80 T CDR3 H105 A H126 V\n", " H24 K H53 I H81 D CDR3 H106 R H127 S\n", " H25 A H54 G H82 K CDR3 H107 Y H128 S\n", " H26 S H55 Y H83 S CDR3 H108 Y\n" ] } ], "source": [ "chain.print_tall()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Separate CDR definitions" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "EVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYSEDDERGHYCLDYWGQGTTLTVSS\n", " ^^^^^^^ ^^^^^^ ^^^^^^^^^^^^^^^ " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"Chain(seq, scheme='imgt', cdr_definition='chothia')" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "EVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPSRGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYSEDDERGHYCLDYWGQGTTLTVSS\n", " ^^^^^^^^^^^^^ ^^^^^^^^^^ ^^^^^^^^^^^^^^^^^ " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Chain(seq, scheme='imgt', cdr_definition='north')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Renumbering" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " H1 E H22 C H43 Q H63 F H82B S CDR3 H100B R\n", " H2 V H23 K H44 G H64 K H82C L CDR3 H100C G\n", " H3 Q H24 A H45 L H65 D H83 T CDR3 H100D H\n", " H4 L H25 S H46 E H66 K H84 S CDR3 H100E Y\n", " H5 Q CDR1 H26 G H47 W H67 A H85 E CDR3 H100F C\n", " H6 Q CDR1 H27 Y H48 I H68 T H86 D CDR3 H100G L\n", " H7 S CDR1 H28 T H49 G H69 L H87 S CDR3 H101 D\n", " H8 G CDR1 H29 F H50 Y H70 T H88 A CDR3 H102 Y\n", " H9 A CDR1 H30 T CDR2 H51 I H71 T H89 V H103 W\n", " H10 E CDR1 H31 R CDR2 H52 N H72 D H90 Y H104 G\n", " H11 L CDR1 H32 Y CDR2 H52A P H73 K H91 Y H105 Q\n", " H12 A CDR1 H33 T CDR2 H53 S H74 S H92 C H106 G\n", " H13 R H34 M CDR2 H54 R H75 S CDR3 H93 A H107 T\n", " H14 P H35 H CDR2 H55 G H76 S CDR3 H94 R H108 T\n", " H15 G H36 W CDR2 H56 Y H77 T CDR3 H95 Y H109 L\n", " H16 A H37 V CDR2 H57 T H78 A CDR3 H96 Y H110 T\n", " H17 S H38 K H58 N H79 Y CDR3 H97 S H111 V\n", " H18 V H39 Q H59 Y H80 M CDR3 H98 E H112 S\n", " H19 K H40 R H60 N H81 Q CDR3 H99 D H113 S\n", " H20 M H41 P H61 Q H82 L CDR3 H100 D\n", " H21 S H42 G H62 K H82A S CDR3 H100A E\n" ] } ], "source": [ "# Use Chothia numbering and CDR definitions\n", "chain.renumber('chothia').print_tall(6)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 
H1 E H23 C H48 Q H71 F H93 S CDR3 H112B R\n", " H2 V H24 K H49 G H72 K H94 L CDR3 H112A G\n", " H3 Q H25 A H50 L H74 D H95 T CDR3 H112 H\n", " H4 L H26 S H51 E H75 K H96 S CDR3 H113 Y\n", " H5 Q CDR1 H27 G H52 W H76 A H97 E CDR3 H114 C\n", " H6 Q CDR1 H28 Y H53 I H77 T H98 D CDR3 H115 L\n", " H7 S CDR1 H29 T H54 G H78 L H99 S CDR3 H116 D\n", " H8 G CDR1 H30 F H55 Y H79 T H100 A CDR3 H117 Y\n", " H9 A CDR1 H35 T H56 I H80 T H101 V H118 W\n", " H11 E CDR1 H36 R CDR2 H57 N H81 D H102 Y H119 G\n", " H12 L CDR1 H37 Y CDR2 H58 P H82 K H103 Y H120 Q\n", " H13 A H38 T CDR2 H59 S H83 S H104 C H121 G\n", " H14 R H39 M CDR2 H62 R H84 S H105 A H122 T\n", " H15 P H40 H CDR2 H63 G H85 S H106 R H123 T\n", " H16 G H41 W CDR2 H64 Y H86 T CDR3 H107 Y H124 L\n", " H17 A H42 V H65 T H87 A CDR3 H108 Y H125 T\n", " H18 S H43 K H66 N H88 Y CDR3 H109 S H126 V\n", " H19 V H44 Q H67 Y H89 M CDR3 H110 E H127 S\n", " H20 K H45 R H68 N H90 Q CDR3 H111 D H128 S\n", " H21 M H46 P H69 Q H91 L CDR3 H111A D\n", " H22 S H47 G H70 K H92 S CDR3 H111B E\n" ] } ], "source": [ "# Keep IMGT numbering, use Chothia CDR definitions\n", "chain.renumber(cdr_definition='chothia').print_tall(6)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Chain properties" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'imgt'" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain.scheme" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'H'" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain.chain_type" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain.is_heavy_chain()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ "124" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "len(chain)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Chain indexing and slicing" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'H'" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain['112']" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "'Y'" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain.raw[112]" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "EVQLQQSGAELARPGASVKMSCKASGYTF\n", " ^^^^" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain[:'30']" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "QGTTLTVSS\n", " " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain['120':]" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "GQGTTLTVSS\n", " " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain.raw[114:]" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "GQGTTLTVSS\n", " " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain.raw[-10:]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "GQGTTLTVSS\n", " " ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chain.raw[-10:]" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": 
"stdout", "output_type": "stream", "text": [ "CDR3 H111A D CDR3 H111B E CDR3 H112B R CDR3 H112A G\n" ] } ], "source": [ "chain['111A':'112A'].print_tall()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "H111A D\n", "H111B E\n", "H112B R\n", "H112A G\n" ] } ], "source": [ "for pos, aa in chain['111A':'112A']:\n", " print(pos, aa)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "H1 E\n", "H2 V\n", "H3 Q\n", "H4 L\n", "H5 Q\n", "H6 Q\n", "H7 S\n", "H8 G\n", "H9 A\n", "H11 E\n" ] } ], "source": [ "for pos, aa in chain.raw[:10]:\n", " print(pos, aa)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Alignment" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYDDERYDYLDRWGQGTTLTVSS\n", " ^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^^^^^ " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "seq2 = 'QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYDDERYDYLDRWGQGTTLTVSSAKTTAP'\n", "chain2 = Chain(seq2, scheme='imgt')\n", "chain2" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "EVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPS-RGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYSEDDERGHYCLDYWGQGTTLTVSS\n", "+|||.||||||.||||+|||||||||||.||||||||||||||||+||||||||.|.||||||||||||||||||||||||||.+||||||||||||||||.++....+..||.|||||||||||\n", "QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYDDE---RYDYLDRWGQGTTLTVSS\n", " ^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^^^^^^^^ " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"alignment = chain.align(chain2)\n", "alignment" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Alignment indexing and slicing" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "G R\n" ] } ], "source": [ "aa, bb = alignment['112A']\n", "print(aa, bb)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('L', 'L')" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "aa, bb = alignment.raw[10]\n", "aa, bb" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "H108 Y Y\n", "H109 S D\n", "H110 E D\n", "H111 D E\n", "H111A D -\n", "H111B E -\n", "H112B R -\n", "H112A G R\n" ] } ], "source": [ "for pos, (aa, bb) in alignment['108':'112A']:\n", " print(pos, aa, bb)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "H1 E Q\n", "H2 V V\n", "H3 Q Q\n", "H4 L L\n", "H5 Q V\n", "H6 Q Q\n", "H7 S S\n", "H8 G G\n", "H9 A A\n", "H11 E E\n" ] } ], "source": [ "for pos, (aa, bb) in alignment.raw[:10]:\n", " print(pos, aa, bb)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## CDR grafting" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "EVQLQQSGAELARPGASVKMSCKASGYTFTRYTMHWVKQRPGQGLEWIGYINPS-RGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYSEDDERGHYCLDYWGQGTTLTVSS\n", "+|||.||||||.||||+|||||||||||.||||||||||||||||+||||||||.|.||||||||||||||||||||||||||.+||||||||||||||||.++....+..||.|||||||||||\n", "QVQLVQSGAELDRPGATVKMSCKASGYTTTRYTMHWVKQRPGQGLDWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYDDE---RYDYLDRWGQGTTLTVSS\n", " ^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^^^^^^^^ " ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "alignment" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "QVQLVQSGAELDRPGATVKMSCKASGYTFTRYTMHWVKQRPGQGLDWIGYINPSRGYTNYNQKFKDKATLTTDKSSSTAYMQKTSLTSEDSAVYYCARYYSEDDERGHYCLDYWGQGTTLTVSS\n", " ^^^^^^^^ ^^^^^^^^ ^^^^^^^^^^^^^^^^^ \n" ] } ], "source": [ "chain.graft_cdrs_onto(chain2).print()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "EVQLQQSGAELARPGASVKMSCKASGYTTTRYTMHWVKQRPGQGLEWIGYINPSDRSYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYDDERYDYLDRWGQGTTLTVSS\n", " ^^^^^^^^ ^^^^^^^^^ ^^^^^^^^^^^^^^ \n" ] } ], "source": [ "chain2.graft_cdrs_onto(chain).print()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Different numbering schemes" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "from IPython.display import display, HTML" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "EXAMPLES = [\n", " 'EVQLQQSGAELARPGASVKMSCKASGYTFTSRRYTMHWVKQRPGQGLEWIGYINPSDERGYTNYNQKFKDKATLTTDKSSSTAYMQLSSLTSEDSAVYYCARYYSEDDERGHYCLDYWGQGTTLTVSSAKTTAPSVYPLA',\n", " 'EIVLTQSPATLSLSPGERATLSCRASKGVSTDLSGYSYLHWYQQKPGQAPRLLIYLGYASYLESGVPARFSGSGSGTDFTLTISSLEPEDFAVYYCQHSRDGTLPLTFGGGTKVEIK',\n", " 'QSVLTQPPSASGTPGQRVTISCSGSSSNIGSNTVHWYQQLPGTAPKLLIYSDNQRPSGVPDRFSGSKSGTSASLAISGLQSEDEADYYCAAWDDSLNGVFGGGTKLTVL'\n", "]" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "