{ "cells": [ { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# reads a string and returns a masked version of the string,\n", "# where sequences of `length` (default=9) digits are replaced \n", "# with the replacement character (default='X')\n", "#\n", "# if the string does not fit in memory, we'll need a stream-based approach instead\n", "def mask_digit_sequences(input_str, length=None, replacement=None):\n", " # this is the recommended way to use default arguments in \n", " # python functions\n", " if length is None:\n", " length = 9\n", " if replacement is None:\n", " replacement = \"X\"\n", " \n", " # i'll use a state-machine approach that counts the number of\n", " # digits seen in the current sentence\n", " \n", " current_sequence = \"\"\n", " \n", " for (i,char) in enumerate(input_str):\n", " if char.isdigit():\n", " current_sequence += char\n", " else: # not a digit\n", " # broken sequence\n", " if len(current_sequence) != 0:\n", " print(current_sequence, end='')\n", " current_sequence = \"\"\n", " else:\n", " print(char, end='')\n", " \n", " if len(current_sequence) == length:\n", " for i in range(length):\n", " print(replacement, end='')\n", " current_sequence = \"\"\n", " \n", " # we've reached the end of the string and the current sequence is not empty\n", " if i == len(input_str) -1 and len(current_sequence) != 0:\n", " print(current_sequence,end='')\n" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "12" ] } ], "source": [ "mask_digit_sequences(\"12\")" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "aaaa12345678bbbbXXXXXXXXX0123" ] } ], "source": [ "mask_digit_sequences(\"aaaa12345678bbbbb1234567890123\")" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "aaaa12345678bbbb---------0123" ] } ], "source": [ "mask_digit_sequences(\"aaaa12345678bbbbb1234567890123\",replacement='-')" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Claudio’s number is XXXXXXXXX and his friend’s number is XXXXXXXXXXXXXXXXXX55" ] } ], "source": [ "mask_digit_sequences(\"Claudio’s number is 123456789 and his friend’s number is 12345678900033322255\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 2 }