In [1]:
# Copyright 2014 Brett Slatkin, Pearson Education Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Preamble to mimic the book environment
import logging
from pprint import pprint
from sys import stdout as STDOUT

### Simple, but a bit dense and memory-inefficient!

In [2]:
# Example 1
def index_words(text):
    """Return a list with the offset at which each word of ``text`` starts.

    Offset 0 is included whenever ``text`` is truthy, and every space
    character contributes the offset of the position right after it.
    The complete list is built in memory before it is returned.
    """
    # Seed with the first word's offset only when there is any text at all.
    starts = [0] if text else []
    starts.extend(
        position + 1
        for position, char in enumerate(text)
        if char == ' '
    )
    return starts

In [3]:
# Example 2
# A single assignment of the full sentence; the earlier truncated value
# ('Four score and seven years ago...') was overwritten before any use.
address = (
    'Four score and seven years ago our fathers brought forth on this '
    'continent a new nation, conceived in liberty, and dedicated to the '
    'proposition that all men are created equal.'
)
result = index_words(address)
# Show only the first few offsets rather than the whole list.
print(result[:3])

[0, 5, 11]


### Use generators: functions that use `yield` expressions

In [4]:
# Example 3
def index_words_iter(text):
    """Yield, one at a time, the offset at which each word of ``text`` starts.

    Offset 0 is produced first whenever ``text`` is truthy; after that,
    each space character produces the offset of the position right after
    it. No intermediate list is built.
    """
    if text:
        yield 0
    # Delegate to a generator expression; offsets are still produced lazily.
    yield from (
        position + 1
        for position, char in enumerate(text)
        if char == ' '
    )

In [5]:
### Calling list() on the generator? Think of the memory!

In [6]:
# Example 4
# Unpack the generator into a list so it can be sliced; this holds every
# offset in memory at once.
result = [*index_words_iter(address)]
print(result[0:3])

[0, 5, 11]


### Yields output one word at a time. Memory efficient. BEST!

In [7]:
# Example 5
def index_file(handle):
    """Yield word-start offsets for the lines produced by iterating ``handle``.

    ``handle`` is consumed one line at a time. The running offset counts
    every character seen so far across all lines. For each non-empty line
    the offset of its first character is yielded, and each space yields
    the offset of the position right after it.
    """
    consumed = 0
    for line in handle:
        if line:
            yield consumed
        # Count characters relative to where this line began.
        line_start = consumed
        for column, char in enumerate(line, start=1):
            consumed = line_start + column
            if char == ' ':
                yield consumed

In [8]:
# Example 6
from itertools import islice

address_lines = """Four score and seven years
ago our fathers brought forth on this
continent a new nation, conceived in liberty,
and dedicated to the proposition that all men
are created equal."""

# Write the sample text to disk so index_file has a real file to read.
with open('address.txt', mode='w') as f:
    f.write(address_lines)

# Pull only the first three offsets from the generator.
with open('address.txt') as f:
    it = index_file(f)
    results = islice(it, 3)
    print(list(results))

[0, 5, 11]
