In [73]:
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
from datetime import date
from dateutil.parser import parse
history = """December, 1989:Implementation started
1990:Internal releases at CWI
February 20, 1991:0.9.0 (released to alt.sources)
February, 1991:0.9.1
September, 1991:0.9.2
December 24, 1991:0.9.4
January 2, 1992:0.9.5 (Macintosh only)
April 6, 1992:0.9.6
January 9, 1993:0.9.8
July 29, 1993:0.9.9
January 26, 1994:1.0.0
February 15, 1994:1.0.2
May 4, 1994:1.0.3
July 14, 1994:1.0.4
October 11, 1994:1.1
November 10, 1994:1.1.1
April 13, 1995:1.2
October 13, 1995:1.3
October 25, 1996:1.4
January 3, 1998:1.5
October 31, 1998:1.5.1
April 13, 1999:1.5.2
September 5, 2000:1.6
October 16, 2000:2.0
February 25, 2001:1.6.1
April 17, 2001:2.1
December 21, 2001:2.2
July 29, 2003:2.3
November 30, 2004:2.4
September 16, 2006:2.5
October 1, 2008:2.6
December 3, 2008:3.0
June 27, 2009: 3.1
July 3, 2010: 2.7
February 20, 2011: 3.2
September 29, 2012: 3.3
March 16, 2014: 3.4
September 13, 2015: 3.5
December 23, 2016: 3.6
June 27, 2018:3.7
January 1, 2020: 2.7 EOL"""

dates = []
names = []
for entry in history.split('\n'):
 datestr, version = entry.split(':')
 dates.append(parse(datestr))
 names.append(version)


def plot_timeline(dates, names, title, spans=[]):
 levels = np.array([-5, 5, -4, 4, -3, 3, -2, 2])
 fig, ax = plt.subplots(figsize=(12, 5))

 # Create the base line
 start = min(dates)
 stop = max(dates)
 ax.plot((start, stop), (0, 0), 'k', alpha=.5)

 # Iterate through releases annotating each one
 for ii, (iname, idate) in enumerate(zip(names, dates)):
 level = levels[ii % len(levels)]
 vert = 'top' if level < 0 else 'bottom'

 ax.scatter(idate, 0, s=100, facecolor='w', edgecolor='k', zorder=9999)
 # Plot a line up to the text
 ax.plot((idate, idate), (0, level), c='r', alpha=.7)
 # Give the text a faint background and align it properly
 ax.text(idate, level, iname,
 horizontalalignment='right', verticalalignment=vert, fontsize=14,
 backgroundcolor=(1., 1., 1., .3))
 for args in spans:
 ax.axvspan(*args, alpha=0.2)
 ax.set(title=title)
 # Set the xticks formatting
 # format xaxis with 3 month intervals
 ax.get_xaxis().set_major_locator(mdates.YearLocator())
 ax.get_xaxis().set_major_formatter(mdates.DateFormatter("%Y"))
 fig.autofmt_xdate()

 # Remove components for a cleaner look
 plt.setp((ax.get_yticklabels() + ax.get_yticklines() +
 list(ax.spines.values())), visible=False)
 plt.tight_layout()
 plt.savefig(f"{title.lower().replace(' ','_')}.png")


plot_timeline(dates, names, "Python Release Dates",
 [(parse("july 2, 2010"), parse("january 1,2020")),
 (parse("December 3, 2008"), date.today(), 0.2, 0.8)
 ])

# Python 3: More than just `print()`

* Andrew Bolster
* Threat Intelligence Data Scientist (Alert Logic)
* Founding Director (Farset Labs)
* Pythionista for ~10 years

# What We'll Cover

* History of Python
* Significant Features


## But TL;DR?

* As of Python 3.7; in all but one test type (_why are you doing crypto in python?_), [*3 is 20% faster than 2*](https://hackernoon.com/which-is-the-fastest-version-of-python-2ae7c61a6b2b)
* The language features developed make Python both performent and stable
* The breaking changes between 2 and 3 were due to poor historical architectural decisions; there are no plans for breaking changes going forward
* [2.7 EOL is in less than a year](https://pythonclock.org/): Most major packages have already dropped (non security) support for it, including:
 * Numpy
 * Pandas
 * matplotlib
 * dask
 * sympy
* Of the [Top 360 most popular Python modules](http://py3readiness.org/) only one hasn't migrated to Python 3: [`apache-beam`](https://beam.apache.org/get-started/beam-overview/) (Which is a Java-first SDK anyway so stuff 'em)

# Timeline



# Significant Features/Changes
* `print` _let's just get that out of the way, shall we?_
* Integer Division
* f-strings
* υηι¢σ∂є
* Iterable Unpacking
* Iterators, Generators, `next`s, oh my!
* changes to `dict` behaviour
* dataclasses

## `print()`

Probably the most obvious, contentious, but also meaningless change in py3


In [82]:
print("Hello World!")

Hello World!


But it's more than just brackets;

In [83]:
print("Hello", "World", "!") # Native Tuples

Hello World !


In [84]:
print("Hello", "World", "!", sep='\t') # Custom Separators

Hello	World	!


In [85]:
print("Hello", end=' ') # Tail override
print("World!")

Hello World!


In [87]:
import sys
print("fatal error", file=sys.stderr) # Can still do piping to file handlers

fatal error


## Integer Division (/ vs //)

`print` as a keyword vs `print()` as a function is just a bit of syntactic sugar to simplify the cPython API, however... some changes are more subtle and _more likely to cause non-trivial bugs when porting 'stable' code_

This is one of them...

In [66]:
print('4 / 2 =', 4 / 2)
print('3 / 2 =', 3 / 2)
print('4 // 2 =', 4 // 2)
print('3 // 2 =', 3 // 2)
print('4 / 2.0 =', 4 / 2.0)
print('3 / 2.0 =', 3 / 2.0)
print('3 // 2.0 =', 3 // 2.0)

4 / 2 = 2.0
3 / 2 = 1.5
4 // 2 = 2
3 // 2 = 1
4 / 2.0 = 2.0
3 / 2.0 = 1.5
3 // 2.0 = 1.0


Division *now* works in a way that you'd expect a duck-type language to; i.e.

* `/` _always_ returns a `float` even when it's not numerically necessary
* `//` returns as the type of the denominator, but always with an integer value ($\in \mathbb{Z}$)


In [67]:
print('4 / 2 =', type(4 / 2))
print('3 / 2 =', type(3 / 2))
print('4 // 2 =', type(4 // 2))
print('3 // 2 =', type(3 // 2))
print('4 / 2.0 =', type(4 / 2.0))
print('3 / 2.0 =', type(3 / 2.0))
print('3 // 2.0 =', type(3 // 2.0))

4 / 2 = 
3 / 2 = 
4 // 2 = 
3 // 2 = 
4 / 2.0 = 
3 / 2.0 = 
3 // 2.0 = 


Also note that this is not `round`; this is `floor` division

In [71]:
print("5 / 6 = ", 5/6)
print("5 // 6 = ", 5//6)
print("‖5/6‖ = ", round(5/6))

5 / 6 = 0.8333333333333334
5 // 6 = 0
‖5/6‖ = 1


And if anyone is wondering about performance...


In [75]:
% % timeit

x = 7//2

14 ns ± 1.15 ns per loop (mean ± std. dev. of 7 runs, 100000000 loops each)


In [78]:
from math import floor

In [79]:
% % timeit

x = floor(7/2)

84.7 ns ± 3.32 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)


## υηι¢σ∂є

From the original 3.0 release notes:
> Everything you thought you knew about binary data and Unicode has changed.

* All strings are Unicode, but unicode encoded strings are stored as binary
* No more `u""` junk
* Incorrectly encoded `open`'s will fail **loudly**

The transition is largely painless unless you're doing something really 'clever' to get around python2's utter failings in interacting with Unicode in sensible ways. 

Best of all... this isn't just a string processing change; this is fundamental to the interpreter... so...

In [96]:
from numpy import array, cos, sin


def rotate(vector, angle):
 θ = angle
 mat = [[cos(θ), -sin(θ)],
 [sin(θ), cos(θ)]]
 mat = array(mat)
 return mat @ vector # << Sneaky mat_mul operator for free too


rotate([1, 0], 90)

array([-0.44807362, 0.89399666])

Unfortunately you can only use printable characters as variable identifiers, so no emojis, but you can go mad elsewhere:

In [107]:
import emoji # pip install emoji
this_is_a_regular_string = emoji.emojize(
 "Python 2 is :poop:", use_aliases=True)
print(this_is_a_regular_string)
print(this_is_a_regular_string.replace('is', 'was'))
print(''.join(reversed(this_is_a_regular_string))) # << Spoilers Ahead

Python 2 is 💩
Python 2 was 💩
💩 si 2 nohtyP


## f-strings (py3.6)

* Replacement for `%` and `str.format()` methods
* Jinja-like templating of local scope variables
 * (Basically like having an interpreter inside a string)

In [28]:
thing = 'thing'
print(f"This is a {thing}")

This is a thing


In [30]:
print(f"This is a loud {thing.upper()}")

This is a loud THING


In [44]:
import datetime
from datetime import date


class Person:
 def __init__(self, first_name: str, last_name: str, birthday: date, gender: str):
 self.first_name = first_name
 self.last_name = last_name
 self.birthday = birthday
 self.gender = gender

 @property
 def age(self):
 today = date.today()
 return today.year - self.birthday.year - ((today.month, today.day) < (self.birthday.month, self.birthday.day))

 def __str__(self):
 return f"{self.first_name} {self.last_name}"

 def __repr__(self):
 return f"{self.first_name} {self.last_name} ({self.age})"


p = Person("Andrew", "Bolster", date(1988, 5, 17), 'Male')
p

Andrew Bolster (30)

In [45]:
f"{p}" # Defaults to __str__

'Andrew Bolster'

In [46]:
f"{p!r}" # Can be poked to use __repr__

'Andrew Bolster (30)'

In [48]:
f"{p}"\
 f" is {p.age}"\
 f" and this is a multiline {'f-string'}"

'Andrew Bolster is 30 and this is a multiline f-string'

Performance wise; f-strings are _fast_
About 30% faster than `%`
50% faster than `.format()`

Also support standard formatting syntax

In [49]:
from math import pi
pi

3.141592653589793

In [65]:
f"{pi:07.4}" # {value:width.precision}

'003.142'

## Iterators and stuff

### `range` behaves like `xrange` used to

`xrange` is dead, long live `range`


In [113]:
span = range(10000)
len(span), sum(span), max(span), min(span)

(10000, 49995000, 9999, 0)

In [110]:
4 in span

True

So what?

`range` now returns an 'iterator'; elements are not populated until used

In [117]:
stupid_span = range(int(10e16))
len(stupid_span)

100000000000000000

In [118]:
for i in stupid_span: # Doesn't blow up memory
 if i ** 2 > 10000:
 break
print(i)

101


`dict`, `zip`, `reversed` and a load of other functions now return iterators

In [123]:
zip(range(5), range(5, 0, -1))



In [126]:
from string import ascii_letters
dict(zip(ascii_letters, range(5)))

{'a': 0, 'b': 1, 'c': 2, 'd': 3, 'e': 4}

In [127]:
d = dict(zip(ascii_letters, range(5)))
d.keys()

dict_keys(['a', 'b', 'c', 'd', 'e'])

Note; this is a 'view', not an actual list, even though it looks like it.
The motivation for this is that `dict.items()` etc. in py2 produced realised-views of the values as a fully populated list. This was expensive.

## Oh, BTW, `dicts` are now sorted!
Previously insertion-sorting was not guaranteed; `dicts` will *always* be returned in the same order as they were inserted

In [130]:
del d['a']
d['a'] = -1
d

{'b': 1, 'c': 2, 'd': 3, 'e': 4, 'a': -1}

However, since this is a view; things don't always work how you'd imagine

In [132]:
list(reversed(range(5)))

[4, 3, 2, 1, 0]

In [133]:
reversed(d.keys())

TypeError: 'dict_keys' object is not reversible

In [136]:
list(reversed(list(d.keys()))) # need to instantiate the view

['a', 'e', 'd', 'c', 'b']

The downside to all of this is that sometimes your code will be peppered with `list`s...

## Advanced Unpacking


In [143]:
# First with sensible lists:
values = [0, 1, 2, 3, 4, 5, 6, 7]
while values:
 first, *values = values
 print(first, values)

0 [1, 2, 3, 4, 5, 6, 7]
1 [2, 3, 4, 5, 6, 7]
2 [3, 4, 5, 6, 7]
3 [4, 5, 6, 7]
4 [5, 6, 7]
5 [6, 7]
6 [7]
7 []


In [142]:
# First with sensible lists:
values = [0, 1, 2, 3, 4, 5, 6, 7]
while values:
 head, *values, tail = values
 print(head, tail, values)

0 7 [1, 2, 3, 4, 5, 6]
1 6 [2, 3, 4, 5]
2 5 [3, 4]
3 4 []


## Type annotations

* Lazy type hinting; 
* Great for documenting what you expect and IDE-assist; doesn't do type validation
* Extensions available for auto generation of sphinx-docs based on hints
* Optional type checking via [mypy](http://mypy-lang.org/)
* Not used for any runtime performance optimisation or anything
* _Kinda_ used in `dataclasses`

In [25]:
def add(a: int, b: int)->int:
 return a+b

add(5, 5)

10

In [26]:
add('this', 'that') # badness

'thisthat'

In [None]:
from typing import Iterator

def fib(n: int) -> Iterator[int]:
 a, b = 0, 1
 while a < n:
 yield a
 a, b = b, a+b

In [74]:
from typing import *
from operator import itemgetter

def listtacular(listicle: Dict[AnyStr,int])->List[AnyStr]:
 listable = []
 for k, v in sorted(listicle.items(), key=itemgetter(1)):
 listable.append(k)
 return listable

listtacular({'first':1, 'fifth':5, 'second':2, 'forth':4, 'third':3})

['first', 'second', 'third', 'forth', 'fifth']

## `dataclass` (py3.7)

Basically, a massive shortcut for building object classes

Highly recommend watching Raymond Hettinger's PyCon 2018 talk https://www.youtube.com/watch?v=T-TwcmT6Rcw 

TLDR:
1. It makes a mutable data holder, in the spirit of `collections.namedtuple`
2. It writes boiler-plate code for you, simplifying the process of writing the `class`.

In [1]:
# Code You write
from dataclasses import dataclass


@dataclass
class Color:
 hue: int
 saturation: float
 lightness: float = 0.5

In [None]:
from dataclasses import Field, _MISSING_TYPE, _DataclassParams

class Color:
 'Color(hue: int, saturation: float, lightness: float = 0.5)'

 def __init__(self, hue: int, saturation: float, lightness: float = 0.5) -> None:
 self.hue = hue
 self.saturation = saturation
 self.lightness = lightness

 def __repr__(self):
 return (self.__class__.__qualname__ +
 f"(hue={self.hue!r}, saturation={self.saturation!r}, "
 f"lightness={self.lightness!r})")

 def __eq__(self, other):
 if other.__class__ is self.__class__:
 return (self.hue, self.saturation, self.lightness) == (other.hue, other.saturation, other.lightness)
 return NotImplemented

 __hash__ = None

 hue: int
 saturation: float
 lightness: float = 0.5

In [2]:
 __dataclass_params__ = _DataclassParams(
 init=True,
 repr=True,
 eq=True,
 order=False,
 unsafe_hash=False,
 frozen=False)

 __dataclass_fields__ = {
 'hue': Field(default=_MISSING_TYPE,
 default_factory=_MISSING_TYPE,
 init=True,
 repr=True,
 hash=None,
 compare=True,
 metadata={}),
 'saturation': Field(default=_MISSING_TYPE,
 default_factory=_MISSING_TYPE,
 init=True,
 repr=True,
 hash=None,
 compare=True,
 metadata={}),
 'lightness': Field(default=0.5,
 default_factory=_MISSING_TYPE,
 init=True,
 repr=True,
 hash=None,
 compare=True,
 metadata={})
 }
 __dataclass_fields__['hue'].name = 'hue'
 __dataclass_fields__['hue'].type = int
 __dataclass_fields__['saturation'].name = 'saturation'
 __dataclass_fields__['saturation'].type = float
 __dataclass_fields__['lightness'].name = 'lightness'
 __dataclass_fields__['lightness'].type = float

In [67]:
from dataclasses import dataclass
from datetime import date


@dataclass
class Person: # Basically, gets rid of boring boilerplate
 first_name: str
 last_name: str
 birthday: date
 gender: str


p = Person("Andrew", "Bolster", date(1988, 5, 17), 'Male')
p

Person(first_name='Andrew', last_name='Bolster', birthday=datetime.date(1988, 5, 17), gender='Male')

In [69]:
@dataclass
class Person: # Basically, gets rid of boring boilerplate
 first_name: str
 last_name: str
 birthday: date = field(repr=False)
 gender: str = field(repr=False)


p = Person("Andrew", "Bolster", date(1988, 5, 17), 'Male')
p

Person(first_name='Andrew', last_name='Bolster')

In [71]:
from dataclasses import dataclass, field
from datetime import date


@dataclass
class Person: # Basically, gets rid of boring boilerplate
 first_name: str
 last_name: str
 birthday: date = field(repr=False)
 gender: str = field(repr=False)

 @property
 def age(self):
 today = date.today()
 return today.year - self.birthday.year \
 - ((today.month, today.day) < (self.birthday.month, self.birthday.day))

 def __str__(self):
 return f"{self.first_name} {self.last_name} ({self.age})"


p = Person("Andrew", "Bolster", date(1988, 5, 17), 'Male')
p

Person(first_name='Andrew', last_name='Bolster')

In [72]:
print(p)

Andrew Bolster (30)


### But wait, there's more!

Passing class decorator arguments to augment output objects; e.g.

* 'order': adds `__lt__`/`__gt__` etc methods based on tuple-ordering of attributes
* 'frozen': adds `__hash__` method to add immutability / hashability


Also `field` declarations to provide per-attribute control over these things

In [83]:
from dataclasses import dataclass, field
from datetime import datetime
import uuid

@dataclass(order=True, frozen=True)
class MP:
 name: str
 gender: str = field(repr=False)
 salary: int = field(hash=False, repr=False, metadata={'units': 'GBP'})
 age: int = field(hash=False, repr=False)
 party: str = field(hash=True, repr=True, default='Independent')
 ate: list = field(default_factory=list, compare=False, repr=False)
 emp_id: uuid.UUID = field(
 default_factory=uuid.uuid4, compare=True, repr=False
 )

 def eats(self, thing):
 self.ate.append((thing, datetime.now()))

In [96]:
e1 = MP(name='Sammy Wilson',
 gender='male', party='DUP',
 salary=77_379, # Another cool py3 feature ;)
 age=65,

 )
e2 = MP(name='Caroline Lucas',
 gender='female', party='Greens',
 salary=77_379, # Another cool py3 feature ;)
 age=56,
 )
e1 # Non-repr fields not displayed

MP(name='Sammy Wilson', party='DUP')

In [97]:
[e1, e2]

[MP(name='Sammy Wilson', party='DUP'),
 MP(name='Caroline Lucas', party='Greens')]

In [98]:
sorted([e1, e2]) # thanks to 'order'

[MP(name='Caroline Lucas', party='Greens'),
 MP(name='Sammy Wilson', party='DUP')]

In [99]:
affiliations = {
 e1: 'Brexiteers',
 e2: 'Sane'
}
affiliations # thanks to 'frozen'

{MP(name='Sammy Wilson', party='DUP'): 'Brexiteers',
 MP(name='Caroline Lucas', party='Greens'): 'Sane'}

In [100]:
e1.eats('fish')
e1.eats('chips')
for e, camp in affiliations.items():
 msg = f"{e.name}, from the {camp} camp, "\
 f"ate {' and '.join([m[0] for m in e.ate]) if e.ate else 'Nothing'}"
 print(msg)

Sammy Wilson, from the Brexiteers camp, ate fish and chips
Caroline Lucas, from the Sane camp, ate Nothing


# What we done covered

* `print` 
* / vs // 
* **unicode**
* Catching Constructions (i.e. first,*rest = iterable)
* changes to dict (i.e. views)
* f-strings (including performance)
* `typing` / type hinting
* `dataclasses`


**Anything I've missed / Undersold?**

# Conclusion

* If you're not using at least Python 3.5, you're missing out

* If you're still stuck 2.7, you're going to be left behind

* If you're still _starting new projects_ in 2.7, you deserve all the pain that's coming your way