# NumPy Basics

## ndarray
NumPy arrays (`ndarray`) are important because they let you express batch operations on whole blocks of data without writing explicit `for` loops. This practice is usually called **vectorization**.

In [11]:
import pandas as pd
import numpy as np

In [24]:
# Build an ndarray from a plain Python list of the integers 2..7.
data1 = list(range(2, 8))
np.array(data1)

array([2, 3, 4, 5, 6, 7])

In [26]:
# A tuple of equal-length lists converts to a 2-D array, one row per list.
data2 = (
    [1, 2, 3, 4],
    [5, 6, 7, 8],
)
arr2 = np.array(data2)
arr2

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [30]:
# Tuple of dimension sizes: 2 rows by 4 columns for the array built above.
arr2.shape

(2, 4)

In [31]:
# 1-D array of ten zeros; the values are floats (note the trailing dots in the output).
np.zeros(10)

array([ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])

In [32]:
# The shape is passed as a tuple: 2 rows by 3 columns, every element 1.0.
np.ones((2, 3))

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [33]:
# Array counterpart of range(): the integers 0 through 14.
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [14]:
# 2x3 integer array used by the arithmetic examples that follow.
data = np.array(
    [
        [2, 4, 6],
        [3, 5, 9],
    ]
)
data

array([[2, 4, 6],
       [3, 5, 9]])

In [17]:
# Arithmetic between equal-shape arrays is applied element by element,
# so each entry of the result is three times the matching entry of data.
data + data + data

array([[ 6, 12, 18],
       [ 9, 15, 27]])

In [16]:
# A scalar operand is applied to every element of the array.
data * 2

array([[ 4,  8, 12],
       [ 6, 10, 18]])

In [18]:
# Tuple of dimension sizes: 2 rows by 3 columns.
data.shape

(2, 3)

In [19]:
# Element type shared by every value in the array; inferred as int64 here
# from the all-integer input.
data.dtype

dtype('int64')

In [23]:
# Wrap the 2-D array in a DataFrame; with no labels supplied, the rows and
# columns get default integer labels.
df = pd.DataFrame(data)
df

   0  1  2
0  2  4  6
1  3  5  9


### Data types for ndarrays

In [36]:
# Rows mixing ints and floats: the resulting array holds a single float dtype.
data1 = (
    [1, 2, 4.5],
    [1.1, 3.4, 3.9],
    [0, 0.88, 0.45],
)
arr1 = np.array(data1)
arr1

array([[ 1.  ,  2.  ,  4.5 ],
       [ 1.1 ,  3.4 ,  3.9 ],
       [ 0.  ,  0.88,  0.45]])

In [39]:
# The mixed int/float input was stored under one common dtype, float64.
arr1.dtype

dtype('float64')

In [41]:
# astype returns a converted copy (arr1 itself stays float). Casting float to
# int drops the fractional part rather than rounding: 4.5 -> 4, 3.9 -> 3, 0.88 -> 0.
arr1.astype(np.int32)

array([[1, 2, 4],
       [1, 3, 3],
       [0, 0, 0]], dtype=int32)

### Subsetting / Slicing

In [42]:
# A single integer index on a 2-D array selects a whole row (here the first).
arr1[0]

array([ 1. ,  2. ,  4.5])

In [43]:
# Select row 0, column 1 with one multi-dimensional index. The chained form
# arr1[0][1] gives the same value but first builds a temporary row array.
arr1[0, 1]

2.0

In [45]:
# Index 1 selects the second row.
arr1[1]

array([ 1.1,  3.4,  3.9])

### Mathematical and Statistical Methods

In [68]:
# Draw a 2x3 array of standard-normal samples. randn already returns float64,
# so no astype(np.float64) cast is needed (it only produced a redundant copy).
# Note: this rebinds arr2, and the values differ on every run (no seed is set).
arr2 = np.random.randn(2, 3)
arr2

array([[ 1.18302338, -1.14660054, -0.76273185],
       [ 1.16602911,  0.08769594,  1.23586991]])

In [69]:
# Matrix product of the transpose (3x2) with arr2 (2x3), giving a 3x3 result.
arr3 = np.dot(arr2.T, arr2)
arr3

array([[ 2.7591682 , -1.25419923,  0.53873069],
       [-1.25419923,  1.32238338,  0.98292953],
       [ 0.53873069,  0.98292953,  2.10913431]])

In [70]:
# Element-wise square root. arr3 contains negative off-diagonal entries, so
# those positions come out as nan and NumPy reports a warning for them.
np.sqrt(arr3)

RuntimeWarning: invalid value encountered in sqrt
  """Entry point for launching an IPython kernel.


array([[ 1.66107441,         nan,  0.73398276],
       [        nan,  1.1499493 ,  0.99142802],
       [ 0.73398276,  0.99142802,  1.45228589]])

In [71]:
# With no axis argument the mean is taken over all elements, giving one scalar.
arr2.mean()

0.29388099170824028

In [73]:
# Standard deviation over all elements, returned as a single scalar.
arr2.std()

0.97232450582277508

In [74]:
# Sum of every element in the array, returned as a single scalar.
arr2.sum()

1.7632859502494416

In [78]:
# Sum down the rows (axis 0), leaving one total per column.
arr2.sum(axis=0)

array([ 2.34905249, -1.0589046 ,  0.47313806])