# Handling text with Python

## Get started

In [None]:
text1 = 'In this world nothing can be said to be certain except death and taxes'

In [None]:
len(text1)

In [None]:
text2 = text1.split(' ')

In [None]:
len(text2)

In [None]:
text2

## 1. Finding Specific Words

### Words that are more than 5 letters long

In [None]:
[w for w in text2 if len(w) > 5]

### Words that end with 'd'

In [None]:
[w for w in text2 if w.endswith('d')]

### Words that start with 'd'

In [None]:
[w for w in text2 if w.startswith('d')]

In [None]:
[w for w in text2 if w.startswith(('d','n'))]

## 2. Using set()

### Finding unique words from a list

In [None]:
text3 = 'to be or not to be'
text4 = text3.split(' ')

In [None]:
text4

In [None]:
text5 = set(text4)

In [None]:
len(text5)

In [None]:
text5

### set vs list performance

In [None]:
import time
import random

NUMBER_OF_ELEMENTS = 10000

# Create a list holding 0 .. NUMBER_OF_ELEMENTS-1 in random order
lst = list(range(NUMBER_OF_ELEMENTS))
random.shuffle(lst)

# Create a set from the list (same elements, hash-based lookup)
s = set(lst)

# Time membership tests against the set.
# perf_counter() is used instead of time() because it is a monotonic,
# high-resolution clock intended for measuring short durations; time()
# follows the system wall clock, which can be adjusted while we measure.
start = time.perf_counter()
for i in range(NUMBER_OF_ELEMENTS):
    i in s
end = time.perf_counter()
print(f'To test if {NUMBER_OF_ELEMENTS} elements are in the set, runtime is {end-start} seconds')

# Time the same membership tests against the list.
# Each `in` on a list scans the elements one by one until a match is found.
start = time.perf_counter()
for i in range(NUMBER_OF_ELEMENTS):
    i in lst
end = time.perf_counter()
print(f'To test if {NUMBER_OF_ELEMENTS} elements are in the list, runtime is {end-start} seconds')

## 3. More String Operations

### t in s - To check whether a substring exists in a given string

In [None]:
'Python' in 'Python is good'

### string.isupper() - To check whether all the characters of the string are uppercase

In [None]:
'PYTHON IS GOOD'.isupper()

In [None]:
'Python is good'.isupper()

### string.islower() - To check whether all the characters of the string are lowercase

In [None]:
'python is good'.islower()

In [None]:
'Python is good'.islower()

### string.istitle() - To check whether each word in the string starts with an uppercase letter followed by lowercase letters only

In [None]:
'Python Is Good'.istitle()

In [None]:
'Python is good'.istitle()

### string.isdigit() - To check whether the string contains digits only

In [None]:
'000002'.isdigit()

In [None]:
'SZ000002'.isdigit()

### string.isalpha() - To check whether the string contains alphabetic characters only

In [None]:
'Textmining'.isalpha()

In [None]:
'Text mining1'.isalpha()

### string.isalnum() - To check whether the string contains alphanumeric characters only

In [None]:
'SZ000002'.isalnum()

In [None]:
'SZ_000002#'.isalnum()

### Conversion between uppercase and lowercase

In [None]:
s1 = 'python is good'

### string.upper() - Returns a string in which all characters are uppercased

In [None]:
s2 = s1.upper()

In [None]:
s2

### string.lower() - Returns a string in which all characters are lowercased

In [None]:
s2.lower()

### string.capitalize() - Returns a string with only its first character capitalized

In [None]:
s1.capitalize()

### string.title() - Returns a string in which the first character of every word is capitalized

In [None]:
s1.title()

### string.split()

In [None]:
s3 = 'cattcatt'
s4 = s3.split('a')

In [None]:
s4

### join()

In [None]:
'a'.join(s4)

### Get all the characters of s3 

In [None]:
list(s3)

In [None]:
[c for c in s3]

### String formatting

In [None]:
'{} {}'.format('hello', 'world')

In [None]:
'{} {}'.format(24, 'seconds')

In [None]:
# Intentional failure: `+` cannot combine an int with a str, so this line
# raises TypeError. Use str.format() or an f-string to mix numbers and text.
24 + 'seconds' 

### f-string 

In [None]:
name = 'Eric'
age = 74

In [None]:
f'Hello, {name}. You are {age}.'

### string.strip() - Removes whitespace at the beginning and at the end of the string

In [None]:
s5 = '  a quick brown fox jumped over the lazy dog  '
s6 = s5.strip()

In [None]:
s6

### string.replace(old, new) - Returns a string where all occurrences of the old substring are replaced with the new substring

In [None]:
s6.replace('o', 'O')

In [None]:
s6.replace('o', 'O', 2)

### string.translate(table) - The translate() method returns a string where each character is mapped to its corresponding character in the translation table. The translation table is created by the static method maketrans().

In [None]:
intab = 'aeiou'
outtab = '12345'
table = str.maketrans(intab, outtab)
s7 = 'aeiou-xmppp'

In [None]:
s7.translate(table)

In [None]:
table_1 = str.maketrans(intab, outtab, 'xm')

In [None]:
s7.translate(table_1)

## 4. Index and Slice String

### Accessing characters by positive index number

In [None]:
s = 'Hello World!'

In [None]:
s[4]

### Accessing characters by negative index number

In [None]:
s[-3]

### Slicing strings

In [None]:
s[1:5]

In [None]:
s[:5]

In [None]:
s[-4:-1]

In [None]:
s[-2:]

### Specifying stride while slicing strings

In [None]:
s[:5]

In [None]:
s[:5:1]

In [None]:
s[:5:2]

In [None]:
s[::-1]

In [None]:
s[-1:-7:-2]

### string.find() - Returns the index of the first occurrence of the substring (or -1 if it is not found)

In [None]:
s.find('o')

In [None]:
s.find('or')

## 5. Writing to and Reading from CSV File

In [None]:
import csv

### Writing to a csv file

In [None]:
# Create (or overwrite) test.csv and store one record in it.
# newline='' hands line-ending control to the csv module.
with open('test.csv', mode='w', encoding='utf8', newline='') as csv_file:
    record = ('张三', '北京')
    csv.writer(csv_file).writerow(record)

### Reading from a csv file

In [None]:
# Read test.csv back and print the first two columns of every row.
# newline='' is passed as the csv module requires: without it, newlines
# embedded inside quoted fields are not interpreted correctly.
with open('test.csv', 'r', encoding='utf8', newline='') as rf:
    reader = csv.reader(rf)
    for row in reader:
        # row is a list of strings, one per column
        print(f'姓名:{row[0]}, 住址:{row[1]}')

# Exercise1
## Write a Python program to get a string from a given string where all occurrences of its first char have been changed to '@' except the first char itself.
### Sample String : 'restart'
### Expected Result : 'resta@t'

In [None]:
def change_char(str1):
    """Replace later occurrences of the first character of ``str1`` with '@'.

    The first character itself is kept as-is; only its repetitions further
    along the string are substituted.

    Args:
        str1: The input string.

    Returns:
        The transformed string. An empty string is returned unchanged.
    """
    ### START CODE HERE ###
    if str1:
        first_char = str1[0]
        # Substitute only in the remainder so the leading character survives.
        str1 = first_char + str1[1:].replace(first_char, '@')
    ### END CODE HERE ###
    return str1

In [None]:
# Check your function
print(change_char('restart'))
print(change_char('text'))

#### Expected output
```
resta@t
tex@
```

# Exercise2
## Given an input string containing a mix of lowercase and uppercase letters, arrange the characters so that all lowercase letters come first.
### Sample String : 'PyNaTive'
### Expected Result : 'yaivePNT'

In [None]:
def arrange_chars(str1):
    """Rearrange ``str1`` so that all lowercase letters come first.

    The relative order of characters is preserved within each group.
    Characters that are not lowercase letters (uppercase letters, digits,
    punctuation, ...) are placed after the lowercase ones.

    Args:
        str1: The input string.

    Returns:
        A new string with the lowercase letters followed by the rest.
    """
    ### START CODE HERE ###
    lowercase = [ch for ch in str1 if ch.islower()]
    others = [ch for ch in str1 if not ch.islower()]
    str1 = ''.join(lowercase + others)
    ### END CODE HERE ###
    return str1

In [None]:
# Check your function
print(arrange_chars('PyNaTive'))
print(arrange_chars('OpTYabi'))

#### Expected output
```
yaivePNT
pabiOTY
```

# Exercise3
## Write a Python function that takes a list of words and returns the word with the largest length.
### Sample List : ["Python", "Text", "Analysis"]
### Expected Result : 'Analysis'

In [None]:
def find_longest_word(words_list):
    """Return the longest word in ``words_list``.

    When several words share the maximum length, the one that appears first
    in the list is returned.

    Args:
        words_list: An iterable of strings.

    Returns:
        The longest word, or an empty string if ``words_list`` is empty.
    """
    longest_word = ''
    max_len = 0
    ### START CODE HERE ###
    for word in words_list:
        # Strict comparison keeps the earliest word among equal lengths.
        if len(word) > max_len:
            longest_word = word
            max_len = len(word)
    ### END CODE HERE ###
    return longest_word

In [None]:
# Check your function
print(find_longest_word(["Python", "Text", "Analysis"]))

#### Expected output
```
Analysis
```