import re full_line = "" with open('COVID19_US.html', 'r') as f: lines = f.readlines() for line in lines: full_line += line.strip("\n").replace("\t", "") res = re.findall( r'