{}%".format(ind, LENGTH, get_perc(ind))) current_count, current_verb = parse_line(line) current_verb = unicodedata.normalize("NFC", current_verb) current_url = "{}{}".format(TRANSLATE_STUB, current_verb) print("Current verb: {}".format(current_verb)) print("Current verb data: {}".format(current_verb.encode("utf-8"))) print("Current count: {}".format(current_count)) print("Current url: ->{}<-".format(current_url)) try: r = requests.get(current_url) tree = lxml.html.fromstring(r.text) results = MISMATCH_CSS(tree) if results: print("# of matches: {}".format(len(results))) else: print("results is none") match = results[0] THE_TEXT = html2text.html2text(etree.tostring(match).decode("utf-8")).rstrip() the_infinitive = "" the_person = "" the_number = "" the_tense = "" AMB = False if TEST1.search(THE_TEXT): the_infinitive = GET_INF1.search(THE_TEXT).group(1) AMB = True elif TEST2.search(THE_TEXT): matches = GET_INF2.search(THE_TEXT) the_tense = matches.group(1) the_infinitive = matches.group(2) the_person = matches.group(3) the_number = matches.group(4) AMB = False print("The infinitive : >{}<".format(the_infinitive)) if AMB: BIG_DICT[current_verb] = {"count": current_count, "spanish_dict_success": True, "infinitive": the_infinitive, "line": ind+1} else: BIG_DICT[current_verb] = {"count": current_count, "spanish_dict_success": True, "infinitive": the_infinitive, "line": ind+1} except: print("UNSPECIFIED FAILURE!") BIG_DICT[current_verb] = {"count": current_count, "spanish_dict_success": False, "infinitive": None, "line": ind+1} # with open("./mistakes/{}.html".format(current_verb), "w") as fh: # fh.write(r.text) # every twenty if (ind % 20) == 0: out_str = json.dumps(BIG_DICT, sort_keys=True, indent=2, separators=(',', ': ')) with open("./running.json", "w") as fh: fh.write(out_str) print() time.sleep(random.uniform(0.5, 2)) out_str = json.dumps(BIG_DICT, sort_keys=True, indent=2, separators=(',', ': '), ensure_ascii=False) with open("./finished.json", "w") as fh: fh.write(out_str) if __name__ == '__main__': STATUS = main() sys.exit(STATUS)