import csv , pprint from collections import defaultdict pp = pprint.PrettyPrinter(indent=2) # create a dictionary where the key is the DDC identifier and the value is a list of tuples def doMystuff(textName): """ a function that returns nice lists""" dictDewey = defaultdict(set) with open(textName, 'r', errors='ignore') as csvfile: reader = csv.DictReader(csvfile) for row in reader: dictDewey[row['Dewey classification']].add((row['Type of topic'],row['Topic'])) pp.pprint(dictDewey) # create a clean dictionary cleanDict = defaultdict(dict) for ddc, stuff in dictDewey.items(): for typeTopic, topic in stuff: if typeTopic not in cleanDict[ddc] and topic not in cleanDict[ddc]: cleanDict[ddc][typeTopic] = [topic] if topic not in cleanDict[ddc][typeTopic]: cleanDict[ddc][typeTopic].append(topic) pp.pprint(cleanDict) # print the bullet list for ddc, types in cleanDict.items(): print('-',ddc+':') for typeTopic, listTopics in types.items(): print(' -',typeTopic+':') for topic in listTopics: print(' -',topic) # reproduce the physical organization at the British Library dictCollocation = defaultdict(set) with open(textName, 'r', errors='ignore') as csvfile: reader = csv.DictReader(csvfile) for row in reader: dictCollocation[row['Content type']].add(row['BL record ID']) pp.pprint(dictCollocation) # print the bullet list for key,values in dictCollocation.items(): print('- '+key+':') for value in values: print(' - '+value)