{
 "metadata": {
  "signature": "sha256:d463d49a32f1860a82eabf42f9564ad318e0ed8f853a5c2ca27f6c30d927e686"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "# Topic Modeling on Turkish News\n",
      "by [Talha Oz](http://www.mli.gmu.edu/toz/wordpress/)\n",
      "\n",
      "Details on the project are available on [GitHub](https://github.com/oztalha/News-Commentary-Tweets-of-Elites)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import pandas as pd\n",
      "from gensim import corpora, models\n",
      "from nltk.corpus import stopwords\n",
      "import nltk\n",
      "import snowballstemmer\n",
      "import re\n",
      "from operator import itemgetter"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 36
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Read only the three columns used in this notebook. parse_dates=[0] asks pandas\n",
      "# to parse column 0 as dates (presumably 'dt' -- confirm against the CSV layout).\n",
      "df = pd.read_csv('../data/TR-news.csv',usecols=['dt','title','newstxt'],parse_dates=[0])\n",
      "# An empty 'newstxt' cell is read as NaN (a float), which would make the regex\n",
      "# substitution in the tokenising cell raise TypeError. Replace NaN with '' so every\n",
      "# entry is a str and documents[i] still corresponds to df row i.\n",
      "documents = df.newstxt.fillna('').tolist()"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 2
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "stemmer = snowballstemmer.TurkishStemmer()\n",
      "# NLTK's Turkish stop words plus a hand-picked list of extra tokens to drop.\n",
      "stoplist = stopwords.words('turkish')\n",
      "stoplist.extend(\"bir active number yes titlesection bootstrap tabtitle tboot tab tabcontent contentsection nin n\u0131n\".split())\n",
      "# Tokens are compared against this set after they have been stemmed, so the\n",
      "# stop words are stemmed as well.\n",
      "stoplist = stemmer.stemWords(stoplist)\n",
      "stoplist = set(stoplist)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 37
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Characters replaced by a space before tokenising: ASCII punctuation, digits,\n",
      "# typographic quotes/primes and the backslash. Inside the class, ',-.' is a range\n",
      "# covering exactly ',', '-' and '.'. The pattern is a raw string so that \\[ and \\]\n",
      "# reach the regex engine as written instead of relying on Python's deprecated\n",
      "# pass-through of unknown string escapes; the set of matched characters is the\n",
      "# same as before. Compiled once rather than once per document.\n",
      "non_word_chars = re.compile(r'[!\"#%\\'()*+,-./:;<=>?@\\[\\]^_`{|}~1234567890\u2019\u201d\u201c\u2032\u2018\\\\]')\n",
      "\n",
      "texts=[]\n",
      "for doc in documents:\n",
      "    tokens = non_word_chars.sub(' ', doc).split()\n",
      "    stems = (w.strip() for w in stemmer.stemWords(tokens))\n",
      "    # Keep stems of at least 3 characters that are not (stemmed) stop words.\n",
      "    texts.append([w for w in stems if len(w)>=3 and w not in stoplist])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": [],
     "prompt_number": 38
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "dictionary = 
corpora.Dictionary(texts)\n", "corpus = [dictionary.doc2bow(text) for text in texts]\n", "tfidf = models.TfidfModel(corpus)\n", "corpus_tfidf = tfidf[corpus]" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 40 }, { "cell_type": "code", "collapsed": false, "input": [ "n_topics = 100\n", "lda = models.LdaModel(corpus_tfidf, id2word=dictionary, num_topics=n_topics, passes=3, alpha='auto')\n", "for i in range(0, n_topics):\n", " terms = [term[1] for term in lda.show_topic(i, 10)]\n", " print (\"#\" + str(i) + \": \"+ \", \".join(terms))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "#0: ka\u00e7ak\u00e7\u0131l\u0131k, alet, sald\u0131rd\u0131k, \u00f6nleyi\u00e7, T\u00fcpra\u015f, Aygaz, Bar\u0131\u015f\u00e7\u0131l, girilme, Talimha, alabor\n", "#1: K\u0131rba\u00e7, okul, \u00f6\u011frenci, n\u00fcfus, E\u011fit, Milli, yasakla, kay\u0131t, rastlana, e\u011fit\n", "#2: Bayraktar, edilmemi\u015f, fezleke, bozulmad\u0131, verilebilmes, bulundura, \u015eehircilik, Atilla, \u00c7evre, Sancak\n", "#3: \u00c7anakkale, gelebilecek, ya\u011f\u0131\u015f, olumsuzluk, Marmar, silme, k\u0131y\u0131, f\u0131rt, \u015fiddetli, Loza" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#4: \u015eansal, Barbaros, moda\u00e7, Kapla, kanats\u0131z, erozyo, cezaland\u0131rma, H\u00fck\u00fcm, s\u00fcr\u00fclemez, K\u00fcrdista\n", "#5: Berk, Elvan, Mutlu, aile, al\u0131nacak, ba\u015flad\u0131k, TRT, yasa, kadar, gerekli\n", "#6: \u00e7a\u011f\u0131ra, rut, M\u00fcdahal, s\u0131karak, ekmek, su\u00e7land\u0131k, yakla\u015f\u0131m, d\u00fc\u015f\u00fcnen, ramaza, oru\u00e7" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#7: kaybede, \u0130srail, kaset, spek\u00fclasyon, bildirilmi\u015f, havuz, d\u00fc\u015f\u00fcn\u00fcr, Gazze, cep, b\u00f6cek\n", "#8: Ba\u011f\u0131\u015f, Ba\u015fm\u00fczakere\u00e7, emanet, Egeme, Kuz, merhap, Kaymakaml\u0131k, Baransu, 
d\u00fc\u015f\u00fcn\u00fcle, Yalov\n", "#9: tekne, merdiven, girilebilecek, diyebilir, uyruklu, baraj, sahas, kanaati, doluluk, Demokratikle\u015fme" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#10: elema, bot, icra, Tamame, y\u0131ld\u0131r, tir, haberdar, \u015feffaf, h\u00e2k, iftar\n", "#11: de\u011fi\u015ftirilme, ikaz, bakmad\u0131k, sanat, maddelik, e\u011fitimci, Altayl\u0131, \u015fof\u00f6r, Tasar\u0131, al\u0131m\n", "#12: TOBB, YA\u015e, Bulut, Yi\u011fit, a\u011f\u0131rlama, Rifat, Hisarc\u0131kl\u0131o\u011flu, tahkikat, kurgu, planlad\u0131k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#13: Akdo\u011fa, Riz, Atalay, \u0130lan, \u00e7eker, \u00e7aba, karde\u015f, kapana, bil, kayd\u0131\n", "#14: I\u015f\u0131l, \u0130la, vali, Musul, siville\u015ftirildik, EMASYA, I\u015e\u0130D, reh, \u00c7evikca, a\u00e7\u0131kland\u0131k\n", "#15: D\u00fcndar, Sazak, Milliyet, Can, Demir\u00f6re, Cemal, Tamer, Candundaradasi, candundaradasi, dertle\u015fiyor" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#16: Aylin, Kan, dayanak, baraj, patro, kald\u0131rma, kullan\u0131lmak, d\u00fc\u015f\u00fcr\u00fclmes, master, Pazartes\n", "#17: kaps\u00fcl, gaz, sehve, Biber, y\u00fcr\u00fcmek, derecelik, att\u0131k, ba\u015fvurma, ka\u00e7, biber\n", "#18: Sara\u00e7, s\u00f6ylenen, kar\u015f\u0131lanacak, spek\u00fclasyon, devral, muhtelif, Yama, Sadullah, engelli, o\u011fl" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#19: De\u011fi\u015fik, g\u00f6r\u00fc\u015fm\u00fcyor, siyasi, Lale, ortakla\u015fma, Alemdaro\u011flu, nitelendir, Teoma, Yayma, Akta\u015f\n", "#20: Derya, izlemi\u015f, Al\u00e7\u0131, sa\u011f, s\u00fcrd\u00fcrmes, \u00f6\u011frenilmi\u015f, Yetkili, mart, sani, vadel\n", "#21: Marmaray, Korgeneral, Yenikap, Projes, arkeolokik, b\u00fct\u00fcnle\u015fecek, yolcu, hatt, manevra, metro" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#22: 
kedi, s\u00f6ylemedi, \u00c7ED, Tayla, seks, Osmani, Kalyo, havalima, Tanay, Etki\n", "#23: SE\u00c7S\u0130S, ma\u00e7, Se\u00e7me, K\u00fct\u00fck, Fenerbah\u00e7e, dakika, Galatasaray, final, kabullenmek, i\u015flenmek\n", "#24: y\u00fcz, Ba\u015f\u00e7\u0131, Bankas, enflasyo, faiz, hareketlenme, Erdem, DTK, fiyat, Merkez" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#25: Trabzo, \u0130nal, k\u0131l\u0131\u00e7, Erol, Zek, \u00c7olak, kat\u0131l\u0131yor, Tu\u011fgeneral, Nezih, heykel\n", "#26: burs, suatkili\u00e7, res, Bak\u00fc, Mansur, Evli, deli\u00e7, y\u00fcksek\u00f6\u011fre, \u00f6nlisans, cefa\n", "#27: kodu, soy, Dergis, askerlik, \u015f\u00fcpheli, b\u0131\u00e7ak, k\u0131r\u0131k, say\u0131lacak, almad\u0131k, maddi" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#28: Silivri, tedbir, \u00f6\u011frenmek, de\u011ferlendirdik, yarg\u0131lan\u0131yor, Kurum, Yerle\u015fkes, art\u0131r\u0131l, s\u0131nav, eyl\u00fcl\n", "#29: hatt\u0131, yang\u0131, kilometrelik, ula\u015f\u0131la, d\u0131\u015fsal, Farkl\u0131, ger\u00e7ekle\u015fme, K\u0131l\u0131n\u00e7, yukar, i\u015f\u00e7i\n", "#30: Urfa, ama\u00e7lad\u0131k, \u015feref, de\u011fi\u015ftirilebilecek, sonu\u00e7lanmas, korkak, darbecilik, \u00e7evrilme, etmeme, kar\u015f\u0131lad\u0131k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#31: Arslan, r\u00fc\u015fvet, S\u00fcleyma, Zarrap, yolsuzluk, \u00c7a\u011flaya, Bar\u0131\u015f, Reza, Zafer, uzakla\u015ft\u0131r\u0131la\n", "#32: yat\u0131yor, \u015fiir, Hikmet, Evran, Naz, Kar\u015f\u0131, \u015fair, ayet, Emir, ele\u015ftirir\n", "#33: dan\u0131\u015fmanl\u0131k, kan\u0131t, alm\u0131yor, \u015fahs\u0131, Ege, kurtar\u0131l, K\u00f6\u015fk, cari, Teredd\u00fct, Rus" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#34: i\u015flenmes, sahte, pansiyo, yaratm\u0131\u015f, atanmas, yetenek, amir, \u0130ktisat, Efe, \u015eefik\n", "#35: \u0130nan\u00e7er, 
\u00f6\u011fre, hamile, edebilir, TRT, kanatl\u0131s, realizm, menk\u0131be, Tu\u011frul, hamilelik\n", "#36: birle\u015ferek, fert, Ay\u015fenur, gem, g\u00fcncel, ideal, Ailes, sorgulama, takvim, Zeyrek" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#37: vazifes, de\u011ferlendirmes, yap\u0131labilir, bildirile, galeris, foto, g\u00f6rmemes, kredi, Keke\u00e7, y\u00fcr\u00fcrl\u00fck\n", "#38: koymak, H\u00fck\u00fcme, AYM, Tarafs\u0131z, ak\u0131ls\u0131z, h\u00fck\u00fcm, tutmaz, Progra, hamle, Demokratik\n", "#39: tarla, \u00fcretme, genelkurmay, y\u00f6netmek, Zekeriya, Nihal, Bengisu, h\u00fckmettik, olu\u015fturulma, K\u00f6rfez" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#40: Komuta, Orgeneral, Kuvvet, emekli, Kar, Ordu, Silahl\u0131, Damc\u0131, Genelkurmay, m\u00fckellefiyet\n", "#41: yaratmas, Bilindik, ulusalc\u0131l\u0131k, g\u00fcvenli, olu\u015fturulmas, M\u00fcezzino\u011flu, \u00fclkes, Y\u00fczba\u015f, pilot, Kas\u0131mpa\u015f\n", "#42: Alp, D\u00f6nmez, kazan\u00e7, san\u0131kl\u0131, uyarla, Serdar, yarat\u0131\u00e7, Batar, vitrin, selfie" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#43: Karaka\u015f, unut, rastlamad\u0131k, Erten, Baykal, dinlemek, m\u00fczaker, Meral, Erg\u00fcn, Fikri\n", "#44: Erdo\u011fa, Gez, Ba\u015fbaka, T\u00fcrki, Park\u0131, dedi, \u00d6cala, T\u00fcrk, a\u00e7\u0131klama, CHP" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#45: aday, se\u00e7, Se\u00e7, \u0130hsano\u011flu, se\u00e7im, CHP, kat\u0131lmak, aral\u0131k, iftar, ada\n", "#46: partil, korunma, \u00f6nlenmes, \u00fcstlenmi\u015f, \u0130n\u00f6, duyurus, akraba, Burak, Se\u00e7im, Karamano\u011flu\n", "#47: S\u00fcng\u00fc, Bay\u0131k, Necip, Atlansoy, G\u00fcray, Kabul, d\u00f6nd\u00fck, \u00dczer, takv, takd\u0131" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#48: Binal, da\u011f\u0131t\u0131la, devirmek, helal, dinlemi\u015f, 
b\u00fcy\u00fck\u015fehir, A\u011fr\u0131, dinlendik, ye\u011fe, al\u0131\u015fkanl\u0131k\n", "#49: Demirta\u015f, Bila, Times, Fikret, The, d\u00fc\u015f\u00fcnce, bahane, g\u00f6steris, Ay\u015fe, kapat\u0131lma\n", "#50: Batu, terbiyesizlik, Pel, Kent, temkinli, selam, artt\u0131r\u0131lma, PAB, Tutuklu, m\u00fcdahel" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#51: depre, Depre, hissedil, Y\u00fczeysel, \u00fcss\u00fc, \u00d6zlem, meyda, anla\u015f\u0131lmak, b\u00fcy\u00fckl\u00fck, As\u0131l\n", "#52: Akar, KILI\u00c7, m\u00fcd\u00fcrl\u00fck, Alparsla, R\u00fc\u015f, \u00c7apk, arama, Te\u011fme, Davas, Japonya\n", "#53: bildiri, July, bo\u015falt\u0131l\u0131r, oturduk, g\u00f6r\u00fc\u015fm\u00fc\u015f, muazza, de\u011findik, m\u00fcsvedi, boyutlu, Tepki" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#54: kafata, ya\u015fart\u0131\u00e7, yarat\u0131la, Arda, ma\u011fduriyet, gayrime\u015fru, \u00e7alarak, ba\u015f\u00f6rt\u00fc, etmey, Irmak\n", "#55: mesaf, korur, beklent, g\u00fcnah, san\u0131r, t\u0131bbi, i\u00e7ki, ata, i\u015fletmecis, Gereke\n", "#56: demokratikle\u015fme, spiker, kal\u0131n, m\u00fc\u015f, maliye, Servet, desteklemes, Devr, nakil, Balc\u0131" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#57: Kotil, hapis, Kurul, Mehmet, Haber, \u00f6\u011fret, s\u00f6z, g\u00f6r, taraf, g\u00fcn\n", "#58: L\u00fctf\u00fc, stand, T\u00fcrkka, sertlik, Suavi, yatak, Ziraat, t\u00fcnel, s\u00f6ylemek, yava\u015f\n", "#59: kanunsuz, Ya\u011f\u0131\u015f, yo\u011funluk, \u00e7at, sloga, olanak, s\u0131k\u0131l, boykot, etkiledi, dev" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#60: Ceylanp\u0131nar, konma, Arap\u00e7a, Bilim, Hudut, s\u00f6yleyebilecek, g\u00f6stermek, ba\u015flang\u0131\u00e7, Karakol, MEB\n", "#61: g\u00f6nderildik, do\u00e7ent, \u0130lahiyat, de\u011ferlendiril, ta\u015f\u0131nma, geciktik, safha, merdive, malze, Ger\u00e7ek\n", "#62: 
Arif, Perin\u00e7ek, Kandilli, de\u011fi\u015ftiril, Rasathanes, Gar, Bo\u011fazi\u00e7, geze, devri, evlendire" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#63: Yahudi, Demirci, g\u00f6sterges, Denizli, y\u00fckl\u00fc, g\u00fcnd\u00fcz, Malazgirt, tencer, yakla\u015fa, K\u0131r\u0131kkale\n", "#64: karar, duru\u015fma, Ergeneko, dava, su\u00e7, ceza, san\u0131k, m\u00fcebbet, TGB, y\u0131l\n", "#65: Karargah, plastik, mermi, S\u00f6nmez, \u00e7\u0131kmaz, Avc\u0131, k\u0131r, Haz, Hanef, Ni\u015fanya" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#66: Denet, Vergi, \u0130ran, Mali, davul, ba\u015far, a\u015fa\u011f\u0131la, Bek, Barlas, Tuncer\n", "#67: Sa\u011fl\u0131kl\u0131, halihaz\u0131r, Mezarl\u0131k, defnedil, Nevra, cenazes, Zincirliku, Camii, K\u00f6s, Pavey\n", "#68: parlak, fiil, DER, Destekle, Kredi, TMSF, su\u00e7lam\u0131\u015f, ay\u0131rmak, Ferit, verebilmek" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#69: yurtd\u0131\u015f, terfi, kollamak, Co\u015f, g\u00fc\u00e7lendirilme, Antikapitalist, sa\u011flamak, a\u00e7m\u0131\u015f, Kulis, endi\u015fes\n", "#70: durduramaz, s\u00fcrd\u00fcr\u00fclme, ba\u015fdan\u0131\u015fma, kalkmas, Naci, dave, Il\u0131cak, bulunmama, Nazl\u0131, Amasya\n", "#71: \u00c7elep, Sabri, davas, ettir, Temmuz, Fas, alg\u0131s, do\u011fumlu, Sar\u0131yer, Asli" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#72: day\u0131s, Bot, da\u011f\u0131, Ya\u015f, bahseder, Put, Youtube, Rusya, tak\u0131m, lacivertli\n", "#73: Ba\u015fbuk, \u0130lker, alay, T\u00dcS\u0130AD, kovulduk, kapat\u0131ld\u0131k, lokomotif, Hulusi, skandal, Silivri\n", "#74: Tombul, Alphan, Kurba, Yarg\u0131la, k\u0131ymet, FED, Melis, kule, Cenevre, y\u00fckseklik" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#75: Arenemega, T\u00fcrk\u00e7e, aktif, sade\u00e7, toplum, devirir, nisa, darbe, Te\u015fkilat, Yok\n", "#76: cinsel, taciz, 
\u015eur, tutuklana, \u00e7\u0131kart\u0131l\u0131yor, Dumanl\u0131, \u015e\u00fcphel, Ya\u015far, Cinsel, Gaziosmanpa\u015f\n", "#77: h\u00fck\u00fbmet, \u00f6ze, pala, Hilmio\u011flu, gecekondu, Acar, i\u015fsizlik, korkutmak, davranmak, mutsuz" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#78: fak\u00fclte, kayganla\u015fa, \u00e7izer, Y\u00d6K, seyreder, de\u011fi\u015ftirdik, derslik, Hafta, doktora, Do\u00e7entlik\n", "#79: s\u00fcr\u00fc\u015f, takip\u00e7i, ka\u00e7ak, yap\u0131l, Twitter, hizmet, beledi, hesap, att\u0131k, Ara\n", "#80: TCK, \u00c7izgi, metrek\u00fcp, Duygu, G\u00f6ktep, Erta\u015f, k\u0131yas\u0131, G\u00f6n\u00fcll\u00fc, Fad\u0131, bilecek" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#81: G\u00f6k\u00e7ea, ka\u00e7ak\u00e7\u0131, bulunul, Ayd\u0131nl\u0131k, kontroller, olumlu, Haydarpa\u015f, \u0130ktisad\u0131, potansiyel, t\u0131p\n", "#82: deneyimli, u\u011fratacak, Nok, getirilmi\u015f, anar\u015fi, Ay\u0131p, Varl\u0131k, U\u00e7kan, Sar\u0131kam\u0131\u015f, icraat\n", "#83: verebilir, uya, skandal, \u00e7ekilir, a\u00e7mak, grev, kald\u0131rmak, Takv, Sinemas, Komu" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#84: tahmin, s\u00f6ylenti, Enflasyo, d\u00f6v\u00fcle, ta\u015f\u0131, belirlendik, dershane, Rapor, Yery\u00fcz, koruduk\n", "#85: Emekli, Koma, rota, \u00c7et, motor, Filis, mahall, K\u0131vr\u0131ko\u011flu, Kaan, \u0130slamabad\u0131\n", "#86: ilgili, Aslan, par\u00e7alay\u0131\u00e7, durulmas, oturum, Zeyidi, k\u0131na, yukar\u0131, Karak\u00f6y, \u015ei\u015fha" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#87: Kalyoncu, Reyhanl\u0131, ODT\u00dc, Uluder, Ba\u015fbakan, g\u00f6t\u00fcrebilir, \u015e\u0131rnak, D\u0130HA, ele\u015ftirilmi\u015f, Roboski\n", "#88: yurdu, saatir, \u00d6zdemir, Nitelikli, yarat\u0131lmas, Metiner, Aysever, Enver, C\u00fcneyt, Ad\u0131yama\n", "#89: G\u00f6k\u00e7ek, elektronik, Uzla\u015fma, 
yard\u0131mc\u0131, sorgulatma, b\u00fcy\u00fckel\u00e7i, Melih, Muk, refah, yard\u0131mc\u0131l\u0131k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#90: al\u0131nmayacak, hassasiye, sonu\u00e7land\u0131r\u0131lmas, Siyase, AGOS, dilek, i\u00e7mek, resepsiyo, d\u00f6vme, \u00e7ay\n", "#91: polis, g\u00f6zalt, grup, m\u00fcdahal, g\u00fcvenlik, Rojav, Olay, ki\u015f, \u00f6nlem, il\u00e7e\n", "#92: El\u00e7i, \u015eerafet, g\u00fcne\u015f, ele\u015ftirildik, kurtul, Lig, S\u00fcper, de\u011ferlendirilmek, g\u00fcnd\u00fcz, Be\u015fikta\u015f" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#93: belde, Y\u0131ld\u0131z, yay\u0131mlad\u0131k, yapt\u0131r\u0131yor, s\u0131ralama, d\u00fc\u015fmanl\u0131k, Sevgi, u\u00e7uyor, s\u00f6yle\u015f, de\u011ferlendirilmel\n", "#94: izleyi\u00e7, delege, tahamm\u00fcls\u00fczl\u00fck, art\u0131r\u0131lacak, Feyzio\u011flu, baro, nakli, \u0131rk, Anla\u015fmas, TBB\n", "#95: ney, Ye\u015fil, g\u00f6ze, Tahincio\u011flu, fa\u015fizme, ayr\u0131lacak, hil, erecek, H\u00fcmeyra, Remzi" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#96: atl\u0131, ba\u011flayacak, boyn, kabuk, Osmanl\u0131\u00e7, \u015eimdik, Tar, engellenmes, olimpiyat, Dak\n", "#97: tem, \u0130zleyi\u00e7, B\u00fcnyam, Y\u0131lmazer, \u00f6d\u00fcl, G\u00f6lges, Pamuk, Furka, imzac\u0131, \u00d6d\u00fcl\n", "#98: Elia\u00e7\u0131k, \u0130hsan, itibars\u0131z, mukte, ara\u015ft\u0131r\u0131la, \u0130n\u015fa, \u00d6zk\u00f6k, davas, Pakis, Sak\u0131k" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#99: suni, \u0130lkokul, Ayta\u00e7, TMK, zulm, Tablo, Geni\u015fletil, \u015fak, Y\u0131ll\u0131k, veto\n" ] } ], "prompt_number": 41 }, { "cell_type": "code", "collapsed": false, "input": [ "lsi = models.LsiModel(corpus_tfidf, id2word=dictionary, num_topics=n_topics, onepass=False, power_iters=3)\n", "for i in range(0, n_topics):\n", " terms = [term[1] for term in 
lsi.show_topic(i, 10)]\n", " print (\"#\" + str(i) + \": \"+ \", \".join(terms))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "#0: Erdo\u011fa, Ba\u015fbaka, CHP, karar, T\u00fcrki, polis, iddia, Parti, Gez, \u0130stanbul\n", "#1: polis, Taks, Gez, Park\u0131, gaz, Erdo\u011fa, CHP, K\u0131l\u0131\u00e7daro\u011flu, m\u00fcdahal, grup\n", "#2: karar, Mahkemes, san\u0131k, Ceza, soru\u015fturma, dava, Taks, CHP, HSYK, K\u0131l\u0131\u00e7daro\u011flu\n", "#3: CHP, y\u00fcz, I\u015e\u0130D, se\u00e7, \u0130hsano\u011flu, K\u0131l\u0131\u00e7daro\u011flu, aday, Davuto\u011flu, se\u00e7im, Suri\n", "#4: y\u00fcz, K\u0131l\u0131\u00e7daro\u011flu, faiz, puan, CHP, milyo, bin, Gez, Bankas, ma\u00e7\n", "#5: \u00d6cala, PKK, I\u015e\u0130D, san\u0131k, Erdo\u011fa, HDP, Aral\u0131k, Mahkemes, BDP, yolsuzluk\n", "#6: ma\u00e7, dakika, Fenerbah\u00e7e, Gez, Galatasaray, i\u015f\u00e7i, CHP, Park\u0131, Erdo\u011fa, puan\n", "#7: ma\u00e7, y\u00fcz, dakika, Fenerbah\u00e7e, K\u0131l\u0131\u00e7daro\u011flu, Galatasaray, CHP, Gez, faiz, Park\u0131\n", "#8: i\u015f\u00e7i, made, y\u00fcz, facia, ocak, Y\u0131ld\u0131z, G\u00fcl, HSYK, Demirta\u015f, PKK\n", "#9: HSYK, san\u0131k, K\u0131l\u0131\u00e7daro\u011flu, Adalet, i\u015f\u00e7i, Kurul, Baka, de\u011fi\u015fiklik, Bozdak, dava" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#10: \u00d6cala, I\u015e\u0130D, Davuto\u011flu, K\u0131l\u0131\u00e7daro\u011flu, Demirta\u015f, BDP, HDP, \u0130mral, i\u015f\u00e7i, Irak\n", "#11: ya\u011f\u0131\u015f, K\u0131l\u0131\u00e7daro\u011flu, y\u00fcz, \u0130hsano\u011flu, faiz, Twitter, i\u015f\u00e7i, eri\u015f, se\u00e7, made\n", "#12: \u0130hsano\u011flu, Erdo\u011fa, ya\u011f\u0131\u015f, G\u00fcl, CHP, K\u0131l\u0131\u00e7daro\u011flu, Sar\u0131g\u00fcl, Cumhurba\u015fka, Ekmeledi, se\u00e7\n", "#13: ya\u011f\u0131\u015f, Twitter, eri\u015f, Erdo\u011fa, I\u015e\u0130D, sa\u011fanak, 
T\u0130B, kar, G\u00f6k\u00e7ek, Ba\u015fbaka\n", "#14: Taks, Gez, Park\u0131, Berk, Elvan, I\u015e\u0130D, \u00f6\u011frenci, \u0130hsano\u011flu, HSYK, polis\n", "#15: eri\u015f, Elvan, Twitter, Erdo\u011fa, T\u0130B, Berk, G\u00fcle, HSYK, Gez, AYM\n", "#16: Elvan, Berk, Davuto\u011flu, Ar\u0131n\u00e7, G\u00fcl, Ahmet, Baka, G\u00fcle, ya\u011f\u0131\u015f, Erdo\u011fa\n", "#17: \u00f6\u011frenci, okul, G\u00fcl, E\u011fit, y\u00fcz, e\u011fit, HSYK, faiz, G\u00fcle, bin\n", "#18: Ar\u0131n\u00e7, Davuto\u011flu, B\u00fclent, HSYK, ses, kay\u0131t, Yard\u0131mc\u0131s, Erdo\u011fa, K\u0131l\u0131\u00e7daro\u011flu, G\u00f6k\u00e7ek" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#19: G\u00fcl, \u0130hsano\u011flu, Cumhurba\u015fka, Sar\u0131g\u00fcl, Davuto\u011flu, G\u00f6k\u00e7ek, Ar\u0131n\u00e7, Abdullah, HSYK, Elvan\n", "#20: K\u0131l\u0131\u00e7daro\u011flu, G\u00fcl, \u0130hsano\u011flu, Erdo\u011fa, Berk, Elvan, G\u00f6k\u00e7ek, Davuto\u011flu, se\u00e7, Parti\n", "#21: Sar\u0131g\u00fcl, Elvan, grup, Berk, Gez, G\u00fcle, m\u00fcdahal, HSYK, polis, Davuto\u011flu\n", "#22: Davuto\u011flu, Ar\u0131n\u00e7, G\u00fcl, I\u015e\u0130D, B\u00fclent, eri\u015f, Ahmet, Emniyet, g\u00f6zalt, G\u00fcle\n", "#23: Davuto\u011flu, Ar\u0131n\u00e7, M\u0130T, kay\u0131t, ses, G\u00fcl, K\u0131l\u0131\u00e7daro\u011flu, \u0130hsano\u011flu, G\u00f6k\u00e7ek, \u00c7a\u011flaya\n", "#24: G\u00fcle, Elvan, Berk, Sar\u0131g\u00fcl, G\u00f6k\u00e7ek, I\u015e\u0130D, Fethullah, \u0130hsano\u011flu, okul, G\u00fcl\n", "#25: HSYK, Bah\u00e7el, \u0130hsano\u011flu, Ar\u0131n\u00e7, MHP, Demirta\u015f, Sar\u0131g\u00fcl, Parti, ODT\u00dc, Gez\n", "#26: faiz, \u00f6\u011frenci, G\u00f6k\u00e7ek, Davuto\u011flu, milyo, okul, I\u015e\u0130D, Sar\u0131g\u00fcl, Ankar, bin\n", "#27: Sar\u0131g\u00fcl, G\u00f6k\u00e7ek, faiz, K\u0131l\u0131\u00e7daro\u011flu, G\u00fcl, Bankas, Merkez, \u0130hsano\u011flu, Mustaf, Ankar" ] }, { "output_type": "stream", 
"stream": "stdout", "text": [ "\n", "#28: \u0130hsano\u011flu, M\u0130T, G\u00fcle, Sar\u0131g\u00fcl, TIR, G\u00f6k\u00e7ek, Baka, \u0130srail, Erdo\u011fa, I\u015e\u0130D\n", "#29: y\u00fcz, faiz, Bah\u00e7el, Bankas, Sar\u0131g\u00fcl, \u0130srail, G\u00fcle, Merkez, Al\u00e7\u0131, G\u00f6k\u00e7ek\n", "#30: faiz, I\u015e\u0130D, Bah\u00e7el, asker, Al\u00e7\u0131, Diyarbak\u0131r, milyo, PKK, Musul, Il\u0131cak\n", "#31: Sar\u0131g\u00fcl, Elvan, Berk, M\u0130T, Korkmaz, y\u00fcz, TIR, Bah\u00e7el, G\u00fcl, G\u00f6k\u00e7ek\n", "#32: Bah\u00e7el, MHP, Davuto\u011flu, Erdo\u011fa, Sar\u0131g\u00fcl, TIR, Zarrap, M\u0130T, Baka, Devlet\n", "#33: PKK, Al\u00e7\u0131, Il\u0131cak, K\u0131l\u0131\u00e7daro\u011flu, M\u0130T, Sar\u0131g\u00fcl, Baka, Demirta\u015f, Ar\u0131n\u00e7, Taks\n", "#34: G\u00fcle, depre, G\u00f6k\u00e7ek, \u00c7elik, Baka, \u0130hsano\u011flu, y\u00fcz, Komisyo, Fethullah, Davuto\u011flu\n", "#35: depre, G\u00fcle, Bah\u00e7el, \u0130srail, Sar\u0131g\u00fcl, Twitter, Y\u0131ld\u0131r, bin, karar, gazete\n", "#36: \u0130srail, depre, Al\u00e7\u0131, Il\u0131cak, Depre, Gazze, ODT\u00dc, b\u00fcy\u00fckl\u00fck, ABD, gazete" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#37: depre, M\u0130T, \u0130srail, TIR, Demirta\u015f, Depre, Emniyet, b\u00fcy\u00fckl\u00fck, \u00d6cala, kay\u0131t\n", "#38: depre, M\u0130T, \u00d6cala, \u0130srail, Demirta\u015f, Sar\u0131g\u00fcl, Y\u0131ld\u0131r, Zarrap, Depre, ODT\u00dc\n", "#39: Korkmaz, \u0130srail, Demirta\u015f, \u0130smail, \u00d6cala, Ali, HDP, G\u00fcle, Eski\u015fehir, Gazze\n", "#40: AYM, K\u0131l\u0131\u00e7, Korkmaz, Zarrap, Anayas, Erdo\u011fa, Al\u00e7\u0131, Il\u0131cak, depre, Sar\u0131g\u00fcl\n", "#41: Y\u0131ld\u0131r, Demirta\u015f, K\u0131l\u0131\u00e7, AYM, HDP, Bah\u00e7el, \u0130zmir, G\u00f6k\u00e7ek, Binal, Korkmaz\n", "#42: Bah\u00e7el, \u0130srail, PKK, HDP, \u0130hsano\u011flu, Emniyet, g\u00f6rev, i\u015f\u00e7i, Feyzio\u011flu, MHP\n", "#43: 
\u0130srail, Y\u0131ld\u0131r, M\u0130T, TIR, Baka, Haka, Gazze, kad\u0131, Emniyet, G\u00f6k\u00e7ek\n", "#44: Zarrap, eri\u015f, Mutlu, T\u0130B, Komisyo, Kuz, Diyarbak\u0131r, K\u0131l\u0131\u00e7, bin, Reza\n", "#45: Korkmaz, Y\u0131ld\u0131r, Mutlu, \u00e7ocuk, Diyarbak\u0131r, K\u0131l\u0131\u00e7, Feyzio\u011flu, AYM, \u0130smail, g\u00f6zalt" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#46: \u00e7ocuk, \u00c7elik, Y\u0131ld\u0131r, depre, PKK, Y\u0131ld\u0131z, Diyarbak\u0131r, i\u015f\u00e7i, milyar, Ba\u015fbuk\n", "#47: Y\u0131ld\u0131r, \u0130srail, Haka, Al\u00e7\u0131, Il\u0131cak, Demirta\u015f, Emniyet, Korkmaz, Ahmet, K\u0131l\u0131\u00e7daro\u011flu\n", "#48: Y\u0131ld\u0131r, Fenerbah\u00e7e, kad\u0131, G\u00f6k\u00e7ek, a\u011fa\u00e7, Korkmaz, Binal, Emniyet, Feyzio\u011flu, \u0130zmir\n", "#49: u\u00e7ak, Ba\u015fbuk, trafik, Al\u00e7\u0131, y\u0131l, K\u0131l\u0131\u00e7, Il\u0131cak, Dumanl\u0131, bin, Genelkurmay\n", "#50: Feyzio\u011flu, Suri, ODT\u00dc, Zarrap, \u00c7elik, Al\u00e7\u0131, Korkmaz, Emniyet, Il\u0131cak, g\u00f6zalt\n", "#51: Kobani, Demirta\u015f, Suri, ODT\u00dc, Erdo\u011fa, Ba\u015fbuk, \u0130hsano\u011flu, T\u00fcrk, Genelkurmay, Y\u0131ld\u0131r\n", "#52: Feyzio\u011flu, Dan\u0131\u015ftay, ODT\u00dc, Suri, milyar, Zarrap, TIR, Yarg\u0131tay, Baro, Akdo\u011fa\n", "#53: milyar, Demirta\u015f, ODT\u00dc, Suri, HDP, Haka, Emniyet, bin, Diyarbak\u0131r, K\u0131l\u0131\u00e7\n", "#54: Haka, trafik, Baka, \u00e7ocuk, Suri, ODT\u00dc, M\u0130T, Ala, BDP, gaz" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#55: Feyzio\u011flu, ses, istifa, Diyarbak\u0131r, Ba\u011f\u0131\u015f, Dumanl\u0131, su\u00e7, BDP, K\u0131l\u0131\u00e7, Erdo\u011fa\n", "#56: \u00e7ocuk, \u00c7elik, Ba\u015fbuk, Mutlu, yay, Aslan, TRT, Genelkurmay, TIR, i\u015f\u00e7i\n", "#57: Aslan, ODT\u00dc, Ala, Ba\u015fbuk, bin, milyar, aday, ki\u015f, Feyzio\u011flu, Zarrap\n", "#58: Y\u0131ld\u0131z, 
i\u015f\u00e7i, milyar, Atat\u00fcrk, bin, Mutlu, aday, \u00c7elik, Bayraktar, An\u0131tkabir\n", "#59: trafik, \u00c7elik, lira, \u0130srail, Feyzio\u011flu, istifa, ara\u00e7, TIR, milyar, puan\n", "#60: istifa, ODT\u00dc, Demirta\u015f, \u00f6\u011frenci, Ba\u015fbuk, Bayraktar, g\u00f6zalt, Mutlu, soru\u015fturma, kad\u0131\n", "#61: Zarrap, Aslan, TIR, Ba\u015fbuk, \u00e7ocuk, \u00c7elik, Akdo\u011fa, puan, Diyarbak\u0131r, ODT\u00dc\n", "#62: fiyat, \u00c7elik, lira, Demirta\u015f, HDP, Y\u0131ld\u0131z, zam, Kobani, milyo, faiz\n", "#63: \u00c7elik, milyar, M\u0131s\u0131r, Ala, Feyzio\u011flu, u\u00e7ak, ki\u015f, TRT, \u0130srail, TIR" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#64: Aslan, milyar, fiyat, kad\u0131, Zarrap, S\u00fcleyma, ABD, Halkbank, puan, Diyarbak\u0131r\n", "#65: kad\u0131, Akdo\u011fa, yay, TRT, \u00e7ocuk, puan, ceza, su\u00e7, Y\u0131ld\u0131z, fiyat\n", "#66: \u00c7elik, TIR, Aslan, kad\u0131, Demirta\u015f, May\u0131s, M\u0130T, HDP, puan, Ba\u015fbuk\n", "#67: \u00e7ocuk, bin, TIR, TRT, aday, Y\u0131ld\u0131z, Kuz, Korkmaz, Ba\u011f\u0131\u015f, fiyat\n", "#68: \u00e7ocuk, aday, M\u0131s\u0131r, ABD, Ala, Feyzio\u011flu, milyar, Atat\u00fcrk, trafik, Y\u0131ld\u0131z\n", "#69: Akdo\u011fa, Ba\u015fbuk, u\u00e7ak, CHP, ODT\u00dc, Y\u0131ld\u0131z, istifa, K\u0131l\u0131\u00e7daro\u011flu, Yal\u00e7, M\u0131s\u0131r\n", "#70: Ala, Akdo\u011fa, Bayraktar, M\u0130T, istifa, fiyat, Efkan, \u00c7elik, Mutlu, i\u015f\u00e7i\n", "#71: \u00c7elik, M\u0131s\u0131r, Y\u0131ld\u0131z, Demirta\u015f, Dumanl\u0131, Zama, i\u015f\u00e7i, Kobani, HDP, May\u0131s\n", "#72: M\u0131s\u0131r, kad\u0131, Dumanl\u0131, \u00c7elik, aday, asker, Kuz, darbe, polis, milyar" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#73: Ba\u015fbuk, san\u0131k, kad\u0131, Dumanl\u0131, fiyat, \u00e7ocuk, milyar, Zama, bin, Aslan\n", "#74: puan, istifa, aday, Akdo\u011fa, final, Dumanl\u0131, Aslan, Be\u015fikta\u015f, 
gaz, YSK\n", "#75: Mah\u00e7upya, trafik, yang\u0131, yolsuzluk, M\u0131s\u0131r, Zama, Bilal, Aral\u0131k, Diyarbak\u0131r, Ba\u011f\u0131\u015f\n", "#76: TRT, u\u00e7ak, THY, Kobani, K\u0131l\u0131\u00e7, M\u0131s\u0131r, Y\u0131ld\u0131z, Suri, san\u0131k, BDP\n", "#77: Akdo\u011fa, M\u0131s\u0131r, TRT, Kuz, Ala, ABD, Ba\u011f\u0131\u015f, \u00c7elik, polis, Dumanl\u0131\n", "#78: Akdo\u011fa, CHP, grup, Dink, G\u00fcl, Atat\u00fcrk, polis, Bozdak, Demirta\u015f, \u00c7i\u00e7ek\n", "#79: M\u0131s\u0131r, kad\u0131, Dumanl\u0131, PKK, \u00e7ocuk, Irak, M\u0130T, yang\u0131, Bozdak, ABD\n", "#80: Ba\u011f\u0131\u015f, \u00c7elik, u\u00e7ak, ABD, THY, trafik, \u00f6\u011frenci, Y\u0131ld\u0131z, Bayraktar, Atat\u00fcrk" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#81: karar, Bozdak, ODT\u00dc, Akdo\u011fa, ki\u015f, Bayraktar, TIR, gaz, u\u00e7ak, \u00f6\u011frenci\n", "#82: i\u015f\u00e7i, Ala, yang\u0131, kad\u0131, facia, M\u0131s\u0131r, made, aday, \u00d6nder, G\u00f6k\u00e7ek\n", "#83: M\u0131s\u0131r, ODT\u00dc, CHP, Y\u0131lmaz, May\u0131s, Kuz, Kobani, Bozdak, T\u00dcS\u0130AD, grup\n", "#84: Akdo\u011fa, M\u0130T, TIR, M\u0131s\u0131r, rapor, Kuz, Mutlu, yang\u0131, Taks, Dink\n", "#85: Ba\u011f\u0131\u015f, Akdo\u011fa, ODT\u00dc, \u00f6\u011frenci, seviye, yang\u0131, grup, \u00c7ar\u015f\u0131, Komisyo, \u015eah\n", "#86: Ba\u011f\u0131\u015f, \u00f6\u011frenci, M\u0131s\u0131r, TRT, Bayraktar, Bank, Egeme, yolsuzluk, a\u011fa\u00e7, Asya\n", "#87: gaz, Bayraktar, polis, biber, Y\u0131ld\u0131z, \u00f6\u011frenci, made, M\u0131s\u0131r, i\u015f\u00e7i, facia\n", "#88: askerlik, Y\u0131lmaz, Fenerbah\u00e7e, Akdo\u011fa, u\u00e7ak, Kobani, milyar, yasak, bedelli, Atat\u00fcrk" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#89: Asya, Bank, \u00f6\u011frenci, \u015eah, Babaca, Parti, banka, Kuz, Bulut, K\u0131l\u0131\u00e7\n", "#90: baraj, \u00c7elik, Ba\u011f\u0131\u015f, bin, Sara\u00e7, Mehmet, 
yang\u0131, Mah\u00e7upya, facia, gaz\n", "#91: Bozdak, K\u0131l\u0131\u00e7, CHP, M\u0131s\u0131r, Dink, Adalet, Bekir, ABD, yang\u0131, rapor\n", "#92: ABD, \u0130zmir, Dink, Kuz, \u00d6cala, polis, Mah\u00e7upya, g\u00f6rev, BDP, kad\u0131\n", "#93: Bozdak, Ba\u011f\u0131\u015f, Ala, Yerkel, HSYK, Marmaray, Dumanl\u0131, Avc\u0131, rapor, ODT\u00dc\n", "#94: Marmaray, askerlik, yang\u0131, baraj, Cumhurba\u015fkanl\u0131k, \u0130stanbul, foto\u011fraf, aday, trafik, bedelli\n", "#95: Kuz, \u00d6nder, g\u00f6rev, Fenerbah\u00e7e, A\u011fustos, \u015eah, Ba\u015fbuk, Ba\u011f\u0131\u015f, Sar\u0131s\u00fcl\u00fck, asker\n", "#96: Dink, \u0130nce, cami, \u00d6cala, bayrak, Mah\u00e7upya, Marmaray, Sara\u00e7, yang\u0131, Akdo\u011fa\n", "#97: ceza, rapor, \u015eah, \u00d6cala, TRT, Y\u0131lmaz, Bozdak, T\u00dcS\u0130AD, Ba\u011f\u0131\u015f, Cemal" ] }, { "output_type": "stream", "stream": "stdout", "text": [ "\n", "#98: M\u0131s\u0131r, Ba\u011f\u0131\u015f, Kobani, TRT, Y\u0131ld\u0131z, Suri, Haka, Reyhanl\u0131, foto\u011fraf, polis\n", "#99: Marmaray, Akdo\u011fa, askerlik, seviye, kad\u0131, YSK, Dink, faiz, \u00d6cala, Y\u0131lmaz\n" ] } ], "prompt_number": 42 }, { "cell_type": "code", "collapsed": false, "input": [ "i = 0\n", "print ('Topic distribution on an arbitrarily selected document:\\n')\n", "print ('Original document: ' + documents[i])\n", "print ('Preprocessed document: ' + str(texts[i]))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Topic distribution on an arbitrarily selected document:\n", "\n", "Original document: \n", "S\u0131n\u0131r d\u0131\u015f\u0131na \u00e7ekilme karar\u0131n\u0131n ard\u0131ndan silahl\u0131 bir grup PKK\u2019l\u0131 Kuzey Irak\u2019a ula\u015ft\u0131. 
15 ki\u015filik grubun Irak\u2019a giden ilk PKK\u2019l\u0131lar oldu\u011fu belirtildi.\n", "Van\u2019\u0131n k\u0131rsal kesiminden bir hafta \u00f6nce yola \u00e7\u0131kan 6\u2032s\u0131 kad\u0131n 15 PKK\u2019l\u0131, Hakkari\u2019nin \u00c7ukurca \u0130l\u00e7esi \u00fczerinden Irak\u2019\u0131n kuzeyine ge\u00e7i\u015f yapt\u0131. Sabah saat 06.30\u2032da Metina kamp\u0131na gelen silahl\u0131 grubu, burada di\u011fer PKK\u2019l\u0131lar kar\u015f\u0131lad\u0131. Gruba \u00e7ekilmesi s\u0131ras\u0131nda \u00e7ok say\u0131da bas\u0131n mensubu da e\u015flik ederken, deneyimli gazeteci Hasan Cemal de o isimlerden biriydi.\u00a0Ter\u00f6r \u00f6rg\u00fct\u00fc \u00fcyesi ikinci grubun da yolda oldu\u011fu ve 1-2 g\u00fcn i\u00e7inde Irak\u2019a varaca\u011f\u0131 belirtildi.\n", "PKK,\u00a0\u00d6calan\u2019\u0131n Nevruz\u2019da yapt\u0131\u011f\u0131 \u201cs\u0131n\u0131r d\u0131\u015f\u0131na \u00e7ekilin\u201d\u00e7a\u011fr\u0131s\u0131n\u0131n\u00a0ard\u0131ndan Murat Karay\u0131lan\u2019\u0131n a\u00e7\u0131klad\u0131\u011f\u0131 tarih olan 8 May\u0131s\u2019ta \u00e7ekilmeye ba\u015flam\u0131\u015ft\u0131.\n", "TSK d\u00fcn yapt\u0131\u011f\u0131 a\u00e7\u0131klamada, ter\u00f6ristlerin \u00e7ekilmesine ait g\u00f6r\u00fcnt\u00fc ve bilgiye sahip olmad\u0131klar\u0131n\u0131 duyurmu\u015ftu.\n", "\n", "Preprocessed document: ['S\u0131n\u0131r', 'd\u0131\u015f', '\u00e7ekilme', 'karar', 'ard\u0131', 'silahl\u0131', 'grup', 'PKK', 'Kuzey', 'Irak', 'ula\u015f', 'ki\u015filik', 'grup', 'Irak', 'ilk', 'PKK', 'olduk', 'belirtil', 'Van', 'k\u0131rsal', 'kesim', 'haf', 'yol', '\u00e7\u0131ka', 'kad\u0131', 'PKK', 'Hakkari', '\u00c7ukur', '\u0130l\u00e7es', '\u00fczer', 'Irak', 'kuzey', 'ge\u00e7i\u015f', 'yap', 'Sabah', 'saat', 'Metina', 'kamp', 'gele', 'silahl\u0131', 'grup', 'bura', 'di\u011fer', 'PKK', 'kar\u015f\u0131lad\u0131', 'Grup', '\u00e7ekilmes', 's\u0131ra', 'say\u0131', 'mensup', 'e\u015flik', 'eder', 'deneyimli', 'gazete\u00e7', 'Hasa', 'Cemal', 
'isim', 'Ter\u00f6r', '\u00f6rg\u00fct', '\u00fcyes', 'ikinci', 'grup', 'yol', 'olduk', 'g\u00fcn', 'Irak', 'varacak', 'belirtil', 'PKK', '\u00d6cala', 'Nevruz', 'yapt\u0131k', 's\u0131n\u0131r', 'd\u0131\u015f', '\u00e7ekil', '\u00e7a\u011fr\u0131', 'ard\u0131', 'Murat', 'Karay\u0131la', 'a\u00e7\u0131klad\u0131k', 'tarih', 'ola', 'May\u0131s', '\u00e7ekilme', 'ba\u015flam\u0131\u015f', 'TSK', 'd\u00fcn', 'yapt\u0131k', 'a\u00e7\u0131klama', 'ter\u00f6rist', '\u00e7ekilme', 'ait', 'g\u00f6r', 'bilgi', 'sahip', 'olmad\u0131k', 'duyurmu\u015f']\n" ] } ], "prompt_number": 43 }, { "cell_type": "code", "collapsed": false, "input": [ "# The LDA model was trained on the TF-IDF corpus, so project the document into\n", "# the same TF-IDF space before inferring its topics. Infer once and reuse the\n", "# result: LDA inference is randomly initialised, so two separate calls may disagree.\n", "doc_topics_lda = lda[tfidf[corpus[i]]]\n", "print ('Which LDA topic maximally describes this document?')\n", "print ('Topic probability mixture: ',end=\"\")\n", "for j,k in doc_topics_lda:\n", "    print('(%d, %3.2f)' % (j,k), end = ' ')\n", "print ('\\nTopic with the highest probability: topic #' + str(max(doc_topics_lda,key=itemgetter(1))[0]))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Which LDA topic maximally describes this document?\n", "Topic probability mixture: (5, 0.08) (14, 0.06) (44, 0.33) (54, 0.04) (57, 0.26) (79, 0.04) (82, 0.01) (91, 0.17) \n", "Topic with the highest probability: topic #44\n" ] } ], "prompt_number": 44 }, { "cell_type": "code", "collapsed": false, "input": [ "# LSI yields signed projection weights, not probabilities, so they are labelled as weights.\n", "# NOTE(review): the document is passed as raw bag-of-words counts; confirm this matches\n", "# the representation the `lsi` model was trained on.\n", "doc_topics_lsi = lsi[corpus[i]]  # project once, reuse for printing and arg-max\n", "print ('Which LSI topic maximally describes a document?')\n", "print ('Topic weight mixture: ',end=\"\")\n", "for j,k in doc_topics_lsi:\n", "    print('(%d, %3.2f)' % (j,k), end = ' ')\n", "print ('\\nTopic with the highest weight: topic #' + str(max(doc_topics_lsi,key=itemgetter(1))[0]))" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "Which LSI topic maximally describes a document?\n", "Topic probability mixture: (0, 2.96) (1, -1.01) (2, -0.71) (3, 1.63) (4, -0.48) (5, -2.29) (6, 1.11) (7, -0.49) (8, -1.09) (9, 0.54) (10, -0.50) (11, -0.05) (12, 0.51) (13, 
0.29) (14, 0.05) (15, 0.61) (16, -1.00) (17, 0.23) (18, -0.92) (19, 0.27) (20, 0.29) (21, -0.41) (22, -0.67) (23, 0.15) (24, -0.23) (25, 0.04) (26, -0.01) (27, 0.04) (28, 0.16) (29, -0.33) (30, 0.29) (31, -0.66) (32, 0.74) (33, -1.75) (34, -0.26) (35, -0.75) (36, -0.16) (37, 0.37) (38, 0.21) (39, 0.86) (40, -0.32) (41, -1.05) (42, 1.19) (43, -0.55) (44, 0.53) (45, -0.76) (46, -1.03) (47, -0.26) (48, -0.05) (49, 0.60) (50, 0.71) (51, 0.82) (52, -0.16) (53, 0.78) (54, -0.19) (55, 0.01) (56, -0.24) (57, 0.67) (58, 0.15) (59, 0.95) (60, -0.13) (61, -0.23) (62, 0.04) (63, -0.38) (64, 0.34) (65, 0.66) (66, 0.43) (67, 0.36) (68, 0.99) (69, 0.18) (70, 0.49) (71, -0.33) (72, 0.15) (73, -0.38) (74, -0.27) (75, -0.31) (76, 0.05) (77, 0.15) (78, -1.17) (79, -0.02) (80, 0.15) (81, 0.50) (82, -0.01) (83, 1.27) (84, 0.09) (85, -1.15) (86, -0.41) (87, -0.28) (88, -0.84) (89, 0.87) (90, 0.70) (91, 0.24) (92, -0.20) (93, -0.13) (94, -0.33) (95, -0.56) (96, -1.16) (97, 0.43) (98, 0.44) (99, 0.11) \n", "Topic with the highest probability: topic #0\n" ] } ], "prompt_number": 45 } ], "metadata": {} } ] }