{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RCV1 dataset: train/test split\n",
    "\n",
    "Loads the RCV1 corpus through scikit-learn and splits it into training and test portions.\n",
    "\n",
    "## Reference tutorials\n",
    "\n",
    "Doc2Vec background reading:\n",
    "\n",
    "- [Klintberg - Doc2Vec Tutorial Using Gensim](https://medium.com/@klintcho/doc2vec-tutorial-using-gensim-ab3ac03d3a1#.rccx4cq1p)\n",
    "\n",
    "- [Sentiment Analysis Using Doc2Vec](http://linanqiu.github.io/2015/10/07/word2vec-sentiment/)\n",
    "\n",
    "- [Official Gensim Doc2Vec Tutorial](https://rare-technologies.com/doc2vec-tutorial/)\n",
    "\n",
    "- [Doc2Vec IMDB notebook](https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/doc2vec-IMDB.ipynb)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Imports and data loading"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import logging\n",
    "\n",
    "from sklearn.datasets import fetch_rcv1\n",
    "from sklearn.multiclass import OneVsRestClassifier\n",
    "from sklearn.metrics import f1_score, precision_score, recall_score\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn import svm\n",
    "\n",
    "# Attach a default handler to the root logger so library log messages are shown.\n",
    "logging.basicConfig()\n",
    "\n",
    "# Downloads RCV1 on first use; later calls read scikit-learn's local data cache.\n",
    "rcv1 = fetch_rcv1()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train/test split\n",
    "\n",
    "The rows returned by `fetch_rcv1` are already ordered so that the training documents come first, which is why a plain positional slice is enough here."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Size of the training portion. Per the scikit-learn documentation for\n",
    "# fetch_rcv1, the first 23149 samples are the training set of the LYRL2004\n",
    "# chronological split and the remaining samples are the test set.\n",
    "training_samples = 23149\n",
    "\n",
    "X_train = rcv1.data[:training_samples]\n",
    "X_test = rcv1.data[training_samples:]\n",
    "\n",
    "y_train = rcv1.target[:training_samples]\n",
    "y_test = rcv1.target[training_samples:]\n",
    "\n",
    "# Sanity check: features and labels must stay aligned row-for-row.\n",
    "assert X_train.shape[0] == y_train.shape[0] == training_samples\n",
    "assert X_test.shape[0] == y_test.shape[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Training sample IDs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# IDs of the training rows, sliced with the same bound as X_train / y_train.\n",
    "sample_ids = rcv1.sample_id[:training_samples]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "26150"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# ID of the last row in the training portion.\n",
    "sample_ids[-1]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}