{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"セキュリティ ソフト 種類 印象 どれ No どれ 俺 ここ 種類 ウイルス ウイルス 自分 知識 稲荷 ウイルス 逆 新種 ウイルス パターン ウイルス ウイルス 新種 ウイルス 両方 イラスト ソフト 動画 ソフト 動画 ソフト テロップ 動画 作品 彼女 ポケット スマ ホ メガネ デバイス ッーン 稲荷 俺 なん それ それ 世界 メガネ デバイス 鼻 栓 デバイス 鼻 そこ 光 AR これ 彼女 彼女 立体 俺 彼女 セキュリティ ソフト ノリ クマ キャクター それ 人気 本当 俺 これ 通り 答え 誰 考え 相手 人工 知能 相手 電気 電子 機器 ローン 最新 コンピュータ 性能 ローン 中学生 社会 国会 議事堂 俺 それ 技術 家 PC ソフトウェア これ ウイルス ウェブサイト ソース コード 世界中 俺 胸 ソース コード 俺 人 人 悪意 大学 ひよっこ ソース コード 彼女 僕 それ それ 方向 兆候 読み これ 闇 稲荷 俺 バイト そっ 稲荷 自転車 颯爽 稲荷 なに ドア 鍵 母さん 鍋 声 夕飯 物 なに アンタ 感じ お袋 身の回り 稲荷 俺 調子 鬱々 サンキュー モツ 大盛り モツ やりとり あと 眠\"" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read only the first line of the space-separated corpus file.\n", "# The function form of open() closes the file once readline returns.\n", "lines = open(readline, \"./dataset/shosetsu1-wakati.txt\", \"r\")" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "102-element Array{SubString{String},1}:\n", " \"ウェブサイト\"\n", " \"母さん\" \n", " \"テロップ\" \n", " \"大盛り\" \n", " \"社会\" \n", " \"イラスト\" \n", " \"中学生\" \n", " \"家\" \n", " \"読み\" \n", " \"夕飯\" \n", " \"知識\" \n", " \"闇\" \n", " \"答え\" \n", " ⋮ \n", " \"No\" \n", " \"胸\" \n", " \"方向\" \n", " \"ソフト\" \n", " \"颯爽\" \n", " \"動画\" \n", " \"ソース\" \n", " \"これ\" \n", " \"ひよっこ\" \n", " \"鍵\" \n", " \"最新\" \n", " \"機器\" " ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pass the tokens through a Set to drop duplicates, then collect them back into an array.\n", "word = collect(Set(split(lines, \" \")))" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6-element Array{SubString{String},1}:\n", " \"programmer\"\n", " \"I\" \n", " \"am\" \n", " \"a\" \n", " \"like\" \n", " \"dog\" " ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Pass the tokens through a Set to drop duplicates, then collect them back into an array.\n", "sample_text = collect(Set(split(\"I am a programmer like dog\", \" \")))" ] }, { "cell_type": "code", 
"execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "sigmoid (generic function with 1 method)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Logistic sigmoid of z: 1 / (1 + e^(-z)).\n", "function sigmoid(z)\n", "    return 1 / (1 + exp(-z))\n", "end" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "softmax (generic function with 1 method)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Word2vec-style softmax: for each index i, the probability\n", "#     exp(dot(vtw[i], vw[i])) / sum_j exp(dot(vtw[j], vw[i]))\n", "# vtw and vw are indexable collections of scalars or vectors; vw needs at least\n", "# length(vtw) entries. Returns a Vector{Float64} with one probability per index.\n", "function softmax(vtw, vw)\n", "    n = length(vtw)\n", "    probs = Float64[]\n", "    for i in 1:n\n", "        # Score every vtw[j] against the same vw[i]; the normalizer is the sum of\n", "        # the exponentials of these scores, not the exponential of their sum.\n", "        scores = [dot(vtw[j], vw[i]) for j in 1:n]\n", "        # Shifting by the maximum keeps exp() from overflowing and cancels in the ratio.\n", "        weights = exp.(scores .- maximum(scores))\n", "        push!(probs, weights[i] / sum(weights))\n", "    end\n", "    return probs\n", "end" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "3-element Array{Float64,1}:\n", " 0.2\n", " 2.2\n", " 0.4" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Toy scalar inputs for trying out softmax.\n", "vtw = [0.1,0.4,3.3]\n", "vw = [0.2,2.2,0.4]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "softmax(vtw', vw)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "one_hot_vectorization (generic function with 1 method)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build one one-hot row vector per entry of word_array.\n", "# Element i of the result is a 1 x length(word_array) row of zeros with 1.0 at position i.\n", "# The result is an Any[] array; an empty word_array yields an empty array.\n", "function one_hot_vectorization(word_array)\n", "    vocab_size = length(word_array)\n", "    one_hot_vec = Any[]\n", "    for i in 1:vocab_size\n", "        # The trailing ' turns the column of zeros into a row vector.\n", "        row = zeros(vocab_size)'\n", "        row[i] = 1.0\n", "        push!(one_hot_vec, row)\n", "    end\n", "    return one_hot_vec\n", "end" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6-element Array{SubString{String},1}:\n", 
" \"programmer\"\n", " \"I\" \n", " \"am\" \n", " \"a\" \n", " \"like\" \n", " \"dog\" " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_text" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### ベクトルとテキストの辞書を作る" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Map every word of sample_text to the one-hot vector at the same position in W.\n", "W = one_hot_vectorization(sample_text)\n", "vec_dict = Dict{String, Any}()\n", "for (token, onehot) in zip(sample_text, W)\n", "    vec_dict[token] = onehot\n", "end" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Base.KeyIterator for a Dict{String,Any} with 6 entries. Keys:\n", " \"programmer\"\n", " \"I\"\n", " \"am\"\n", " \"a\"\n", " \"like\"\n", " \"dog\"" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "keys(vec_dict)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Base.ValueIterator for a Dict{String,Any} with 6 entries. 
Values:\n", " [1.0 0.0 … 0.0 0.0]\n", " [0.0 1.0 … 0.0 0.0]\n", " [0.0 0.0 … 0.0 0.0]\n", " [0.0 0.0 … 0.0 0.0]\n", " [0.0 0.0 … 1.0 0.0]\n", " [0.0 0.0 … 0.0 1.0]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "values(vec_dict)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1×6 RowVector{Float64,Array{Float64,1}}:\n", " 1.0 0.0 0.0 0.0 0.0 0.0" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vec_dict[\"programmer\"]" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6-element Array{Any,1}:\n", " [1.0 0.0 … 0.0 0.0]\n", " [0.0 1.0 … 0.0 0.0]\n", " [0.0 0.0 … 0.0 0.0]\n", " [0.0 0.0 … 0.0 0.0]\n", " [0.0 0.0 … 1.0 0.0]\n", " [0.0 0.0 … 0.0 1.0]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# This collection of one-hot vectors becomes the weight W.\n", "W = one_hot_vectorization(sample_text)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.0" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Multiply the row vector stored for dog by its own transpose.\n", "h = let dog = vec_dict[\"dog\"]\n", "    dog * dog'\n", "end" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Julia 0.6.2", "language": "julia", "name": "julia-0.6" }, "language_info": { "file_extension": ".jl", "mimetype": "application/julia", "name": "julia", "version": "0.6.2" } }, "nbformat": 4, "nbformat_minor": 2 }