initial commit

2017-02-22 00:17:04 +01:00 · 2017-02-22 00:17:04 +01:00 · 632b85a6b7
commit 632b85a6b7
2 changed files with 157 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,3 @@
 tensorflow==1.0.0
 tflearn==0.3
--- a/Word2Vec.ipynb
+++ b/Word2Vec.ipynb
@ -0,0 +1,154 @@
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from __future__ import division, print_function, absolute_import\n",
    "\n",
    "import tflearn\n",
    "from tflearn.data_utils import to_categorical, pad_sequences\n",
    "from tflearn.datasets import imdb\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train, valid, test = imdb.load_data(path='imdb.pkl', n_words=10000, valid_portion=0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "trainX, trainY = train\n",
    "validX, validY = valid\n",
    "testX, testY = test\n",
    "\n",
    "# Test set: 25% of the full test set\n",
    "test_len = int(0.25*len(testX))\n",
    "testX = testX[:test_len]\n",
    "testY = testY[:test_len]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "#Data preprocessing\n",
    "# Sequence padding\n",
    "trainX = pad_sequences(trainX, maxlen=100, value=0.)\n",
    "validX = pad_sequences(validX, maxlen=100, value=0.)\n",
    "testX = pad_sequences(testX, maxlen=100)\n",
    "\n",
    "#Convert labels to binary vectors\n",
    "trainY = to_categorical(trainY, nb_classes=2)\n",
    "validY = to_categorical(validY, nb_classes=2)\n",
    "testY = to_categorical(testY, nb_classes=2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Network building\n",
    "net = tflearn.input_data([None, 100])\n",
    "net = tflearn.embedding(net, input_dim=10000, output_dim=128)\n",
    "net = tflearn.lstm(net, 128, dropout=0.8)\n",
    "net = tflearn.fully_connected(net, 2, activation='softmax')\n",
    "net = tflearn.regression(net, optimizer='adam', learning_rate=0.001, loss='categorical_crossentropy')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training Step: 7039  | total loss: \u001b[1m\u001b[32m0.03469\u001b[0m\u001b[0m | time: 47.844s\n",
      "| Adam | epoch: 010 | loss: 0.03469 - acc: 0.9900 -- iter: 22496/22500\n",
      "Training Step: 7040  | total loss: \u001b[1m\u001b[32m0.03141\u001b[0m\u001b[0m | time: 49.158s\n",
      "| Adam | epoch: 010 | loss: 0.03141 - acc: 0.9910 | val_loss: 0.98614 - val_acc: 0.7904 -- iter: 22500/22500\n",
      "--\n"
     ]
    }
   ],
   "source": [
    "# Training\n",
    "model = tflearn.DNN(net, tensorboard_verbose=0)\n",
    "model.fit(trainX, trainY, validation_set=(validX, validY), show_metric=True, batch_size=32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0.84079999971389774]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## Testing the model\n",
    "model.evaluate(testX[:test_len], testY[:test_len])"
   ]
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }