{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# IMDB binary classification with an LSTM (TFLearn)\n",
    "\n",
    "This notebook loads the IMDB dataset through `tflearn.datasets.imdb`, pads every sequence to a fixed length, one-hot encodes the labels, and trains an embedding + LSTM + softmax network.\n",
    "\n",
    "**Saved run:** at the end of epoch 10 the training log shows accuracy 0.9910 (loss 0.03141) against validation accuracy 0.7904 (val_loss 0.98614), and `model.evaluate` reports 0.8408 on the first 25% of the test split. The gap between training and validation metrics suggests the model is overfitting.\n",
    "\n",
    "**Note:** the stored outputs come from a session in which cells were executed out of order (see the execution counts). Use *Restart & Run All* to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from __future__ import division, print_function, absolute_import\n",
    "\n",
    "import tflearn\n",
    "from tflearn.data_utils import to_categorical, pad_sequences\n",
    "from tflearn.datasets import imdb\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: every tunable value lives here so that cells which must\n",
    "# agree with each other (vocabulary size, sequence length) cannot drift apart.\n",
    "DATA_PATH = 'imdb.pkl'\n",
    "N_WORDS = 10000        # used for load_data(n_words=...) and embedding(input_dim=...)\n",
    "VALID_PORTION = 0.1    # forwarded to load_data(valid_portion=...)\n",
    "TEST_FRACTION = 0.25   # leading share of the test split kept for evaluation\n",
    "MAX_LEN = 100          # padded sequence length and width of the network input\n",
    "N_CLASSES = 2\n",
    "EMBEDDING_DIM = 128\n",
    "LSTM_UNITS = 128\n",
    "LSTM_DROPOUT = 0.8\n",
    "LEARNING_RATE = 0.001\n",
    "BATCH_SIZE = 32\n",
    "N_EPOCH = 10           # matches the saved training log, which ends at epoch 010"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "train, valid, test = imdb.load_data(path=DATA_PATH, n_words=N_WORDS, valid_portion=VALID_PORTION)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "trainX, trainY = train\n",
    "validX, validY = valid\n",
    "testX, testY = test\n",
    "\n",
    "# Test set: keep only the first TEST_FRACTION of the full test split\n",
    "test_len = int(TEST_FRACTION * len(testX))\n",
    "testX = testX[:test_len]\n",
    "testY = testY[:test_len]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Data preprocessing\n",
    "# Results are stored under new names so that re-running this cell never\n",
    "# feeds already-padded sequences or already-encoded labels back in.\n",
    "\n",
    "# Sequence padding (same fill value for all three splits)\n",
    "trainX_pad = pad_sequences(trainX, maxlen=MAX_LEN, value=0.)\n",
    "validX_pad = pad_sequences(validX, maxlen=MAX_LEN, value=0.)\n",
    "testX_pad = pad_sequences(testX, maxlen=MAX_LEN, value=0.)\n",
    "\n",
    "# Convert labels to binary vectors\n",
    "trainY_cat = to_categorical(trainY, nb_classes=N_CLASSES)\n",
    "validY_cat = to_categorical(validY, nb_classes=N_CLASSES)\n",
    "testY_cat = to_categorical(testY, nb_classes=N_CLASSES)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Network building: input -> embedding -> LSTM -> softmax classifier\n",
    "net = tflearn.input_data([None, MAX_LEN])\n",
    "net = tflearn.embedding(net, input_dim=N_WORDS, output_dim=EMBEDDING_DIM)\n",
    "# NOTE(review): TFLearn appears to treat the recurrent `dropout` argument as a\n",
    "# keep probability rather than a drop rate - confirm against the library docs.\n",
    "net = tflearn.lstm(net, LSTM_UNITS, dropout=LSTM_DROPOUT)\n",
    "net = tflearn.fully_connected(net, N_CLASSES, activation='softmax')\n",
    "net = tflearn.regression(net, optimizer='adam', learning_rate=LEARNING_RATE, loss='categorical_crossentropy')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training Step: 7039 | total loss: \u001b[1m\u001b[32m0.03469\u001b[0m\u001b[0m | time: 47.844s\n",
      "| Adam | epoch: 010 | loss: 0.03469 - acc: 0.9900 -- iter: 22496/22500\n",
      "Training Step: 7040 | total loss: \u001b[1m\u001b[32m0.03141\u001b[0m\u001b[0m | time: 49.158s\n",
      "| Adam | epoch: 010 | loss: 0.03141 - acc: 0.9910 | val_loss: 0.98614 - val_acc: 0.7904 -- iter: 22500/22500\n",
      "--\n"
     ]
    }
   ],
   "source": [
    "# Training\n",
    "model = tflearn.DNN(net, tensorboard_verbose=0)\n",
    "model.fit(trainX_pad, trainY_cat, n_epoch=N_EPOCH, validation_set=(validX_pad, validY_cat), show_metric=True, batch_size=BATCH_SIZE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0.84079999971389774]"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Testing the model\n",
    "# testX_pad / testY_cat already contain only the first TEST_FRACTION of the\n",
    "# test split, so no further slicing is needed here.\n",
    "model.evaluate(testX_pad, testY_cat)"
   ]
  }
 ],
 "metadata": {
  "hide_input": false,
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}