finished building rnn

7 years ago · d452dd8b78
parent dfa204818e
commit d452dd8b78
2 changed files with 370 additions and 51 deletions
--- a/tv-script-generation/.ipynb_checkpoints/dlnd_tv_script_generation-checkpoint.ipynb
+++ b/tv-script-generation/.ipynb_checkpoints/dlnd_tv_script_generation-checkpoint.ipynb
@ -15,7 +15,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 54,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -126,7 +126,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 47,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -192,7 +192,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 48,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -301,7 +301,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
@ -310,7 +310,8 @@
    "from collections import namedtuple\n",
    "\n",
    "hyper_params = (('embedding_size', 128),\n",
-    "                \n",
+    "                ('lstm_layers', 2),\n",
+    "                ('keep_prob', 0.5)\n",
    "               )\n",
    "\n",
    "\n",
@ -342,7 +343,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
@ -396,7 +397,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -453,7 +454,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 36,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -478,7 +479,12 @@
    "    \"\"\"\n",
    "    lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)\n",
    "    \n",
-    "    cell = tf.contrib.rnn.MultiRNNCell([lstm] * 2)\n",
+    "    # add a dropout wrapper\n",
+    "    drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=HYPER.keep_prob)\n",
+    "    \n",
+    "    #cell = tf.contrib.rnn.MultiRNNCell([drop] * HYPER.lstm_layers)\n",
+    "    \n",
+    "    cell = tf.contrib.rnn.MultiRNNCell([lstm] * HYPER.lstm_layers)\n",
    "    \n",
    "    initial_state = cell.zero_state(batch_size, tf.float32)\n",
    "    initial_state = tf.identity(initial_state, name='initial_state')\n",
@ -505,7 +511,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -561,7 +567,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 11,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -584,6 +590,9 @@
    "    :param inputs: Input text data\n",
    "    :return: Tuple (Outputs, Final State)\n",
    "    \"\"\"\n",
+    "    ## NOTES\n",
+    "    # dynamic rnn automatically takes the seq size in dim=1 [batch_size, max_time, ...] time_major==false (default)\n",
+    "    \n",
    "    outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)\n",
    "    final_state = tf.identity(final_state, name='final_state')\n",
    "    \n",
@ -615,7 +624,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 38,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -623,15 +632,10 @@
   },
   "outputs": [
    {
-     "ename": "AssertionError",
-     "evalue": "Final state doesn't have the \"name\" attribute.  Are you using build_rnn?",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-37-73c6b69893b4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     16\u001b[0m \u001b[0mDON\u001b[0m\u001b[0;31m'\u001b[0m\u001b[0mT\u001b[0m \u001b[0mMODIFY\u001b[0m \u001b[0mANYTHING\u001b[0m \u001b[0mIN\u001b[0m \u001b[0mTHIS\u001b[0m \u001b[0mCELL\u001b[0m \u001b[0mTHAT\u001b[0m \u001b[0mIS\u001b[0m \u001b[0mBELOW\u001b[0m \u001b[0mTHIS\u001b[0m \u001b[0mLINE\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m \"\"\"\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0mtests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_build_nn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuild_nn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m/home/spike/ml/udacity/nd101/deep-learning-modified/tv-script-generation/problem_unittests.py\u001b[0m in \u001b[0;36mtest_build_nn\u001b[0;34m(build_nn)\u001b[0m\n\u001b[1;32m    242\u001b[0m         \u001b[0;31m# Check name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    243\u001b[0m         \u001b[0;32massert\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfinal_state\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'name'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 244\u001b[0;31m             \u001b[0;34m'Final state doesn\\'t have the \"name\" attribute.  Are you using build_rnn?'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    245\u001b[0m         \u001b[0;32massert\u001b[0m \u001b[0mfinal_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'final_state:0'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    246\u001b[0m             \u001b[0;34m'Final state doesn\\'t have the correct name. Found the name {}. Are you using build_rnn?'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfinal_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mAssertionError\u001b[0m: Final state doesn't have the \"name\" attribute.  Are you using build_rnn?"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Tests Passed\n"
     ]
    }
   ],
@ -645,9 +649,39 @@
    "    :param vocab_size: Vocabulary size\n",
    "    :return: Tuple (Logits, FinalState)\n",
    "    \"\"\"\n",
+    "    \n",
+    "    num_outputs = vocab_size\n",
+    "    batch_size = input_data.get_shape().as_list()[0]\n",
+    "    \n",
    "    embed = get_embed(input_data, vocab_size, HYPER.embedding_size)\n",
    "    \n",
-    "    return None, None\n",
+    "    \n",
+    "    ## NOTES\n",
+    "    # dynamic rnn automatically takes the seq size in dim=1 [batch_size, max_time, ...] see: time_major==false (default)\n",
+    "    \n",
+    "    ## Output shape\n",
+    "    ## [batch_size, time_step, rnn_size]\n",
+    "    raw_rnn_outputs, final_state = build_rnn(cell, embed)\n",
+    "    \n",
+    "    # Put outputs in rows\n",
+    "    # make the output into [batch_size*time_step, rnn_size] for easy matmul\n",
+    "    outputs = tf.reshape(raw_rnn_outputs, [-1, rnn_size])\n",
+    "    \n",
+    "    \n",
+    "    # Question, why are we using linear activation and not softmax ?\n",
+    "    # My Guess: because seq2seq.sequence_loss has an efficient way to calculate the loss directly from logits \n",
+    "    with tf.variable_scope('linear_layer'):\n",
+    "        linear_w = tf.Variable(tf.truncated_normal((rnn_size, num_outputs), stddev=0.1), name='linear_w')\n",
+    "        linear_b = tf.Variable(tf.zeros(num_outputs), name='linear_b')\n",
+    "        \n",
+    "    logits = tf.matmul(outputs, linear_w) + linear_b\n",
+    "    \n",
+    "    # Reshape the logits back into the original input shape -> [batch_size, seq_len, num_classes]\n",
+    "    # We do this because the loss function seq2seq.sequence_loss takes as logits a shape of [batch_size,seq_len,num_decoded_symbols]\n",
+    "    logits = tf.reshape(logits, [batch_size, -1, num_outputs])\n",
+    "    \n",
+    "    \n",
+    "    return logits, final_state\n",
    "\n",
    "\n",
    "\"\"\"\n",
@ -694,13 +728,66 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 141,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Stored '_input' (ndarray)\n",
+      "Stored '_target' (ndarray)\n",
+      "Stored 'test_int_text' (list)\n"
+     ]
+    }
+   ],
+   "source": [
+    "batch_size = 128\n",
+    "seq_length = 5\n",
+    "slice_size = batch_size * seq_length\n",
+    "test_int_text = list(range(1000*seq_length))\n",
+    "n_batches = int(len(test_int_text)/slice_size)\n",
+    "\n",
+    "# input part\n",
+    "_input = np.array(int_text[:n_batches*slice_size])\n",
+    "\n",
+    "# target part\n",
+    "_target = np.array(int_text[1:n_batches*slice_size + 1])\n",
+    "\n",
+    "%store _input\n",
+    "%store _target\n",
+    "%store test_int_text\n",
+    "\n",
+    "for b in range(n_batches):\n",
+    "    print \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 174,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "AttributeError",
+     "evalue": "'list' object has no attribute 'shape'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-174-903ff1c73bcc>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     49\u001b[0m \u001b[0mDON\u001b[0m\u001b[0;31m'\u001b[0m\u001b[0mT\u001b[0m \u001b[0mMODIFY\u001b[0m \u001b[0mANYTHING\u001b[0m \u001b[0mIN\u001b[0m \u001b[0mTHIS\u001b[0m \u001b[0mCELL\u001b[0m \u001b[0mTHAT\u001b[0m \u001b[0mIS\u001b[0m \u001b[0mBELOW\u001b[0m \u001b[0mTHIS\u001b[0m \u001b[0mLINE\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     50\u001b[0m \"\"\"\n\u001b[0;32m---> 51\u001b[0;31m \u001b[0mtests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_get_batches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mget_batches\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m/home/spike/ml/udacity/nd101/deep-learning-modified/tv-script-generation/problem_unittests.py\u001b[0m in \u001b[0;36mtest_get_batches\u001b[0;34m(get_batches)\u001b[0m\n\u001b[1;32m     77\u001b[0m         \u001b[0mtest_seq_length\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     78\u001b[0m         \u001b[0mtest_int_text\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mtest_seq_length\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 79\u001b[0;31m         \u001b[0mbatches\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_batches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_int_text\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_batch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_seq_length\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     80\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     81\u001b[0m         \u001b[0;31m# Check type\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m<ipython-input-174-903ff1c73bcc>\u001b[0m in \u001b[0;36mget_batches\u001b[0;34m(int_text, batch_size, seq_length)\u001b[0m\n\u001b[1;32m     37\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     38\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 39\u001b[0;31m     \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvectorize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_input\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_target\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     41\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m<ipython-input-174-903ff1c73bcc>\u001b[0m in \u001b[0;36mvectorize\u001b[0;34m(_inputs, _targets)\u001b[0m\n\u001b[1;32m     24\u001b[0m         \u001b[0;31m# Go through all inputs, targets and split them into batch_size*seq\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     25\u001b[0m         \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtargets\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mseq_length\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_targets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mseq_length\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m         \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m     27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     28\u001b[0m         \u001b[0;31m# Stack inputs and targets into batch_size * seq_length\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'shape'"
+     ]
+    }
+   ],
   "source": [
    "def get_batches(int_text, batch_size, seq_length):\n",
    "    \"\"\"\n",
@ -710,7 +797,44 @@
    "    :param seq_length: The length of sequence\n",
    "    :return: Batches as a Numpy array\n",
    "    \"\"\"\n",
-    "    # TODO: Implement Function\n",
+    "    \n",
+    "    slice_size = batch_size * seq_length\n",
+    "    n_batches = int(len(int_text)/slice_size)\n",
+    "    \n",
+    "    # input part\n",
+    "    _input = np.array(int_text[:n_batches*slice_size])\n",
+    "    \n",
+    "    # target part\n",
+    "    _target = np.array(int_text[1:n_batches*slice_size + 1])\n",
+    "    \n",
+    "    \n",
+    "    def vectorize(_inputs, _targets):\n",
+    "        # Takes flattened inputs and targets\n",
+    "        # returns shape [n_batches, 2, batch_size, seq_length]\n",
+    "        \n",
+    "        # Go through all inputs, targets and split them into batch_size*seq list of items\n",
+    "        # [batch*seq, batch*seq, ...]\n",
+    "        inputs, targets = np.split(_inputs, batch_size*seq_length), np.split(_targets, batch_size*seq_length)\n",
+    "        \n",
+    "        # Reshape into [batch x seq, batch x seq, ...]\n",
+    "        \n",
+    "        # Stack inputs and targets into batch_size * seq_length \n",
+    "        # Shape should become batch_size x seq_length\n",
+    "        inputs, targets = np.stack(inputs), np.stack(targets)\n",
+    "        \n",
+    "        \n",
+    "        # Stack Inputs and Targets\n",
+    "        batches = np.concatenate((inputs, targets))\n",
+    "        \n",
+    "        return batch\n",
+    "    \n",
+    "    \n",
+    "    result = vectorize(_input, _target)\n",
+    "    \n",
+    "        \n",
+    "    # prepare result as reference for target shape\n",
+    "    #result = np.empty((n_batches, 2, batch_size, seq_length), dtype=np.int32)\n",
+    "    \n",
    "    return None\n",
    "\n",
    "\n",
--- a/tv-script-generation/dlnd_tv_script_generation.ipynb
+++ b/tv-script-generation/dlnd_tv_script_generation.ipynb
@ -15,7 +15,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 54,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -126,7 +126,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 47,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -192,7 +192,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 48,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -301,7 +301,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
@ -310,7 +310,8 @@
    "from collections import namedtuple\n",
    "\n",
    "hyper_params = (('embedding_size', 128),\n",
-    "                \n",
+    "                ('lstm_layers', 2),\n",
+    "                ('keep_prob', 0.5)\n",
    "               )\n",
    "\n",
    "\n",
@ -342,7 +343,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
@ -396,7 +397,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -453,7 +454,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 36,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -478,7 +479,12 @@
    "    \"\"\"\n",
    "    lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)\n",
    "    \n",
-    "    cell = tf.contrib.rnn.MultiRNNCell([lstm] * 2)\n",
+    "    # add a dropout wrapper\n",
+    "    drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=HYPER.keep_prob)\n",
+    "    \n",
+    "    #cell = tf.contrib.rnn.MultiRNNCell([drop] * HYPER.lstm_layers)\n",
+    "    \n",
+    "    cell = tf.contrib.rnn.MultiRNNCell([lstm] * HYPER.lstm_layers)\n",
    "    \n",
    "    initial_state = cell.zero_state(batch_size, tf.float32)\n",
    "    initial_state = tf.identity(initial_state, name='initial_state')\n",
@ -505,7 +511,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 10,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -561,7 +567,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 11,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -584,6 +590,9 @@
    "    :param inputs: Input text data\n",
    "    :return: Tuple (Outputs, Final State)\n",
    "    \"\"\"\n",
+    "    ## NOTES\n",
+    "    # dynamic rnn automatically takes the seq size in dim=1 [batch_size, max_time, ...] time_major==false (default)\n",
+    "    \n",
    "    outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)\n",
    "    final_state = tf.identity(final_state, name='final_state')\n",
    "    \n",
@ -615,7 +624,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 38,
   "metadata": {
    "collapsed": false,
    "deletable": true,
@ -623,15 +632,10 @@
   },
   "outputs": [
    {
-     "ename": "AssertionError",
-     "evalue": "Final state doesn't have the \"name\" attribute.  Are you using build_rnn?",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mAssertionError\u001b[0m                            Traceback (most recent call last)",
-      "\u001b[0;32m<ipython-input-37-73c6b69893b4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m     16\u001b[0m \u001b[0mDON\u001b[0m\u001b[0;31m'\u001b[0m\u001b[0mT\u001b[0m \u001b[0mMODIFY\u001b[0m \u001b[0mANYTHING\u001b[0m \u001b[0mIN\u001b[0m \u001b[0mTHIS\u001b[0m \u001b[0mCELL\u001b[0m \u001b[0mTHAT\u001b[0m \u001b[0mIS\u001b[0m \u001b[0mBELOW\u001b[0m \u001b[0mTHIS\u001b[0m \u001b[0mLINE\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     17\u001b[0m \"\"\"\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0mtests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_build_nn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuild_nn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m/home/spike/ml/udacity/nd101/deep-learning-modified/tv-script-generation/problem_unittests.py\u001b[0m in \u001b[0;36mtest_build_nn\u001b[0;34m(build_nn)\u001b[0m\n\u001b[1;32m    242\u001b[0m         \u001b[0;31m# Check name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    243\u001b[0m         \u001b[0;32massert\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfinal_state\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'name'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 244\u001b[0;31m             \u001b[0;34m'Final state doesn\\'t have the \"name\" attribute.  Are you using build_rnn?'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    245\u001b[0m         \u001b[0;32massert\u001b[0m \u001b[0mfinal_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'final_state:0'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    246\u001b[0m             \u001b[0;34m'Final state doesn\\'t have the correct name. Found the name {}. Are you using build_rnn?'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfinal_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mAssertionError\u001b[0m: Final state doesn't have the \"name\" attribute.  Are you using build_rnn?"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Tests Passed\n"
     ]
    }
   ],
@ -645,9 +649,39 @@
    "    :param vocab_size: Vocabulary size\n",
    "    :return: Tuple (Logits, FinalState)\n",
    "    \"\"\"\n",
+    "    \n",
+    "    num_outputs = vocab_size\n",
+    "    batch_size = input_data.get_shape().as_list()[0]\n",
+    "    \n",
    "    embed = get_embed(input_data, vocab_size, HYPER.embedding_size)\n",
    "    \n",
-    "    return None, None\n",
+    "    \n",
+    "    ## NOTES\n",
+    "    # dynamic rnn automatically takes the seq size in dim=1 [batch_size, max_time, ...] see: time_major==false (default)\n",
+    "    \n",
+    "    ## Output shape\n",
+    "    ## [batch_size, time_step, rnn_size]\n",
+    "    raw_rnn_outputs, final_state = build_rnn(cell, embed)\n",
+    "    \n",
+    "    # Put outputs in rows\n",
+    "    # make the output into [batch_size*time_step, rnn_size] for easy matmul\n",
+    "    outputs = tf.reshape(raw_rnn_outputs, [-1, rnn_size])\n",
+    "    \n",
+    "    \n",
+    "    # Question, why are we using linear activation and not softmax ?\n",
+    "    # My Guess: because seq2seq.sequence_loss has an efficient way to calculate the loss directly from logits \n",
+    "    with tf.variable_scope('linear_layer'):\n",
+    "        linear_w = tf.Variable(tf.truncated_normal((rnn_size, num_outputs), stddev=0.1), name='linear_w')\n",
+    "        linear_b = tf.Variable(tf.zeros(num_outputs), name='linear_b')\n",
+    "        \n",
+    "    logits = tf.matmul(outputs, linear_w) + linear_b\n",
+    "    \n",
+    "    # Reshape the logits back into the original input shape -> [batch_size, seq_len, num_classes]\n",
+    "    # We do this because the loss function seq2seq.sequence_loss takes as logits a shape of [batch_size,seq_len,num_decoded_symbols]\n",
+    "    logits = tf.reshape(logits, [batch_size, -1, num_outputs])\n",
+    "    \n",
+    "    \n",
+    "    return logits, final_state\n",
    "\n",
    "\n",
    "\"\"\"\n",
@ -694,13 +728,150 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 238,
   "metadata": {
    "collapsed": false,
    "deletable": true,
    "editable": true
   },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(7, 1280)\n",
+      "[[  0   1   2   3   4]\n",
+      " [  5   6   7   8   9]\n",
+      " [ 10  11  12  13  14]\n",
+      " [ 15  16  17  18  19]\n",
+      " [ 20  21  22  23  24]\n",
+      " [ 25  26  27  28  29]\n",
+      " [ 30  31  32  33  34]\n",
+      " [ 35  36  37  38  39]\n",
+      " [ 40  41  42  43  44]\n",
+      " [ 45  46  47  48  49]\n",
+      " [ 50  51  52  53  54]\n",
+      " [ 55  56  57  58  59]\n",
+      " [ 60  61  62  63  64]\n",
+      " [ 65  66  67  68  69]\n",
+      " [ 70  71  72  73  74]\n",
+      " [ 75  76  77  78  79]\n",
+      " [ 80  81  82  83  84]\n",
+      " [ 85  86  87  88  89]\n",
+      " [ 90  91  92  93  94]\n",
+      " [ 95  96  97  98  99]\n",
+      " [100 101 102 103 104]\n",
+      " [105 106 107 108 109]\n",
+      " [110 111 112 113 114]\n",
+      " [115 116 117 118 119]\n",
+      " [120 121 122 123 124]\n",
+      " [125 126 127 128 129]\n",
+      " [130 131 132 133 134]\n",
+      " [135 136 137 138 139]\n",
+      " [140 141 142 143 144]\n",
+      " [145 146 147 148 149]\n",
+      " [150 151 152 153 154]\n",
+      " [155 156 157 158 159]\n",
+      " [160 161 162 163 164]\n",
+      " [165 166 167 168 169]\n",
+      " [170 171 172 173 174]\n",
+      " [175 176 177 178 179]\n",
+      " [180 181 182 183 184]\n",
+      " [185 186 187 188 189]\n",
+      " [190 191 192 193 194]\n",
+      " [195 196 197 198 199]\n",
+      " [200 201 202 203 204]\n",
+      " [205 206 207 208 209]\n",
+      " [210 211 212 213 214]\n",
+      " [215 216 217 218 219]\n",
+      " [220 221 222 223 224]\n",
+      " [225 226 227 228 229]\n",
+      " [230 231 232 233 234]\n",
+      " [235 236 237 238 239]\n",
+      " [240 241 242 243 244]\n",
+      " [245 246 247 248 249]\n",
+      " [250 251 252 253 254]\n",
+      " [255 256 257 258 259]\n",
+      " [260 261 262 263 264]\n",
+      " [265 266 267 268 269]\n",
+      " [270 271 272 273 274]\n",
+      " [275 276 277 278 279]\n",
+      " [280 281 282 283 284]\n",
+      " [285 286 287 288 289]\n",
+      " [290 291 292 293 294]\n",
+      " [295 296 297 298 299]\n",
+      " [300 301 302 303 304]\n",
+      " [305 306 307 308 309]\n",
+      " [310 311 312 313 314]\n",
+      " [315 316 317 318 319]\n",
+      " [320 321 322 323 324]\n",
+      " [325 326 327 328 329]\n",
+      " [330 331 332 333 334]\n",
+      " [335 336 337 338 339]\n",
+      " [340 341 342 343 344]\n",
+      " [345 346 347 348 349]\n",
+      " [350 351 352 353 354]\n",
+      " [355 356 357 358 359]\n",
+      " [360 361 362 363 364]\n",
+      " [365 366 367 368 369]\n",
+      " [370 371 372 373 374]\n",
+      " [375 376 377 378 379]\n",
+      " [380 381 382 383 384]\n",
+      " [385 386 387 388 389]\n",
+      " [390 391 392 393 394]\n",
+      " [395 396 397 398 399]\n",
+      " [400 401 402 403 404]\n",
+      " [405 406 407 408 409]\n",
+      " [410 411 412 413 414]\n",
+      " [415 416 417 418 419]\n",
+      " [420 421 422 423 424]\n",
+      " [425 426 427 428 429]\n",
+      " [430 431 432 433 434]\n",
+      " [435 436 437 438 439]\n",
+      " [440 441 442 443 444]\n",
+      " [445 446 447 448 449]\n",
+      " [450 451 452 453 454]\n",
+      " [455 456 457 458 459]\n",
+      " [460 461 462 463 464]\n",
+      " [465 466 467 468 469]\n",
+      " [470 471 472 473 474]\n",
+      " [475 476 477 478 479]\n",
+      " [480 481 482 483 484]\n",
+      " [485 486 487 488 489]\n",
+      " [490 491 492 493 494]\n",
+      " [495 496 497 498 499]\n",
+      " [500 501 502 503 504]\n",
+      " [505 506 507 508 509]\n",
+      " [510 511 512 513 514]\n",
+      " [515 516 517 518 519]\n",
+      " [520 521 522 523 524]\n",
+      " [525 526 527 528 529]\n",
+      " [530 531 532 533 534]\n",
+      " [535 536 537 538 539]\n",
+      " [540 541 542 543 544]\n",
+      " [545 546 547 548 549]\n",
+      " [550 551 552 553 554]\n",
+      " [555 556 557 558 559]\n",
+      " [560 561 562 563 564]\n",
+      " [565 566 567 568 569]\n",
+      " [570 571 572 573 574]\n",
+      " [575 576 577 578 579]\n",
+      " [580 581 582 583 584]\n",
+      " [585 586 587 588 589]\n",
+      " [590 591 592 593 594]\n",
+      " [595 596 597 598 599]\n",
+      " [600 601 602 603 604]\n",
+      " [605 606 607 608 609]\n",
+      " [610 611 612 613 614]\n",
+      " [615 616 617 618 619]\n",
+      " [620 621 622 623 624]\n",
+      " [625 626 627 628 629]\n",
+      " [630 631 632 633 634]\n",
+      " [635 636 637 638 639]]\n",
+      "Tests Passed\n"
+     ]
+    }
+   ],
   "source": [
    "def get_batches(int_text, batch_size, seq_length):\n",
    "    \"\"\"\n",
@ -710,8 +881,32 @@
    "    :param seq_length: The length of sequence\n",
    "    :return: Batches as a Numpy array\n",
    "    \"\"\"\n",
-    "    # TODO: Implement Function\n",
-    "    return None\n",
+    "    \n",
+    "    slice_size = batch_size * seq_length\n",
+    "    n_batches = int(len(int_text)/slice_size)\n",
+    "    \n",
+    "    # input part\n",
+    "    _inputs = np.array(int_text[:n_batches*slice_size])\n",
+    "    \n",
+    "    # target part\n",
+    "    _targets = np.array(int_text[1:n_batches*slice_size + 1])\n",
+    "    \n",
+    "\n",
+    "    # Split the flat inputs and targets into n_batches chunks of batch_size*seq_len items each\n",
+    "    # [batch, batch, ...]\n",
+    "    inputs, targets = np.split(_inputs, n_batches), np.split(_targets, n_batches)\n",
+    "    \n",
+    "    # concat inputs and targets\n",
+    "    batches = np.c_[inputs, targets]\n",
+    "    print(batches.shape)\n",
+    "    \n",
+    "    # Reshape into final batches output\n",
+    "    batches = batches.reshape((-1, 2, batch_size, seq_length))\n",
+    "\n",
+    "    print(batches[0][0])\n",
+    "\n",
+    "    \n",
+    "    return batches\n",
    "\n",
    "\n",
    "\"\"\"\n",