finished building rnn

master
spike 7 years ago
parent dfa204818e
commit d452dd8b78

@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 54,
"metadata": {
"collapsed": false,
"deletable": true,
@ -126,7 +126,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 47,
"metadata": {
"collapsed": false,
"deletable": true,
@ -192,7 +192,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 48,
"metadata": {
"collapsed": false,
"deletable": true,
@ -301,7 +301,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 34,
"metadata": {
"collapsed": false
},
@ -310,7 +310,8 @@
"from collections import namedtuple\n",
"\n",
"hyper_params = (('embedding_size', 128),\n",
" \n",
" ('lstm_layers', 2),\n",
" ('keep_prob', 0.5)\n",
" )\n",
"\n",
"\n",
@ -342,7 +343,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {
"collapsed": false
},
@ -396,7 +397,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"metadata": {
"collapsed": false,
"deletable": true,
@ -453,7 +454,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 36,
"metadata": {
"collapsed": false,
"deletable": true,
@ -478,7 +479,12 @@
" \"\"\"\n",
" lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)\n",
" \n",
" cell = tf.contrib.rnn.MultiRNNCell([lstm] * 2)\n",
" # add a dropout wrapper\n",
" drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=HYPER.keep_prob)\n",
" \n",
" #cell = tf.contrib.rnn.MultiRNNCell([drop] * HYPER.lstm_layers)\n",
" \n",
" cell = tf.contrib.rnn.MultiRNNCell([lstm] * HYPER.lstm_layers)\n",
" \n",
" initial_state = cell.zero_state(batch_size, tf.float32)\n",
" initial_state = tf.identity(initial_state, name='initial_state')\n",
@ -505,7 +511,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {
"collapsed": false,
"deletable": true,
@ -561,7 +567,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 11,
"metadata": {
"collapsed": false,
"deletable": true,
@ -584,6 +590,9 @@
" :param inputs: Input text data\n",
" :return: Tuple (Outputs, Final State)\n",
" \"\"\"\n",
" ## NOTES\n",
" # dynamic rnn automatically takes the seq size in dim=1 [batch_size, max_time, ...] time_major==false (default)\n",
" \n",
" outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)\n",
" final_state = tf.identity(final_state, name='final_state')\n",
" \n",
@ -615,7 +624,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 38,
"metadata": {
"collapsed": false,
"deletable": true,
@ -623,15 +632,10 @@
},
"outputs": [
{
"ename": "AssertionError",
"evalue": "Final state doesn't have the \"name\" attribute. Are you using build_rnn?",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-37-73c6b69893b4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mDON\u001b[0m\u001b[0;31m'\u001b[0m\u001b[0mT\u001b[0m \u001b[0mMODIFY\u001b[0m \u001b[0mANYTHING\u001b[0m \u001b[0mIN\u001b[0m \u001b[0mTHIS\u001b[0m \u001b[0mCELL\u001b[0m \u001b[0mTHAT\u001b[0m \u001b[0mIS\u001b[0m \u001b[0mBELOW\u001b[0m \u001b[0mTHIS\u001b[0m \u001b[0mLINE\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \"\"\"\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0mtests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_build_nn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuild_nn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/home/spike/ml/udacity/nd101/deep-learning-modified/tv-script-generation/problem_unittests.py\u001b[0m in \u001b[0;36mtest_build_nn\u001b[0;34m(build_nn)\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;31m# Check name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfinal_state\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'name'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 244\u001b[0;31m \u001b[0;34m'Final state doesn\\'t have the \"name\" attribute. Are you using build_rnn?'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 245\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mfinal_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'final_state:0'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 246\u001b[0m \u001b[0;34m'Final state doesn\\'t have the correct name. Found the name {}. Are you using build_rnn?'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfinal_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAssertionError\u001b[0m: Final state doesn't have the \"name\" attribute. Are you using build_rnn?"
"name": "stdout",
"output_type": "stream",
"text": [
"Tests Passed\n"
]
}
],
@ -645,9 +649,39 @@
" :param vocab_size: Vocabulary size\n",
" :return: Tuple (Logits, FinalState)\n",
" \"\"\"\n",
" \n",
" num_outputs = vocab_size\n",
" batch_size = input_data.get_shape().as_list()[0]\n",
" \n",
" embed = get_embed(input_data, vocab_size, HYPER.embedding_size)\n",
" \n",
" return None, None\n",
" \n",
" ## NOTES\n",
" # dynamic rnn automatically takes the seq size in dim=1 [batch_size, max_time, ...] see: time_major==false (default)\n",
" \n",
" ## Output shape\n",
" ## [batch_size, time_step, rnn_size]\n",
" raw_rnn_outputs, final_state = build_rnn(cell, embed)\n",
" \n",
" # Put outputs in rows\n",
" # make the output into [batch_size*time_step, rnn_size] for easy matmul\n",
" outputs = tf.reshape(raw_rnn_outputs, [-1, rnn_size])\n",
" \n",
" \n",
" # Question, why are we using linear activation and not softmax ?\n",
" # My Guess: because seq2seq.sequence_loss has an efficient way to calculate the loss directly from logits \n",
" with tf.variable_scope('linear_layer'):\n",
" linear_w = tf.Variable(tf.truncated_normal((rnn_size, num_outputs), stddev=0.1), name='linear_w')\n",
" linear_b = tf.Variable(tf.zeros(num_outputs), name='linear_b')\n",
" \n",
" logits = tf.matmul(outputs, linear_w) + linear_b\n",
" \n",
" # Reshape the logits back into the original input shape -> [batch_size, seq_len, num_classes]\n",
" # We do this beceause the loss function seq2seq.sequence_loss takes as logits a shape of [batch_size,seq_len,num_decoded_symbols]\n",
" logits = tf.reshape(logits, [batch_size, -1, num_outputs])\n",
" \n",
" \n",
" return logits, final_state\n",
"\n",
"\n",
"\"\"\"\n",
@ -694,13 +728,66 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 141,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Stored '_input' (ndarray)\n",
"Stored '_target' (ndarray)\n",
"Stored 'test_int_text' (list)\n"
]
}
],
"source": [
"batch_size = 128\n",
"seq_length = 5\n",
"slice_size = batch_size * seq_length\n",
"test_int_text = list(range(1000*seq_length))\n",
"n_batches = int(len(test_int_text)/slice_size)\n",
"\n",
"# input part\n",
"_input = np.array(int_text[:n_batches*slice_size])\n",
"\n",
"# target part\n",
"_target = np.array(int_text[1:n_batches*slice_size + 1])\n",
"\n",
"%store _input\n",
"%store _target\n",
"%store test_int_text\n",
"\n",
"for b in range(n_batches):\n",
" print \n"
]
},
{
"cell_type": "code",
"execution_count": 174,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"outputs": [
{
"ename": "AttributeError",
"evalue": "'list' object has no attribute 'shape'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-174-903ff1c73bcc>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[0mDON\u001b[0m\u001b[0;31m'\u001b[0m\u001b[0mT\u001b[0m \u001b[0mMODIFY\u001b[0m \u001b[0mANYTHING\u001b[0m \u001b[0mIN\u001b[0m \u001b[0mTHIS\u001b[0m \u001b[0mCELL\u001b[0m \u001b[0mTHAT\u001b[0m \u001b[0mIS\u001b[0m \u001b[0mBELOW\u001b[0m \u001b[0mTHIS\u001b[0m \u001b[0mLINE\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 50\u001b[0m \"\"\"\n\u001b[0;32m---> 51\u001b[0;31m \u001b[0mtests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_get_batches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mget_batches\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/home/spike/ml/udacity/nd101/deep-learning-modified/tv-script-generation/problem_unittests.py\u001b[0m in \u001b[0;36mtest_get_batches\u001b[0;34m(get_batches)\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[0mtest_seq_length\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 78\u001b[0m \u001b[0mtest_int_text\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m1000\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mtest_seq_length\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 79\u001b[0;31m \u001b[0mbatches\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_batches\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_int_text\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_batch_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtest_seq_length\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 80\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[0;31m# Check type\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-174-903ff1c73bcc>\u001b[0m in \u001b[0;36mget_batches\u001b[0;34m(int_text, batch_size, seq_length)\u001b[0m\n\u001b[1;32m 37\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 39\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvectorize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_input\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0m_target\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 40\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 41\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-174-903ff1c73bcc>\u001b[0m in \u001b[0;36mvectorize\u001b[0;34m(_inputs, _targets)\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[0;31m# Go through all inputs, targets and split them into batch_size*seq\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 25\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtargets\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_inputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mseq_length\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_targets\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mseq_length\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 26\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 27\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 28\u001b[0m \u001b[0;31m# Stack inputs and targets into batch_size * seq_length\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'shape'"
]
}
],
"source": [
"def get_batches(int_text, batch_size, seq_length):\n",
" \"\"\"\n",
@ -710,7 +797,44 @@
" :param seq_length: The length of sequence\n",
" :return: Batches as a Numpy array\n",
" \"\"\"\n",
" # TODO: Implement Function\n",
" \n",
" slice_size = batch_size * seq_length\n",
" n_batches = int(len(int_text)/slice_size)\n",
" \n",
" # input part\n",
" _input = np.array(int_text[:n_batches*slice_size])\n",
" \n",
" # target part\n",
" _target = np.array(int_text[1:n_batches*slice_size + 1])\n",
" \n",
" \n",
" def vectorize(_inputs, _targets):\n",
" # Takes flattened inputs and targets\n",
" # returns shape [n_batches, 2, batch_size, seq_length]\n",
" \n",
" # Go through all inputs, targets and split them into batch_size*seq list of items\n",
" # [batch*seq, batch*seq, ...]\n",
" inputs, targets = np.split(_inputs, batch_size*seq_length), np.split(_targets, batch_size*seq_length)\n",
" \n",
" # Reshape into [batch x seq, batch x seq, ...]\n",
" \n",
" # Stack inputs and targets into batch_size * seq_length \n",
" # Shape should become batch_size x seq_length\n",
" inputs, targets = np.stack(inputs), np.stack(targets)\n",
" \n",
" \n",
" # Stack Inputs and Targets\n",
" batches = np.concatenate((inputs, targets))\n",
" \n",
" return batch\n",
" \n",
" \n",
" result = vectorize(_input, _target)\n",
" \n",
" \n",
" # preare result as reference for target shape\n",
" #result = np.empty((n_batches, 2, batch_size, seq_length), dtype=np.int32)\n",
" \n",
" return None\n",
"\n",
"\n",

@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 54,
"metadata": {
"collapsed": false,
"deletable": true,
@ -126,7 +126,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 47,
"metadata": {
"collapsed": false,
"deletable": true,
@ -192,7 +192,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 48,
"metadata": {
"collapsed": false,
"deletable": true,
@ -301,7 +301,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 34,
"metadata": {
"collapsed": false
},
@ -310,7 +310,8 @@
"from collections import namedtuple\n",
"\n",
"hyper_params = (('embedding_size', 128),\n",
" \n",
" ('lstm_layers', 2),\n",
" ('keep_prob', 0.5)\n",
" )\n",
"\n",
"\n",
@ -342,7 +343,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {
"collapsed": false
},
@ -396,7 +397,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 4,
"metadata": {
"collapsed": false,
"deletable": true,
@ -453,7 +454,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 36,
"metadata": {
"collapsed": false,
"deletable": true,
@ -478,7 +479,12 @@
" \"\"\"\n",
" lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)\n",
" \n",
" cell = tf.contrib.rnn.MultiRNNCell([lstm] * 2)\n",
" # add a dropout wrapper\n",
" drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=HYPER.keep_prob)\n",
" \n",
" #cell = tf.contrib.rnn.MultiRNNCell([drop] * HYPER.lstm_layers)\n",
" \n",
" cell = tf.contrib.rnn.MultiRNNCell([lstm] * HYPER.lstm_layers)\n",
" \n",
" initial_state = cell.zero_state(batch_size, tf.float32)\n",
" initial_state = tf.identity(initial_state, name='initial_state')\n",
@ -505,7 +511,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"metadata": {
"collapsed": false,
"deletable": true,
@ -561,7 +567,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 11,
"metadata": {
"collapsed": false,
"deletable": true,
@ -584,6 +590,9 @@
" :param inputs: Input text data\n",
" :return: Tuple (Outputs, Final State)\n",
" \"\"\"\n",
" ## NOTES\n",
" # dynamic rnn automatically takes the seq size in dim=1 [batch_size, max_time, ...] time_major==false (default)\n",
" \n",
" outputs, final_state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)\n",
" final_state = tf.identity(final_state, name='final_state')\n",
" \n",
@ -615,7 +624,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 38,
"metadata": {
"collapsed": false,
"deletable": true,
@ -623,15 +632,10 @@
},
"outputs": [
{
"ename": "AssertionError",
"evalue": "Final state doesn't have the \"name\" attribute. Are you using build_rnn?",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-37-73c6b69893b4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 16\u001b[0m \u001b[0mDON\u001b[0m\u001b[0;31m'\u001b[0m\u001b[0mT\u001b[0m \u001b[0mMODIFY\u001b[0m \u001b[0mANYTHING\u001b[0m \u001b[0mIN\u001b[0m \u001b[0mTHIS\u001b[0m \u001b[0mCELL\u001b[0m \u001b[0mTHAT\u001b[0m \u001b[0mIS\u001b[0m \u001b[0mBELOW\u001b[0m \u001b[0mTHIS\u001b[0m \u001b[0mLINE\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 17\u001b[0m \"\"\"\n\u001b[0;32m---> 18\u001b[0;31m \u001b[0mtests\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_build_nn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbuild_nn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m/home/spike/ml/udacity/nd101/deep-learning-modified/tv-script-generation/problem_unittests.py\u001b[0m in \u001b[0;36mtest_build_nn\u001b[0;34m(build_nn)\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;31m# Check name\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 243\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfinal_state\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'name'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 244\u001b[0;31m \u001b[0;34m'Final state doesn\\'t have the \"name\" attribute. Are you using build_rnn?'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 245\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0mfinal_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'final_state:0'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 246\u001b[0m \u001b[0;34m'Final state doesn\\'t have the correct name. Found the name {}. Are you using build_rnn?'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfinal_state\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAssertionError\u001b[0m: Final state doesn't have the \"name\" attribute. Are you using build_rnn?"
"name": "stdout",
"output_type": "stream",
"text": [
"Tests Passed\n"
]
}
],
@ -645,9 +649,39 @@
" :param vocab_size: Vocabulary size\n",
" :return: Tuple (Logits, FinalState)\n",
" \"\"\"\n",
" \n",
" num_outputs = vocab_size\n",
" batch_size = input_data.get_shape().as_list()[0]\n",
" \n",
" embed = get_embed(input_data, vocab_size, HYPER.embedding_size)\n",
" \n",
" return None, None\n",
" \n",
" ## NOTES\n",
" # dynamic rnn automatically takes the seq size in dim=1 [batch_size, max_time, ...] see: time_major==false (default)\n",
" \n",
" ## Output shape\n",
" ## [batch_size, time_step, rnn_size]\n",
" raw_rnn_outputs, final_state = build_rnn(cell, embed)\n",
" \n",
" # Put outputs in rows\n",
" # make the output into [batch_size*time_step, rnn_size] for easy matmul\n",
" outputs = tf.reshape(raw_rnn_outputs, [-1, rnn_size])\n",
" \n",
" \n",
" # Question, why are we using linear activation and not softmax ?\n",
" # My Guess: because seq2seq.sequence_loss has an efficient way to calculate the loss directly from logits \n",
" with tf.variable_scope('linear_layer'):\n",
" linear_w = tf.Variable(tf.truncated_normal((rnn_size, num_outputs), stddev=0.1), name='linear_w')\n",
" linear_b = tf.Variable(tf.zeros(num_outputs), name='linear_b')\n",
" \n",
" logits = tf.matmul(outputs, linear_w) + linear_b\n",
" \n",
" # Reshape the logits back into the original input shape -> [batch_size, seq_len, num_classes]\n",
" # We do this beceause the loss function seq2seq.sequence_loss takes as logits a shape of [batch_size,seq_len,num_decoded_symbols]\n",
" logits = tf.reshape(logits, [batch_size, -1, num_outputs])\n",
" \n",
" \n",
" return logits, final_state\n",
"\n",
"\n",
"\"\"\"\n",
@ -694,13 +728,150 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 238,
"metadata": {
"collapsed": false,
"deletable": true,
"editable": true
},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(7, 1280)\n",
"[[ 0 1 2 3 4]\n",
" [ 5 6 7 8 9]\n",
" [ 10 11 12 13 14]\n",
" [ 15 16 17 18 19]\n",
" [ 20 21 22 23 24]\n",
" [ 25 26 27 28 29]\n",
" [ 30 31 32 33 34]\n",
" [ 35 36 37 38 39]\n",
" [ 40 41 42 43 44]\n",
" [ 45 46 47 48 49]\n",
" [ 50 51 52 53 54]\n",
" [ 55 56 57 58 59]\n",
" [ 60 61 62 63 64]\n",
" [ 65 66 67 68 69]\n",
" [ 70 71 72 73 74]\n",
" [ 75 76 77 78 79]\n",
" [ 80 81 82 83 84]\n",
" [ 85 86 87 88 89]\n",
" [ 90 91 92 93 94]\n",
" [ 95 96 97 98 99]\n",
" [100 101 102 103 104]\n",
" [105 106 107 108 109]\n",
" [110 111 112 113 114]\n",
" [115 116 117 118 119]\n",
" [120 121 122 123 124]\n",
" [125 126 127 128 129]\n",
" [130 131 132 133 134]\n",
" [135 136 137 138 139]\n",
" [140 141 142 143 144]\n",
" [145 146 147 148 149]\n",
" [150 151 152 153 154]\n",
" [155 156 157 158 159]\n",
" [160 161 162 163 164]\n",
" [165 166 167 168 169]\n",
" [170 171 172 173 174]\n",
" [175 176 177 178 179]\n",
" [180 181 182 183 184]\n",
" [185 186 187 188 189]\n",
" [190 191 192 193 194]\n",
" [195 196 197 198 199]\n",
" [200 201 202 203 204]\n",
" [205 206 207 208 209]\n",
" [210 211 212 213 214]\n",
" [215 216 217 218 219]\n",
" [220 221 222 223 224]\n",
" [225 226 227 228 229]\n",
" [230 231 232 233 234]\n",
" [235 236 237 238 239]\n",
" [240 241 242 243 244]\n",
" [245 246 247 248 249]\n",
" [250 251 252 253 254]\n",
" [255 256 257 258 259]\n",
" [260 261 262 263 264]\n",
" [265 266 267 268 269]\n",
" [270 271 272 273 274]\n",
" [275 276 277 278 279]\n",
" [280 281 282 283 284]\n",
" [285 286 287 288 289]\n",
" [290 291 292 293 294]\n",
" [295 296 297 298 299]\n",
" [300 301 302 303 304]\n",
" [305 306 307 308 309]\n",
" [310 311 312 313 314]\n",
" [315 316 317 318 319]\n",
" [320 321 322 323 324]\n",
" [325 326 327 328 329]\n",
" [330 331 332 333 334]\n",
" [335 336 337 338 339]\n",
" [340 341 342 343 344]\n",
" [345 346 347 348 349]\n",
" [350 351 352 353 354]\n",
" [355 356 357 358 359]\n",
" [360 361 362 363 364]\n",
" [365 366 367 368 369]\n",
" [370 371 372 373 374]\n",
" [375 376 377 378 379]\n",
" [380 381 382 383 384]\n",
" [385 386 387 388 389]\n",
" [390 391 392 393 394]\n",
" [395 396 397 398 399]\n",
" [400 401 402 403 404]\n",
" [405 406 407 408 409]\n",
" [410 411 412 413 414]\n",
" [415 416 417 418 419]\n",
" [420 421 422 423 424]\n",
" [425 426 427 428 429]\n",
" [430 431 432 433 434]\n",
" [435 436 437 438 439]\n",
" [440 441 442 443 444]\n",
" [445 446 447 448 449]\n",
" [450 451 452 453 454]\n",
" [455 456 457 458 459]\n",
" [460 461 462 463 464]\n",
" [465 466 467 468 469]\n",
" [470 471 472 473 474]\n",
" [475 476 477 478 479]\n",
" [480 481 482 483 484]\n",
" [485 486 487 488 489]\n",
" [490 491 492 493 494]\n",
" [495 496 497 498 499]\n",
" [500 501 502 503 504]\n",
" [505 506 507 508 509]\n",
" [510 511 512 513 514]\n",
" [515 516 517 518 519]\n",
" [520 521 522 523 524]\n",
" [525 526 527 528 529]\n",
" [530 531 532 533 534]\n",
" [535 536 537 538 539]\n",
" [540 541 542 543 544]\n",
" [545 546 547 548 549]\n",
" [550 551 552 553 554]\n",
" [555 556 557 558 559]\n",
" [560 561 562 563 564]\n",
" [565 566 567 568 569]\n",
" [570 571 572 573 574]\n",
" [575 576 577 578 579]\n",
" [580 581 582 583 584]\n",
" [585 586 587 588 589]\n",
" [590 591 592 593 594]\n",
" [595 596 597 598 599]\n",
" [600 601 602 603 604]\n",
" [605 606 607 608 609]\n",
" [610 611 612 613 614]\n",
" [615 616 617 618 619]\n",
" [620 621 622 623 624]\n",
" [625 626 627 628 629]\n",
" [630 631 632 633 634]\n",
" [635 636 637 638 639]]\n",
"Tests Passed\n"
]
}
],
"source": [
"def get_batches(int_text, batch_size, seq_length):\n",
" \"\"\"\n",
@ -710,8 +881,32 @@
" :param seq_length: The length of sequence\n",
" :return: Batches as a Numpy array\n",
" \"\"\"\n",
" # TODO: Implement Function\n",
" return None\n",
" \n",
" slice_size = batch_size * seq_length\n",
" n_batches = int(len(int_text)/slice_size)\n",
" \n",
" # input part\n",
" _inputs = np.array(int_text[:n_batches*slice_size])\n",
" \n",
" # target part\n",
" _targets = np.array(int_text[1:n_batches*slice_size + 1])\n",
" \n",
"\n",
" # Go through all inputs, targets and split them into batch_size*seq_len list of items\n",
" # [batch, batch, ...]\n",
" inputs, targets = np.split(_inputs, n_batches), np.split(_targets, n_batches)\n",
" \n",
" # concat inputs and targets\n",
" batches = np.c_[inputs, targets]\n",
" print(batches.shape)\n",
" \n",
" # Reshape into final batches output\n",
" batches = batches.reshape((-1, 2, batch_size, seq_length))\n",
"\n",
" print(batches[0][0])\n",
"\n",
" \n",
" return batches\n",
"\n",
"\n",
"\"\"\"\n",

Loading…
Cancel
Save