diff --git a/TextCorrector.ipynb b/TextCorrector.ipynb index 578dd04..2c76e4c 100644 --- a/TextCorrector.ipynb +++ b/TextCorrector.ipynb @@ -1,925 +1,1170 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "from __future__ import print_function\n", - "\n", - "import os\n", - "import time\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "import pandas as pd\n", - "from collections import defaultdict\n", - "\n", - "from sklearn.metrics import roc_auc_score, accuracy_score\n", - "import nltk\n", - "\n", - "from correct_text import train, decode, decode_sentence, evaluate_accuracy, create_model,\\\n", - " get_corrective_tokens, DefaultPTBConfig, DefaultMovieDialogConfig\n", - "from text_correcter_data_readers import PTBDataReader, MovieDialogReader\n", - "\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "root_data_path = \"/Users/atpaino/data/textcorrecter/dialog_corpus\"\n", - "train_path = os.path.join(root_data_path, \"movie_lines.txt\")\n", - "val_path = os.path.join(root_data_path, \"cleaned_dialog_val.txt\")\n", - "test_path = os.path.join(root_data_path, \"cleaned_dialog_test.txt\")\n", - "model_path = os.path.join(root_data_path, \"dialog_correcter_model_testnltk\")\n", - "config = DefaultMovieDialogConfig()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Train" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "data_reader = MovieDialogReader(config, train_path)" - ] + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "TextCorrector.ipynb", + "version": "0.3.2", + "provenance": [] + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.11" + }, + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + } }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ + "cells": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Reading data; train = /Users/atpaino/data/textcorrecter/dialog_corpus/movie_lines.txt, test = /Users/atpaino/data/textcorrecter/dialog_corpus/cleaned_dialog_val.txt\n" - ] + "metadata": { + "id": "qbfq1QtHhV-J", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Modified notebook for Google Colaboratory\n", + "Not yet working, but, will check in to my branch to keep the changes." + ] }, { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_reader\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/Users/atpaino/github/deep-text-correcter/correct_text.pyc\u001b[0m in \u001b[0;36mtrain\u001b[0;34m(data_reader, train_path, test_path, model_path)\u001b[0m\n\u001b[1;32m 138\u001b[0m \"Reading data; train = {}, test = {}\".format(train_path, test_path))\n\u001b[1;32m 139\u001b[0m \u001b[0mconfig\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 140\u001b[0;31m \u001b[0mtrain_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtrain_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 141\u001b[0m \u001b[0mtest_data\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata_reader\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbuild_dataset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtest_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 142\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/atpaino/github/deep-text-correcter/data_reader.pyc\u001b[0m in \u001b[0;36mbuild_dataset\u001b[0;34m(self, path)\u001b[0m\n\u001b[1;32m 125\u001b[0m \u001b[0;31m# dropouts.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 126\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0m_\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdataset_copies\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 127\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0msource\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_samples\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 128\u001b[0m for bucket_id, (source_size, target_size) in enumerate(\n\u001b[1;32m 129\u001b[0m self.config.buckets):\n", - "\u001b[0;32m/Users/atpaino/github/deep-text-correcter/data_reader.pyc\u001b[0m in \u001b[0;36mread_samples\u001b[0;34m(self, path)\u001b[0m\n\u001b[1;32m 113\u001b[0m \"\"\"\n\u001b[1;32m 114\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0msource_words\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtarget_words\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mread_samples_by_string\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 115\u001b[0;31m \u001b[0msource\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_token_to_id\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mword\u001b[0m \u001b[0;32min\u001b[0m \u001b[0msource_words\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 116\u001b[0m \u001b[0mtarget\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_token_to_id\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mword\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mword\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtarget_words\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[0mtarget\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mEOS_ID\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m/Users/atpaino/github/deep-text-correcter/data_reader.pyc\u001b[0m in \u001b[0;36mconvert_token_to_id\u001b[0;34m(self, token)\u001b[0m\n\u001b[1;32m 77\u001b[0m \u001b[0;34m:\u001b[0m\u001b[0;32mreturn\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 78\u001b[0m \"\"\"\n\u001b[0;32m---> 79\u001b[0;31m \u001b[0mtoken_with_id\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtoken\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtoken\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoken_to_id\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 80\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0munknown_token\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtoken_to_id\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mtoken_with_id\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mKeyboardInterrupt\u001b[0m: " - ] - } - ], - "source": [ - "train(data_reader, train_path, val_path, model_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Decode sentences" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "data_reader = MovieDialogReader(config, train_path, dropout_prob=0.25, replacement_prob=0.25, dataset_copies=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "corrective_tokens = get_corrective_tokens(data_reader, train_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [ - "import pickle\n", - "with open(os.path.join(root_data_path, \"corrective_tokens.pickle\"), \"w\") as f:\n", - " pickle.dump(corrective_tokens, f)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": false - }, - "outputs": [], - "source": [ - "import pickle\n", - "with open(os.path.join(root_data_path, \"token_to_id.pickle\"), \"w\") as f:\n", - " pickle.dump(data_reader.token_to_id, f)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, - "outputs": [ + "metadata": { + "id": "vv_XoVWbhPkq", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "!cd ~/" + ], + "execution_count": 0, + "outputs": [] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Reading model parameters from /Users/atpaino/data/textcorrecter/dialog_corpus/dialog_correcter_model/translate.ckpt-41900\n" - ] - } - ], - "source": [ - "sess = tf.InteractiveSession()\n", - "model = create_model(sess, True, model_path, config=config)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false, - "scrolled": false - }, - "outputs": [ + "metadata": { + "id": "Lza4H9_dhhIM", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 139 + }, + "outputId": "17e4487b-cbb7-4217-845a-ae2e0235ae8b" + }, + "cell_type": "code", + "source": [ + "!git clone https://github.com/jgoodrich77/deep-text-corrector.git" + ], + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Cloning into 'deep-text-corrector'...\n", + "remote: Enumerating objects: 19, done.\u001b[K\n", + "remote: Counting objects: 100% (19/19), done.\u001b[K\n", + "remote: Compressing objects: 100% (19/19), done.\u001b[K\n", + "remote: Total 141 (delta 8), reused 0 (delta 0), pack-reused 122\u001b[K\n", + "Receiving objects: 100% (141/141), 302.89 KiB | 10.44 MiB/s, done.\n", + "Resolving deltas: 100% (76/76), done.\n" + ], + "name": "stdout" + } + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Input: you must have girlfriend\n", - "Output: you must have a girlfriend\n", - "\n" - ] - } - ], - "source": [ - "# Test a sample from the test dataset.\n", - "decoded = decode_sentence(sess, model, data_reader, \"you must have girlfriend\", corrective_tokens=corrective_tokens)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, - "outputs": [ + "metadata": { + "id": "QozrtmPQhrpg", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "outputId": "b71d1543-7ee9-462c-8567-fa2606c68f3a" + }, + "cell_type": "code", + "source": [ + "cd /content/deep-text-corrector" + ], + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "text": [ + "/content/deep-text-corrector\n" + ], + "name": "stdout" + } + ] + }, { - "ename": "NameError", - "evalue": "name 'decoded' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdecoded\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mNameError\u001b[0m: name 'decoded' is not defined" - ] - } - ], - "source": [ - "decoded" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false - }, - "outputs": [ + "metadata": { + "id": "oppGl6NShwLj", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "!pip3 install -r requirements.txt" + ], + "execution_count": 0, + "outputs": [] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Input: did n't you say that they 're going to develop this revolutionary new thing ...\n", - "Output: did n't you say that they 're going to develop this revolutionary new thing ...\n", - "\n" - ] - } - ], - "source": [ - "decoded = decode_sentence(sess, model, data_reader,\n", - " \"did n't you say that they 're going to develop this revolutionary new thing ...\",\n", - " corrective_tokens=corrective_tokens)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": false - }, - "outputs": [ + "metadata": { + "id": "OhmkYuFCh80_", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "Google drive support, mounting drive" + ] + }, { - "data": { - "text/plain": [ - "['kvothe', 'went', 'to', 'the', 'market']" + "metadata": { + "id": "6iwii7JTh73j", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "from google.colab import drive\n", + "drive.mount('/content/drive')" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "QvuOwICMhE3P", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "from __future__ import print_function\n", + "\n", + "import os\n", + "import time\n", + "import numpy as np\n", + "import tensorflow as tf\n", + "import pandas as pd\n", + "from collections import defaultdict\n", + "\n", + "from sklearn.metrics import roc_auc_score, accuracy_score\n", + "import nltk\n", + "\n", + "from correct_text import train, decode, decode_sentence, evaluate_accuracy, create_model,\\\n", + " get_corrective_tokens, DefaultPTBConfig, DefaultMovieDialogConfig\n", + "from text_correcter_data_readers import PTBDataReader, MovieDialogReader\n", + "\n", + "%matplotlib inline" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "eJa3KCyOiucw", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 382 + }, + "outputId": "c1d8eabf-c2db-4c04-8870-1ef0d235f89c" + }, + "cell_type": "code", + "source": [ + "!cd .." + ], + "execution_count": 11, + "outputs": [ + { + "output_type": "stream", + "text": [ + "total 300\n", + "drwxr-xr-x 4 root root 4096 May 1 17:56 .\n", + "-rw-r--r-- 1 root root 44780 May 1 17:56 seq2seq.pyc\n", + "-rw-r--r-- 1 root root 13354 May 1 17:56 text_corrector_models.pyc\n", + "-rw-r--r-- 1 root root 3876 May 1 17:56 text_corrector_data_readers.pyc\n", + "-rw-r--r-- 1 root root 4747 May 1 17:56 data_reader.pyc\n", + "-rw-r--r-- 1 root root 11496 May 1 17:56 correct_text.pyc\n", + "drwxr-xr-x 8 root root 4096 May 1 17:54 .git\n", + "-rw-r--r-- 1 root root 58720 May 1 17:54 seq2seq.py\n", + "-rw-r--r-- 1 root root 3811 May 1 17:54 text_corrector_data_readers.py\n", + "-rw-r--r-- 1 root root 19397 May 1 17:54 text_corrector_models.py\n", + "-rw-r--r-- 1 root root 16837 May 1 17:54 correct_text.py\n", + "-rw-r--r-- 1 root root 4275 May 1 17:54 data_reader.py\n", + "-rw-r--r-- 1 root root 2523 May 1 17:54 dtc_lambda.py\n", + "drwxr-xr-x 2 root root 4096 May 1 17:54 preprocessors\n", + "-rw-r--r-- 1 root root 77 May 1 17:54 requirements.txt\n", + "-rw-r--r-- 1 root root 45284 May 1 17:54 TextCorrector.ipynb\n", + "-rw-r--r-- 1 root root 76 May 1 17:54 .gitignore\n", + "-rw-r--r-- 1 root root 11357 May 1 17:54 LICENSE\n", + "-rw-r--r-- 1 root root 12339 May 1 17:54 README.md\n", + "drwxr-xr-x 1 root root 4096 May 1 17:54 ..\n" + ], + "name": "stdout" + } ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "decode_sentence(sess, model, data_reader, \"kvothe went to market\", corrective_tokens=corrective_tokens, verbose=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": false - }, - "outputs": [ + }, + { + "metadata": { + "id": "LryHyKEihE3f", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "root_data_path = \"/Users/atpaino/data/textcorrecter/dialog_corpus\"\n", + "train_path = os.path.join(root_data_path, \"movie_lines.txt\")\n", + "val_path = os.path.join(root_data_path, \"cleaned_dialog_val.txt\")\n", + "test_path = os.path.join(root_data_path, \"cleaned_dialog_test.txt\")\n", + "model_path = os.path.join(root_data_path, \"dialog_correcter_model_testnltk\")\n", + "config = DefaultMovieDialogConfig()" + ], + "execution_count": 0, + "outputs": [] + }, { - "data": { - "text/plain": [ - "['blablahblah', 'and', 'bladdddd', 'went', 'to', 'the', 'market']" + "metadata": { + "id": "KyCYKcZ9hE3p", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "## Train" ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "decode_sentence(sess, model, data_reader, \"blablahblah and bladdddd went to market\", corrective_tokens=corrective_tokens,\n", - " verbose=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": false - }, - "outputs": [ + }, { - "data": { - "text/plain": [ - "['do', 'you', 'have', 'a', 'book']" + "metadata": { + "id": "nXxkxHgahE3t", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 165 + }, + "outputId": "315df6ce-03ce-4230-878d-40ae90aa59b7" + }, + "cell_type": "code", + "source": [ + "data_reader = MovieDialogReader(config, train_path)" + ], + "execution_count": 6, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m\u001b[0m", + "\u001b[0;31mNameError\u001b[0mTraceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdata_reader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMovieDialogReader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'MovieDialogReader' is not defined" + ] + } ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "decode_sentence(sess, model, data_reader, \"do you have book\", corrective_tokens=corrective_tokens, verbose=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": false - }, - "outputs": [ + }, { - "data": { - "text/plain": [ - "['the', 'cardinals', 'did', 'better', 'than', 'the', 'cubs']" + "metadata": { + "id": "1S6pmwIihE32", + "colab_type": "code", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 165 + }, + "outputId": "495bb4f4-fd66-41e6-defe-56d4c06842a8" + }, + "cell_type": "code", + "source": [ + "train(data_reader, train_path, val_path, model_path)" + ], + "execution_count": 7, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m\u001b[0m", + "\u001b[0;31mNameError\u001b[0mTraceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtrain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_reader\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtrain_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mval_path\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel_path\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'train' is not defined" + ] + } ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "decode_sentence(sess, model, data_reader, \"the cardinals did better then the cubs\", corrective_tokens=corrective_tokens, verbose=False)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false - }, - "outputs": [ + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Bucket 0: (10, 10)\n", - "\tBaseline BLEU = 0.8354\n", - "\tModel BLEU = 0.8492\n", - "\tBaseline Accuracy: 0.9090\n", - "\tModel Accuracy: 0.9354\n", - "Bucket 1: (15, 15)\n", - "\tBaseline BLEU = 0.8826\n", - "\tModel BLEU = 0.8595\n", - "\tBaseline Accuracy: 0.8055\n", - "\tModel Accuracy: 0.8149\n", - "Bucket 2: (20, 20)\n", - "\tBaseline BLEU = 0.8880\n", - "\tModel BLEU = 0.8216\n", - "\tBaseline Accuracy: 0.7301\n", - "\tModel Accuracy: 0.6689\n", - "Bucket 3: (40, 40)\n", - "\tBaseline BLEU = 0.9097\n", - "\tModel BLEU = 0.6357\n", - "\tBaseline Accuracy: 0.5981\n", - "\tModel Accuracy: 0.2283\n" - ] - } - ], - "source": [ - "# 4 layers, 40k steps\n", - "errors = evaluate_accuracy(sess, model, data_reader, corrective_tokens, test_path)#, max_samples=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": false - }, - "outputs": [ + "metadata": { + "id": "VvCc0qFYhE4A", + "colab_type": "text" + }, + "cell_type": "markdown", + "source": [ + "## Decode sentences" + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Bucket 0: (10, 10)\n", - "\tBaseline BLEU = 0.8368\n", - "\tModel BLEU = 0.8425\n", - "\tBaseline Accuracy: 0.9110\n", - "\tModel Accuracy: 0.9303\n", - "Bucket 1: (15, 15)\n", - "\tBaseline BLEU = 0.8818\n", - "\tModel BLEU = 0.8459\n", - "\tBaseline Accuracy: 0.8063\n", - "\tModel Accuracy: 0.8014\n", - "Bucket 2: (20, 20)\n", - "\tBaseline BLEU = 0.8891\n", - "\tModel BLEU = 0.7986\n", - "\tBaseline Accuracy: 0.7309\n", - "\tModel Accuracy: 0.6281\n", - "Bucket 3: (40, 40)\n", - "\tBaseline BLEU = 0.9099\n", - "\tModel BLEU = 0.5997\n", - "\tBaseline Accuracy: 0.6007\n", - "\tModel Accuracy: 0.1607\n" - ] - } - ], - "source": [ - "# 4 layers, 30k steps\n", - "errors = evaluate_accuracy(sess, model, data_reader, corrective_tokens, test_path)#, max_samples=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": false - }, - "outputs": [ + "metadata": { + "id": "5c_OQtD7hE4C", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "data_reader = MovieDialogReader(config, train_path, dropout_prob=0.25, replacement_prob=0.25, dataset_copies=1)" + ], + "execution_count": 0, + "outputs": [] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Bucket 0: (10, 10)\n", - "\tBaseline BLEU = 0.8330\n", - "\tModel BLEU = 0.8335\n", - "\tBaseline Accuracy: 0.9067\n", - "\tModel Accuracy: 0.9218\n", - "Bucket 1: (15, 15)\n", - "\tBaseline BLEU = 0.8772\n", - "\tModel BLEU = 0.8100\n", - "\tBaseline Accuracy: 0.7980\n", - "\tModel Accuracy: 0.7437\n", - "Bucket 2: (20, 20)\n", - "\tBaseline BLEU = 0.8898\n", - "\tModel BLEU = 0.7636\n", - "\tBaseline Accuracy: 0.7366\n", - "\tModel Accuracy: 0.5370\n", - "Bucket 3: (40, 40)\n", - "\tBaseline BLEU = 0.9098\n", - "\tModel BLEU = 0.5387\n", - "\tBaseline Accuracy: 0.6041\n", - "\tModel Accuracy: 0.1117\n" - ] - } - ], - "source": [ - "# 4 layers, 20k steps\n", - "errors = evaluate_accuracy(sess, model, data_reader, corrective_tokens, test_path)#, max_samples=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": { - "collapsed": false - }, - "outputs": [ + "metadata": { + "id": "TrIX-_e2hE4J", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "corrective_tokens = get_corrective_tokens(data_reader, train_path)" + ], + "execution_count": 0, + "outputs": [] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Bucket 0: (10, 10)\n", - "\tBaseline BLEU = 0.8341\n", - "\tModel BLEU = 0.8516\n", - "\tBaseline Accuracy: 0.9083\n", - "\tModel Accuracy: 0.9384\n", - "Bucket 1: (15, 15)\n", - "\tBaseline BLEU = 0.8850\n", - "\tModel BLEU = 0.8860\n", - "\tBaseline Accuracy: 0.8156\n", - "\tModel Accuracy: 0.8491\n", - "Bucket 2: (20, 20)\n", - "\tBaseline BLEU = 0.8876\n", - "\tModel BLEU = 0.8880\n", - "\tBaseline Accuracy: 0.7291\n", - "\tModel Accuracy: 0.7817\n", - "Bucket 3: (40, 40)\n", - "\tBaseline BLEU = 0.9099\n", - "\tModel BLEU = 0.9045\n", - "\tBaseline Accuracy: 0.6073\n", - "\tModel Accuracy: 0.6425\n" - ] - } - ], - "source": [ - "errors = evaluate_accuracy(sess, model, data_reader, corrective_tokens, test_path)#, max_samples=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": { - "collapsed": false, - "scrolled": false - }, - "outputs": [ + "metadata": { + "id": "bhN3wLoChE4Q", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import pickle\n", + "with open(os.path.join(root_data_path, \"corrective_tokens.pickle\"), \"w\") as f:\n", + " pickle.dump(corrective_tokens, f)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "9JuwMKX7hE4Z", + "colab_type": "code", + "colab": {} + }, + "cell_type": "code", + "source": [ + "import pickle\n", + "with open(os.path.join(root_data_path, \"token_to_id.pickle\"), \"w\") as f:\n", + " pickle.dump(data_reader.token_to_id, f)" + ], + "execution_count": 0, + "outputs": [] + }, + { + "metadata": { + "id": "fnQPeYZhhE4m", + "colab_type": "code", + "colab": {}, + "outputId": "d3ff8fe4-0f1d-4d90-e9a1-7b696d0f5b68" + }, + "cell_type": "code", + "source": [ + "sess = tf.InteractiveSession()\n", + "model = create_model(sess, True, model_path, config=config)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Reading model parameters from /Users/atpaino/data/textcorrecter/dialog_corpus/dialog_correcter_model/translate.ckpt-41900\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "scrolled": false, + "id": "2xS4TY4whE44", + "colab_type": "code", + "colab": {}, + "outputId": "6b7c67b8-2789-47de-8c94-c3170d9fc9f4" + }, + "cell_type": "code", + "source": [ + "# Test a sample from the test dataset.\n", + "decoded = decode_sentence(sess, model, data_reader, \"you must have girlfriend\", corrective_tokens=corrective_tokens)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Input: you must have girlfriend\n", + "Output: you must have a girlfriend\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "O20Tuhz_hE5B", + "colab_type": "code", + "colab": {}, + "outputId": "7d0180e1-6e48-47b9-f7f4-211488290bd2" + }, + "cell_type": "code", + "source": [ + "decoded" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "error", + "ename": "NameError", + "evalue": "name 'decoded' is not defined", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdecoded\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mNameError\u001b[0m: name 'decoded' is not defined" + ] + } + ] + }, + { + "metadata": { + "id": "JefsxNEChE5N", + "colab_type": "code", + "colab": {}, + "outputId": "59515559-db2a-4455-8398-d80e0a258b45" + }, + "cell_type": "code", + "source": [ + "decoded = decode_sentence(sess, model, data_reader,\n", + " \"did n't you say that they 're going to develop this revolutionary new thing ...\",\n", + " corrective_tokens=corrective_tokens)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Input: did n't you say that they 're going to develop this revolutionary new thing ...\n", + "Output: did n't you say that they 're going to develop this revolutionary new thing ...\n", + "\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "l4ona7f1hE5U", + "colab_type": "code", + "colab": {}, + "outputId": "eeb55d25-f2c4-42e5-d60e-d6cbc2ef5a61" + }, + "cell_type": "code", + "source": [ + "decode_sentence(sess, model, data_reader, \"kvothe went to market\", corrective_tokens=corrective_tokens, verbose=False)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['kvothe', 'went', 'to', 'the', 'market']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 9 + } + ] + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "Decoding: you beg for mercy in a second .\n", - "Target: you 'll beg for mercy in a second .\n", - "\n", - "Decoding: i 'm dying for a shower . you could use the one too . and we 'd better check that bandage .\n", - "Target: i 'm dying for a shower . you could use one too . and we 'd better check that bandage .\n", - "\n", - "Decoding: whatever ... they 've become hotshot computer guys so they get a job to build el computer grande ... skynet ... for the government . right ?\n", - "Target: whatever ... they become the hotshot computer guys so they get the job to build el computer grande ... skynet ... for the government . right ?\n", - "\n", - "Decoding: did n't you say that they 're going to develop this revolutionary a new thing ...\n", - "Target: did n't you say that they 're going to develop this revolutionary new thing ...\n", - "\n", - "Decoding: bag some z ?\n", - "Target: bag some z 's ?\n", - "\n", - "Decoding: sleep . it 'll be a light soon .\n", - "Target: sleep . it 'll be light soon .\n", - "\n", - "Decoding: well , at least i know what to name him . i do n't suppose you 'd know who father is ? so i do n't tell him to get lost when i meet him .\n", - "Target: well , at least i know what to name him . i do n't suppose you 'd know who the father is ? so i do n't tell him to get lost when i meet him .\n", - "\n", - "Decoding: we got ta get you to doctor .\n", - "Target: we got ta get you to a doctor .\n", - "\n", - "Decoding: hunter killers . patrol machines . a build in automated factories . most of us were rounded up , put in camps ... for orderly disposal .\n", - "Target: hunter killers . patrol machines . build in automated factories . most of us were rounded up , put in camps ... for orderly disposal .\n", - "\n", - "Decoding: but outside , it 's a living human tissue . flesh , skin , hair ... blood . grown for the cyborgs .\n", - "Target: but outside , it 's living human tissue . flesh , skin , hair ... blood . grown for the cyborgs .\n", - "\n", - "Decoding: you heard enough . decide . are you going to release me ?\n", - "Target: you 've heard enough . decide . are you going to release me ?\n", - "\n", - "Decoding: okay . okay . but this ... cyborg ... if it metal ...\n", - "Target: okay . okay . but this ... cyborg ... if it 's metal ...\n", - "\n", - "Decoding: you go naked . something about the field generated by living organism . nothing dead will go .\n", - "Target: you go naked . something about the field generated by a living organism . nothing dead will go .\n", - "\n", - "Decoding: ca n't . nobody goes home . nobody else comes through . it just him and me .\n", - "Target: ca n't . nobody goes home . nobody else comes through . it 's just him and me .\n", - "\n", - "Decoding: i see . and this ... computer , thinks it can win by killing the mother of its enemy , kill- ing him , in effect , before he is even conceived ? sort of retroactive abortion ?\n", - "Target: i see . and this ... computer , thinks it can win by killing the mother of its enemy , kill- ing him , in effect , before he is even conceived ? a sort of retroactive abortion ?\n", - "\n", - "Decoding: skynet . a computer defense system built for sac-norad by cyber dynamics . modified series 4800 .\n", - "Target: skynet . a computer defense system built for sac-norad by cyber dynamics . a modified series 4800 .\n", - "\n", - "Decoding: a year 2027 ?\n", - "Target: the year 2027 ?\n", - "\n", - "Decoding: with one thirty a second under perry , from '21 to '27 --\n", - "Target: with the one thirty second under perry , from '21 to '27 --\n", - "\n", - "Decoding: why do n't you just stretch out here and get some sleep . it take your mom 's a good hour to get here from redlands .\n", - "Target: why do n't you just stretch out here and get some sleep . it 'll take your mom a good hour to get here from redlands .\n", - "\n", - "Decoding: lieutenant , are you sure it them ? maybe i should see the ... bodies .\n", - "Target: lieutenant , are you sure it 's them ? maybe i should see the ... bodies .\n", - "\n", - "Decoding: i already did . no answer at the door and the apartment manager 's out . i keeping them there .\n", - "Target: i already did . no answer at the door and the apartment manager 's out . i 'm keeping them there .\n", - "\n", - "Decoding: that stuff two hours cold .\n", - "Target: that stuff 's two hours cold .\n", - "\n", - "Decoding: you got ta be kidding me . the new guys 'll be short-stroking it over this one . one-day pattern killer .\n", - "Target: you got ta be kidding me . the new guys 'll be short-stroking it over this one . a one-day pattern killer .\n", - "\n", - "Decoding: give me a short version .\n", - "Target: give me the short version .\n", - "\n", - "Decoding: because it 's fair . give me the next quarter . if you still feel this way , vote your shares ...\n", - "Target: because it 's fair . give me next quarter . if you still feel this way , vote your shares ...\n", - "\n", - "Decoding: it 's probably will . in fact , i 'd go so far as to say it 's almost certainly will , in time . why should i settle for that ?\n", - "Target: it probably will . in fact , i 'd go so far as to say it almost certainly will , in time . why should i settle for that ?\n", - "\n", - "Decoding: stock will turn .\n", - "Target: the stock will turn .\n", - "\n", - "Decoding: you want to know what it is ? what 's it all about ? john . chapter nine . verse twenty-five .\n", - "Target: you want to know what it is ? what it 's all about ? john . chapter nine . verse twenty-five .\n", - "\n", - "Decoding: i only mention it because i took a test this afternoon , down on montgomery street .\n", - "Target: i only mention it because i took the test this afternoon , down on montgomery street .\n", - "\n", - "Decoding: christine ! mister van orton is valued customer ...\n", - "Target: christine ! mister van orton is a valued customer ...\n", - "\n", - "Decoding: a single ?\n", - "Target: single ?\n", - "\n", - "Decoding: there 's another gig starting in saudi arabia . i just a walk-on this time though . bit-part .\n", - "Target: there 's another gig starting in saudi arabia . i 'm just a walk-on this time though . bit-part .\n", - "\n", - "Decoding: no ! you take another step , i shoot ! they 're trying to kill me ...\n", - "Target: no ! you take another step , i 'll shoot ! they 're trying to kill me ...\n", - "\n", - "Decoding: listen very carefully , i 'm telling the truth ... this is a game . this was all the game .\n", - "Target: listen very carefully , i 'm telling the truth ... this is the game . this was all the game .\n", - "\n", - "Decoding: that 's gun . that 's ... that 's not automatic . the guard had an automatic ...\n", - "Target: that gun . that ... that 's not automatic . the guard had an automatic ...\n", - "\n", - "Decoding: take a picture out .\n", - "Target: take the picture out .\n", - "\n", - "Decoding: yeah . first communion . are n't i little angel ?\n", - "Target: yeah . first communion . are n't i a little angel ?\n", - "\n", - "Decoding: let me go get some clothes on . we talk , okay ? be right back .\n", - "Target: let me go get some clothes on . we 'll talk , okay ? be right back .\n", - "\n", - "Decoding: i 'm tired . i 'm sorry , i should go . i 've been enough of nuisance .\n", - "Target: i 'm tired . i 'm sorry , i should go . i 've been enough of a nuisance .\n", - "\n", - "Decoding: they said five hundred . i said six . they said man in the gray flannel suit . i think i said , you mean the attractive guy in the gray flannel suit ?\n", - "Target: they said five hundred . i said six . they said the man in the gray flannel suit . i think i said , you mean the attractive guy in the gray flannel suit ?\n", - "\n", - "Decoding: i have a confession to make . someone gave me six-hundred dollars to spill a drinks on you , as a practical joke .\n", - "Target: i have a confession to make . someone gave me six-hundred dollars to spill drinks on you , as a practical joke .\n", - "\n", - "Decoding: maitre d ' called you christine .\n", - "Target: the maitre d ' called you christine .\n", - "\n", - "Decoding: i know owner of campton place . i could talk to him in the morning .\n", - "Target: i know the owner of campton place . i could talk to him in the morning .\n", - "\n", - "Decoding: fresh shirt ...\n", - "Target: a fresh shirt ...\n", - "\n", - "Decoding: investment banking . moving money from a place to place .\n", - "Target: investment banking . moving money from place to place .\n", - "\n", - "Decoding: what 's the c .r .s . ?\n", - "Target: what 's c .r .s . ?\n", - "\n", - "Decoding: this is a c .r .s .\n", - "Target: this is c .r .s .\n", - "\n", - "Decoding: their ladder here .\n", - "Target: there 's a ladder here .\n", - "\n", - "Decoding: this is n't attempt to be gallant . if i do n't lift you , how are you going to get there ?\n", - "Target: this is n't an attempt to be gallant . if i do n't lift you , how are you going to get there ?\n", - "\n", - "Decoding: are you suggesting we wait till someone 's finds us ?\n", - "Target: are you suggesting we wait till someone finds us ?\n", - "\n", - "Decoding: `` ... wait for help . '' wait for help . i 'm not opening that specifically warns me not to .\n", - "Target: `` ... wait for help . '' wait for help . i 'm not opening a door that specifically warns me not to .\n", - "\n", - "Decoding: read what it says : `` warning , do < u > not < /u > attempt to open . if elevator stops , use the emergency ... ``\n", - "Target: read what it says : `` warning , do < u > not < /u > attempt to open . if elevator stops , use emergency ... ``\n", - "\n", - "Decoding: long story . i found this key in the mouth of wooden harlequin .\n", - "Target: long story . i found this key in the mouth of a wooden harlequin .\n", - "\n", - "Decoding: how do you know that way ?\n", - "Target: how do you know that 's the way ?\n", - "\n", - "Decoding: it 's run by company ... they play elaborate pranks . things like this . i 'm really only now finding out myself .\n", - "Target: it 's run by a company ... they play elaborate pranks . things like this . i 'm really only now finding out myself .\n", - "\n", - "Decoding: you got to be kidding .\n", - "Target: you 've got to be kidding .\n", - "\n", - "Decoding: i do n't think he breathing .\n", - "Target: i do n't think he 's breathing .\n", - "\n", - "Decoding: a bad month . you did exact the same thing to me last week .\n", - "Target: a bad month . you did the exact same thing to me last week .\n", - "\n", - "Decoding: yeah , yeah . she 's called a cab . said something about catching plane .\n", - "Target: yeah , yeah . she called a cab . said something about catching a plane .\n", - "\n", - "Decoding: oh , god yes please . thanks , man . i take you up on that .\n", - "Target: oh , god yes please . thanks , man . i 'll take you up on that .\n", - "\n", - "Decoding: this ... ? oh , this is just ... this is bill .\n", - "Target: this ... ? oh , this is just ... this is the bill .\n", - "\n", - "Decoding: baby , they were all over the house with metal detectors . they switched your gun with look-alike , rigged barrel , loaded with blanks . pop-gun .\n", - "Target: baby , they were all over the house with metal detectors . they switched your gun with a look-alike , rigged barrel , loaded with blanks . pop-gun .\n", - "\n", - "Decoding: you dodged bullet .\n", - "Target: you dodged a bullet .\n", - "\n", - "Decoding: c .r .s . who do you think ? jesus h . , thank your lucky charms . to think what i 've almost got you into .\n", - "Target: c .r .s . who do you think ? jesus h . , thank your lucky charms . to think what i almost got you into .\n", - "\n", - "Decoding: it 's profound life experience .\n", - "Target: it 's a profound life experience .\n", - "\n", - "Decoding: you 've heard of it . you 've seen other people having it . they 're entertainment service , but more than that .\n", - "Target: you 've heard of it . you 've seen other people having it . they 're an entertainment service , but more than that .\n", - "\n", - "Decoding: they make your life fun . there 's only guarantee is you will not be bored .\n", - "Target: they make your life fun . their only guarantee is you will not be bored .\n", - "\n", - "Decoding: not after i done with it . actually , i 've been here . in grad-school i bought crystal-meth from the maitre d ' .\n", - "Target: not after i 'm done with it . actually , i 've been here . in grad-school i bought crystal-meth from the maitre d ' .\n", - "\n", - "Decoding: that 's why it 's a classic . come on , man ... how 'bout hug ... ?\n", - "Target: that 's why it 's a classic . come on , man ... how 'bout a hug ... ?\n", - "\n", - "Decoding: how much is it ? a few thousand , at least . a rolex like that ... lucky for you 've missed it .\n", - "Target: how much is it ? a few thousand , at least . a rolex like that ... lucky for you they missed it .\n", - "\n", - "Decoding: i told you , they hired me over the phone . i 've never met anyone .\n", - "Target: i told you , they hired me over the phone . i never met anyone .\n", - "\n", - "Decoding: i do n't want money . i 'm pulling back curtain . i 'm here to meet the wizard .\n", - "Target: i do n't want money . i 'm pulling back the curtain . i 'm here to meet the wizard .\n", - "\n", - "Decoding: tell them the cops are after you ... tell them you got to talk to someone , i 'm threatening to blow the whistle .\n", - "Target: tell them the cops are after you ... tell them you 've got to talk to someone , i 'm threatening to blow the whistle .\n", - "\n", - "Decoding: they own the whole building . they just move from the floor to floor .\n", - "Target: they own the whole building . they just move from floor to floor .\n", - "\n", - "Decoding: look , it was just a job . nothing personal , ya know ? i play my part , improvise little . that 's what i 'm good at .\n", - "Target: look , it was just a job . nothing personal , ya know ? i play my part , improvise a little . that 's what i 'm good at .\n", - "\n", - "Decoding: that 's right -- you 're left-brain the word fetishist .\n", - "Target: that 's right -- you 're a left-brain word fetishist .\n", - "\n", - "Decoding: one guarantee . payment 's entirely at your brother discretion and , as a gift , dependent on your satisfaction .\n", - "Target: one guarantee . payment 's entirely at your brother 's discretion and , as a gift , dependent on your satisfaction .\n", - "\n", - "Decoding: your brother was a client with our branch . we do a sort of informal scoring . his numbers were outstanding . sure you 're not hungry at all ... ? tung hoy , best in chinatown ...\n", - "Target: your brother was a client with our london branch . we do a sort of informal scoring . his numbers were outstanding . sure you 're not hungry at all ... ? tung hoy , best in chinatown ...\n", - "\n", - "Decoding: key ?\n", - "Target: the key ?\n", - "\n", - "Decoding: nobody 's worried about your father .\n", - "Target: nobody worried about your father .\n", - "\n", - "Decoding: there 's been a break in . lock this door and stay here . do n't move muscle .\n", - "Target: there 's been a break in . lock this door and stay here . do n't move a muscle .\n", - "\n", - "Decoding: i do n't know what you 're talking about . what happened ?\n", - "Target: i do n't know what you 're talking about . what 's happened ?\n", - "\n", - "Decoding: did alarm go off ? the house ... they ... you did n't see ... ?\n", - "Target: did the alarm go off ? the house ... they ... you did n't see ... ?\n", - "\n", - "Decoding: then then .\n", - "Target: goodnight then .\n", - "\n", - "Decoding: okay . i think he into some sort of new personal improvement cult .\n", - "Target: okay . i think he 's into some sort of new personal improvement cult .\n", - "\n", - "Decoding: dinner in the oven .\n", - "Target: dinner 's in the oven .\n", - "\n", - "Decoding: there was incident a few days ago ... a nervous breakdown , they said . the police took him . they left this address , in case anyone ...\n", - "Target: there was an incident a few days ago ... a nervous breakdown , they said . the police took him . they left this address , in case anyone ...\n", - "\n", - "Decoding: what 's trouble ?\n", - "Target: what 's the trouble ?\n", - "\n", - "Decoding: mister ... seymour butts .\n", - "Target: a mister ... seymour butts .\n", - "\n", - "Decoding: what 's the gentleman , maria ?\n", - "Target: what gentleman , maria ?\n", - "\n", - "Decoding: i would n't mention following , except he was very insistent . it 's obviously some sort of prank ...\n", - "Target: i would n't mention the following , except he was very insistent . it 's obviously some sort of prank ...\n", - "\n", - "Decoding: i send your regrets . honestly , why must i even bother ?\n", - "Target: i 'll send your regrets . honestly , why must i even bother ?\n", - "\n", - "Decoding: the hinchberger 's wedding .\n", - "Target: the hinchberger wedding .\n", - "\n", - "Decoding: invitations : museum gala .\n", - "Target: invitations : the museum gala .\n", - "\n", - "Decoding: nice touch . does a game use real bullets ... ?\n", - "Target: nice touch . does the game use real bullets ... ?\n", - "\n", - "Decoding: it 's what they do . it 's like ... being toyed with by a bunch of ... depraved children\n", - "Target: it 's what they do . it 's like ... being toyed with by a bunch of ... depraved children .\n", - "\n", - "Decoding: find out about a company called the c .r .s . consumer recreation services .\n", - "Target: find out about a company called c .r .s . consumer recreation services .\n", - "\n", - "Decoding: someone 's playing hardball . it 's complicated . can i ask favor ?\n", - "Target: someone 's playing hardball . it 's complicated . can i ask a favor ?\n", - "\n", - "Decoding: how 's the concerned should i be ?\n", - "Target: how concerned should i be ?\n", - "\n", - "Decoding: that you 've a involved conrad ... is unforgivable . i am now your enemy .\n", - "Target: that you 've involved conrad ... is unforgivable . i am now your enemy .\n", - "\n", - "Decoding: what happened ...\n", - "Target: what 's happened ...\n", - "\n", - "Decoding: modelling small-group dynamics in formation of narrative hallucinations . you brought us here to scare us . insomnia , that was just a decoy issue . you 're disgusting .\n", - "Target: modelling small-group dynamics in the formation of narrative hallucinations . you brought us here to scare us . insomnia , that was just a decoy issue . you 're disgusting .\n", - "\n", - "Decoding: come on . these are the typically sentimental gestures of depraved industrialist .\n", - "Target: come on . these are the typically sentimental gestures of a depraved industrialist .\n", - "\n", - "Decoding: the children . children hugh crain built the house for . the children he never had .\n", - "Target: the children . the children hugh crain built the house for . the children he never had .\n", - "\n", - "Decoding: obsessive worrier . join club . and you ? i 'd guess ...\n", - "Target: obsessive worrier . join the club . and you ? i 'd guess ...\n", - "\n", - "Decoding: so why did you need the addam family mansion for a scientific test ?\n", - "Target: so why did you need the addam 's family mansion for a scientific test ?\n", - "\n", - "Decoding: -- how much is this car 's worth ?\n", - "Target: -- how much is this car worth ?\n", - "\n", - "Decoding: you do n't really believe it haunted ... do you believe in ghosts ?\n", - "Target: you do n't really believe it 's haunted ... do you believe in ghosts ?\n", - "\n", - "Decoding: so could you ! is this some fucked up the idea of art , putting someone else 's name to a painting ?\n", - "Target: so could you ! is this some fucked up idea of art , putting someone else 's name to a painting ?\n", - "\n", - "Decoding: and why did n't marrow tell < u > us < /u > ? does n't he a trust women ? that fuck .\n", - "Target: and why did n't marrow tell < u > us < /u > ? does n't he trust women ? that fuck .\n", - "\n", - "Decoding: nah , you 're going crazy with doubt , all of your mistakes are coming back up the pipes , and it 's worse than nightmare . --\n", - "Target: nah , you 're going crazy with doubt , all of your mistakes are coming back up the pipes , and it 's worse than a nightmare . --\n", - "\n", - "Decoding: not the way you 've constructed your group , it just not ethical !\n", - "Target: not the way you 've constructed your group , it 's just not ethical !\n", - "\n", - "Decoding: children want me . they 're calling me . they need me .\n", - "Target: the children want me . they 're calling me . they need me .\n", - "\n", - "Decoding: i looked at theo . she had look on her face .\n", - "Target: i looked at theo . she had a look on her face .\n", - "\n", - "Decoding: i was n't thinking about my mother bathroom .\n", - "Target: i was n't thinking about my mother 's bathroom .\n", - "\n", - "Decoding: so ... smell ... is ... smell is sense that triggers the most powerful memories . and memory can trigger a smell .\n", - "Target: so ... smell ... is ... smell is the sense that triggers the most powerful memories . and a memory can trigger a smell .\n", - "\n", - "Decoding: in the bathroom in my mother 's room , toilet was next to old wooden table . it smelled like that wood .\n", - "Target: in the bathroom in my mother 's room , the toilet was next to an old wooden table . it smelled like that wood .\n", - "\n", - "Decoding: cold sensation . who felt it first ?\n", - "Target: the cold sensation . who felt it first ?\n", - "\n", - "Decoding: i really ... honored to be part of this study , jim .\n", - "Target: i 'm really ... honored to be part of this study , jim .\n", - "\n", - "Decoding: nell . good enough . and i jim .\n", - "Target: nell . good enough . and i 'm jim .\n", - "\n", - "Decoding: that ? that 's a hill house .\n", - "Target: that ? that 's hill house .\n", - "\n", - "Decoding: here 's how they 're organized . groups of five , very different personalities : scored all over the kiersey temperament sorter just like you asked for . and they all score high on insomnia charts .\n", - "Target: here 's how they 're organized . groups of five , very different personalities : scored all over the kiersey temperament sorter just like you asked for . and they all score high on the insomnia charts .\n", - "\n", - "Decoding: you hear the vibrations in the wire . there 's magnetic pulse in the wires , you feel it . i could test it .\n", - "Target: you hear the vibrations in the wire . there 's a magnetic pulse in the wires , you feel it . i could test it .\n", - "\n", - "Decoding: but experiment was a failure .\n", - "Target: but the experiment was a failure .\n", - "\n", - "Decoding: he wandering around house , and nell heard him . she thought it was ghosts . let 's go look for him again .\n", - "Target: he 's wandering around the house , and nell heard him . she thought it was ghosts . let 's go look for him again .\n", - "\n", - "Decoding: i 'll take her with me to university tomorrow . i ca n't believe i read the test wrong . i did n't see anything that looked like she was suicidal .\n", - "Target: i 'll take her with me to the university tomorrow . i ca n't believe i read the test wrong . i did n't see anything that looked like she was suicidal .\n", - "\n", - "Decoding: no , but nell been here longer than i have .\n", - "Target: no , but nell 's been here longer than i have .\n", - "\n", - "Decoding: rene crain . up there . rope . ship 's hawser . hard to tie . do n't know how she 's got it .\n", - "Target: rene crain . up there . rope . ship 's hawser . hard to tie . do n't know how she got it .\n", - "\n", - "Decoding: mrs . dudley be waiting for you .\n", - "Target: mrs . dudley 'll be waiting for you .\n", - "\n", - "Decoding: that 's a good question . what is it about fences ? sometimes a locked chain makes people on both sides of fence just a little more comfortable . why would that be ?\n", - "Target: that 's a good question . what is it about fences ? sometimes a locked chain makes people on both sides of the fence just a little more comfortable . why would that be ?\n", - "\n", - "Decoding: well , i 've never lived with a beauty . you must love working here .\n", - "Target: well , i 've never lived with beauty . you must love working here .\n", - "\n", - "Decoding: nell , it makes sense . it 's all makes sense . you and i , we were scaring each other , working each other up .\n", - "Target: nell , it makes sense . it all makes sense . you and i , we were scaring each other , working each other up .\n", - "\n" - ] + "metadata": { + "id": "8XrUWRSQhE5c", + "colab_type": "code", + "colab": {}, + "outputId": "ec2f81ac-3ef4-4711-fcac-d4a86eb2cedc" + }, + "cell_type": "code", + "source": [ + "decode_sentence(sess, model, data_reader, \"blablahblah and bladdddd went to market\", corrective_tokens=corrective_tokens,\n", + " verbose=False)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['blablahblah', 'and', 'bladdddd', 'went', 'to', 'the', 'market']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 10 + } + ] + }, + { + "metadata": { + "id": "c0Cak7Z6hE5k", + "colab_type": "code", + "colab": {}, + "outputId": "f26db643-96bd-4fd9-b108-36d0f4cf0332" + }, + "cell_type": "code", + "source": [ + "decode_sentence(sess, model, data_reader, \"do you have book\", corrective_tokens=corrective_tokens, verbose=False)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['do', 'you', 'have', 'a', 'book']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 11 + } + ] + }, + { + "metadata": { + "id": "Cri5wAWohE5w", + "colab_type": "code", + "colab": {}, + "outputId": "6721d419-8445-4665-fed6-28e0e5517b0a" + }, + "cell_type": "code", + "source": [ + "decode_sentence(sess, model, data_reader, \"the cardinals did better then the cubs\", corrective_tokens=corrective_tokens, verbose=False)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "['the', 'cardinals', 'did', 'better', 'than', 'the', 'cubs']" + ] + }, + "metadata": { + "tags": [] + }, + "execution_count": 10 + } + ] + }, + { + "metadata": { + "id": "8U9ROikQhE6C", + "colab_type": "code", + "colab": {}, + "outputId": "98b28f97-50d0-47a5-f1e4-cf21aba107aa" + }, + "cell_type": "code", + "source": [ + "# 4 layers, 40k steps\n", + "errors = evaluate_accuracy(sess, model, data_reader, corrective_tokens, test_path)#, max_samples=1000)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Bucket 0: (10, 10)\n", + "\tBaseline BLEU = 0.8354\n", + "\tModel BLEU = 0.8492\n", + "\tBaseline Accuracy: 0.9090\n", + "\tModel Accuracy: 0.9354\n", + "Bucket 1: (15, 15)\n", + "\tBaseline BLEU = 0.8826\n", + "\tModel BLEU = 0.8595\n", + "\tBaseline Accuracy: 0.8055\n", + "\tModel Accuracy: 0.8149\n", + "Bucket 2: (20, 20)\n", + "\tBaseline BLEU = 0.8880\n", + "\tModel BLEU = 0.8216\n", + "\tBaseline Accuracy: 0.7301\n", + "\tModel Accuracy: 0.6689\n", + "Bucket 3: (40, 40)\n", + "\tBaseline BLEU = 0.9097\n", + "\tModel BLEU = 0.6357\n", + "\tBaseline Accuracy: 0.5981\n", + "\tModel Accuracy: 0.2283\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "nZicPETkhE6R", + "colab_type": "code", + "colab": {}, + "outputId": "43cbebb1-d26f-4bf5-93b8-8181de4c2b60" + }, + "cell_type": "code", + "source": [ + "# 4 layers, 30k steps\n", + "errors = evaluate_accuracy(sess, model, data_reader, corrective_tokens, test_path)#, max_samples=1000)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Bucket 0: (10, 10)\n", + "\tBaseline BLEU = 0.8368\n", + "\tModel BLEU = 0.8425\n", + "\tBaseline Accuracy: 0.9110\n", + "\tModel Accuracy: 0.9303\n", + "Bucket 1: (15, 15)\n", + "\tBaseline BLEU = 0.8818\n", + "\tModel BLEU = 0.8459\n", + "\tBaseline Accuracy: 0.8063\n", + "\tModel Accuracy: 0.8014\n", + "Bucket 2: (20, 20)\n", + "\tBaseline BLEU = 0.8891\n", + "\tModel BLEU = 0.7986\n", + "\tBaseline Accuracy: 0.7309\n", + "\tModel Accuracy: 0.6281\n", + "Bucket 3: (40, 40)\n", + "\tBaseline BLEU = 0.9099\n", + "\tModel BLEU = 0.5997\n", + "\tBaseline Accuracy: 0.6007\n", + "\tModel Accuracy: 0.1607\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "d67TF_BhhE6g", + "colab_type": "code", + "colab": {}, + "outputId": "cb82fbc7-a675-4979-8235-4933a098db63" + }, + "cell_type": "code", + "source": [ + "# 4 layers, 20k steps\n", + "errors = evaluate_accuracy(sess, model, data_reader, corrective_tokens, test_path)#, max_samples=1000)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Bucket 0: (10, 10)\n", + "\tBaseline BLEU = 0.8330\n", + "\tModel BLEU = 0.8335\n", + "\tBaseline Accuracy: 0.9067\n", + "\tModel Accuracy: 0.9218\n", + "Bucket 1: (15, 15)\n", + "\tBaseline BLEU = 0.8772\n", + "\tModel BLEU = 0.8100\n", + "\tBaseline Accuracy: 0.7980\n", + "\tModel Accuracy: 0.7437\n", + "Bucket 2: (20, 20)\n", + "\tBaseline BLEU = 0.8898\n", + "\tModel BLEU = 0.7636\n", + "\tBaseline Accuracy: 0.7366\n", + "\tModel Accuracy: 0.5370\n", + "Bucket 3: (40, 40)\n", + "\tBaseline BLEU = 0.9098\n", + "\tModel BLEU = 0.5387\n", + "\tBaseline Accuracy: 0.6041\n", + "\tModel Accuracy: 0.1117\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "id": "4RNm5Bw4hE6o", + "colab_type": "code", + "colab": {}, + "outputId": "8eaa23fb-0e32-422d-beb9-0f1d473f31bc" + }, + "cell_type": "code", + "source": [ + "errors = evaluate_accuracy(sess, model, data_reader, corrective_tokens, test_path)#, max_samples=1000)" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Bucket 0: (10, 10)\n", + "\tBaseline BLEU = 0.8341\n", + "\tModel BLEU = 0.8516\n", + "\tBaseline Accuracy: 0.9083\n", + "\tModel Accuracy: 0.9384\n", + "Bucket 1: (15, 15)\n", + "\tBaseline BLEU = 0.8850\n", + "\tModel BLEU = 0.8860\n", + "\tBaseline Accuracy: 0.8156\n", + "\tModel Accuracy: 0.8491\n", + "Bucket 2: (20, 20)\n", + "\tBaseline BLEU = 0.8876\n", + "\tModel BLEU = 0.8880\n", + "\tBaseline Accuracy: 0.7291\n", + "\tModel Accuracy: 0.7817\n", + "Bucket 3: (40, 40)\n", + "\tBaseline BLEU = 0.9099\n", + "\tModel BLEU = 0.9045\n", + "\tBaseline Accuracy: 0.6073\n", + "\tModel Accuracy: 0.6425\n" + ], + "name": "stdout" + } + ] + }, + { + "metadata": { + "scrolled": false, + "id": "094eOtNYhE6x", + "colab_type": "code", + "colab": {}, + "outputId": "7eb95328-b467-4c5b-e526-8445e33fb007" + }, + "cell_type": "code", + "source": [ + "for decoding, target in errors:\n", + " print(\"Decoding: \" + \" \".join(decoding))\n", + " print(\"Target: \" + \" \".join(target) + \"\\n\")" + ], + "execution_count": 0, + "outputs": [ + { + "output_type": "stream", + "text": [ + "Decoding: you beg for mercy in a second .\n", + "Target: you 'll beg for mercy in a second .\n", + "\n", + "Decoding: i 'm dying for a shower . you could use the one too . and we 'd better check that bandage .\n", + "Target: i 'm dying for a shower . you could use one too . and we 'd better check that bandage .\n", + "\n", + "Decoding: whatever ... they 've become hotshot computer guys so they get a job to build el computer grande ... skynet ... for the government . right ?\n", + "Target: whatever ... they become the hotshot computer guys so they get the job to build el computer grande ... skynet ... for the government . right ?\n", + "\n", + "Decoding: did n't you say that they 're going to develop this revolutionary a new thing ...\n", + "Target: did n't you say that they 're going to develop this revolutionary new thing ...\n", + "\n", + "Decoding: bag some z ?\n", + "Target: bag some z 's ?\n", + "\n", + "Decoding: sleep . it 'll be a light soon .\n", + "Target: sleep . it 'll be light soon .\n", + "\n", + "Decoding: well , at least i know what to name him . i do n't suppose you 'd know who father is ? so i do n't tell him to get lost when i meet him .\n", + "Target: well , at least i know what to name him . i do n't suppose you 'd know who the father is ? so i do n't tell him to get lost when i meet him .\n", + "\n", + "Decoding: we got ta get you to doctor .\n", + "Target: we got ta get you to a doctor .\n", + "\n", + "Decoding: hunter killers . patrol machines . a build in automated factories . most of us were rounded up , put in camps ... for orderly disposal .\n", + "Target: hunter killers . patrol machines . build in automated factories . most of us were rounded up , put in camps ... for orderly disposal .\n", + "\n", + "Decoding: but outside , it 's a living human tissue . flesh , skin , hair ... blood . grown for the cyborgs .\n", + "Target: but outside , it 's living human tissue . flesh , skin , hair ... blood . grown for the cyborgs .\n", + "\n", + "Decoding: you heard enough . decide . are you going to release me ?\n", + "Target: you 've heard enough . decide . are you going to release me ?\n", + "\n", + "Decoding: okay . okay . but this ... cyborg ... if it metal ...\n", + "Target: okay . okay . but this ... cyborg ... if it 's metal ...\n", + "\n", + "Decoding: you go naked . something about the field generated by living organism . nothing dead will go .\n", + "Target: you go naked . something about the field generated by a living organism . nothing dead will go .\n", + "\n", + "Decoding: ca n't . nobody goes home . nobody else comes through . it just him and me .\n", + "Target: ca n't . nobody goes home . nobody else comes through . it 's just him and me .\n", + "\n", + "Decoding: i see . and this ... computer , thinks it can win by killing the mother of its enemy , kill- ing him , in effect , before he is even conceived ? sort of retroactive abortion ?\n", + "Target: i see . and this ... computer , thinks it can win by killing the mother of its enemy , kill- ing him , in effect , before he is even conceived ? a sort of retroactive abortion ?\n", + "\n", + "Decoding: skynet . a computer defense system built for sac-norad by cyber dynamics . modified series 4800 .\n", + "Target: skynet . a computer defense system built for sac-norad by cyber dynamics . a modified series 4800 .\n", + "\n", + "Decoding: a year 2027 ?\n", + "Target: the year 2027 ?\n", + "\n", + "Decoding: with one thirty a second under perry , from '21 to '27 --\n", + "Target: with the one thirty second under perry , from '21 to '27 --\n", + "\n", + "Decoding: why do n't you just stretch out here and get some sleep . it take your mom 's a good hour to get here from redlands .\n", + "Target: why do n't you just stretch out here and get some sleep . it 'll take your mom a good hour to get here from redlands .\n", + "\n", + "Decoding: lieutenant , are you sure it them ? maybe i should see the ... bodies .\n", + "Target: lieutenant , are you sure it 's them ? maybe i should see the ... bodies .\n", + "\n", + "Decoding: i already did . no answer at the door and the apartment manager 's out . i keeping them there .\n", + "Target: i already did . no answer at the door and the apartment manager 's out . i 'm keeping them there .\n", + "\n", + "Decoding: that stuff two hours cold .\n", + "Target: that stuff 's two hours cold .\n", + "\n", + "Decoding: you got ta be kidding me . the new guys 'll be short-stroking it over this one . one-day pattern killer .\n", + "Target: you got ta be kidding me . the new guys 'll be short-stroking it over this one . a one-day pattern killer .\n", + "\n", + "Decoding: give me a short version .\n", + "Target: give me the short version .\n", + "\n", + "Decoding: because it 's fair . give me the next quarter . if you still feel this way , vote your shares ...\n", + "Target: because it 's fair . give me next quarter . if you still feel this way , vote your shares ...\n", + "\n", + "Decoding: it 's probably will . in fact , i 'd go so far as to say it 's almost certainly will , in time . why should i settle for that ?\n", + "Target: it probably will . in fact , i 'd go so far as to say it almost certainly will , in time . why should i settle for that ?\n", + "\n", + "Decoding: stock will turn .\n", + "Target: the stock will turn .\n", + "\n", + "Decoding: you want to know what it is ? what 's it all about ? john . chapter nine . verse twenty-five .\n", + "Target: you want to know what it is ? what it 's all about ? john . chapter nine . verse twenty-five .\n", + "\n", + "Decoding: i only mention it because i took a test this afternoon , down on montgomery street .\n", + "Target: i only mention it because i took the test this afternoon , down on montgomery street .\n", + "\n", + "Decoding: christine ! mister van orton is valued customer ...\n", + "Target: christine ! mister van orton is a valued customer ...\n", + "\n", + "Decoding: a single ?\n", + "Target: single ?\n", + "\n", + "Decoding: there 's another gig starting in saudi arabia . i just a walk-on this time though . bit-part .\n", + "Target: there 's another gig starting in saudi arabia . i 'm just a walk-on this time though . bit-part .\n", + "\n", + "Decoding: no ! you take another step , i shoot ! they 're trying to kill me ...\n", + "Target: no ! you take another step , i 'll shoot ! they 're trying to kill me ...\n", + "\n", + "Decoding: listen very carefully , i 'm telling the truth ... this is a game . this was all the game .\n", + "Target: listen very carefully , i 'm telling the truth ... this is the game . this was all the game .\n", + "\n", + "Decoding: that 's gun . that 's ... that 's not automatic . the guard had an automatic ...\n", + "Target: that gun . that ... that 's not automatic . the guard had an automatic ...\n", + "\n", + "Decoding: take a picture out .\n", + "Target: take the picture out .\n", + "\n", + "Decoding: yeah . first communion . are n't i little angel ?\n", + "Target: yeah . first communion . are n't i a little angel ?\n", + "\n", + "Decoding: let me go get some clothes on . we talk , okay ? be right back .\n", + "Target: let me go get some clothes on . we 'll talk , okay ? be right back .\n", + "\n", + "Decoding: i 'm tired . i 'm sorry , i should go . i 've been enough of nuisance .\n", + "Target: i 'm tired . i 'm sorry , i should go . i 've been enough of a nuisance .\n", + "\n", + "Decoding: they said five hundred . i said six . they said man in the gray flannel suit . i think i said , you mean the attractive guy in the gray flannel suit ?\n", + "Target: they said five hundred . i said six . they said the man in the gray flannel suit . i think i said , you mean the attractive guy in the gray flannel suit ?\n", + "\n", + "Decoding: i have a confession to make . someone gave me six-hundred dollars to spill a drinks on you , as a practical joke .\n", + "Target: i have a confession to make . someone gave me six-hundred dollars to spill drinks on you , as a practical joke .\n", + "\n", + "Decoding: maitre d ' called you christine .\n", + "Target: the maitre d ' called you christine .\n", + "\n", + "Decoding: i know owner of campton place . i could talk to him in the morning .\n", + "Target: i know the owner of campton place . i could talk to him in the morning .\n", + "\n", + "Decoding: fresh shirt ...\n", + "Target: a fresh shirt ...\n", + "\n", + "Decoding: investment banking . moving money from a place to place .\n", + "Target: investment banking . moving money from place to place .\n", + "\n", + "Decoding: what 's the c .r .s . ?\n", + "Target: what 's c .r .s . ?\n", + "\n", + "Decoding: this is a c .r .s .\n", + "Target: this is c .r .s .\n", + "\n", + "Decoding: their ladder here .\n", + "Target: there 's a ladder here .\n", + "\n", + "Decoding: this is n't attempt to be gallant . if i do n't lift you , how are you going to get there ?\n", + "Target: this is n't an attempt to be gallant . if i do n't lift you , how are you going to get there ?\n", + "\n", + "Decoding: are you suggesting we wait till someone 's finds us ?\n", + "Target: are you suggesting we wait till someone finds us ?\n", + "\n", + "Decoding: `` ... wait for help . '' wait for help . i 'm not opening that specifically warns me not to .\n", + "Target: `` ... wait for help . '' wait for help . i 'm not opening a door that specifically warns me not to .\n", + "\n", + "Decoding: read what it says : `` warning , do < u > not < /u > attempt to open . if elevator stops , use the emergency ... ``\n", + "Target: read what it says : `` warning , do < u > not < /u > attempt to open . if elevator stops , use emergency ... ``\n", + "\n", + "Decoding: long story . i found this key in the mouth of wooden harlequin .\n", + "Target: long story . i found this key in the mouth of a wooden harlequin .\n", + "\n", + "Decoding: how do you know that way ?\n", + "Target: how do you know that 's the way ?\n", + "\n", + "Decoding: it 's run by company ... they play elaborate pranks . things like this . i 'm really only now finding out myself .\n", + "Target: it 's run by a company ... they play elaborate pranks . things like this . i 'm really only now finding out myself .\n", + "\n", + "Decoding: you got to be kidding .\n", + "Target: you 've got to be kidding .\n", + "\n", + "Decoding: i do n't think he breathing .\n", + "Target: i do n't think he 's breathing .\n", + "\n", + "Decoding: a bad month . you did exact the same thing to me last week .\n", + "Target: a bad month . you did the exact same thing to me last week .\n", + "\n", + "Decoding: yeah , yeah . she 's called a cab . said something about catching plane .\n", + "Target: yeah , yeah . she called a cab . said something about catching a plane .\n", + "\n", + "Decoding: oh , god yes please . thanks , man . i take you up on that .\n", + "Target: oh , god yes please . thanks , man . i 'll take you up on that .\n", + "\n", + "Decoding: this ... ? oh , this is just ... this is bill .\n", + "Target: this ... ? oh , this is just ... this is the bill .\n", + "\n", + "Decoding: baby , they were all over the house with metal detectors . they switched your gun with look-alike , rigged barrel , loaded with blanks . pop-gun .\n", + "Target: baby , they were all over the house with metal detectors . they switched your gun with a look-alike , rigged barrel , loaded with blanks . pop-gun .\n", + "\n", + "Decoding: you dodged bullet .\n", + "Target: you dodged a bullet .\n", + "\n", + "Decoding: c .r .s . who do you think ? jesus h . , thank your lucky charms . to think what i 've almost got you into .\n", + "Target: c .r .s . who do you think ? jesus h . , thank your lucky charms . to think what i almost got you into .\n", + "\n", + "Decoding: it 's profound life experience .\n", + "Target: it 's a profound life experience .\n", + "\n", + "Decoding: you 've heard of it . you 've seen other people having it . they 're entertainment service , but more than that .\n", + "Target: you 've heard of it . you 've seen other people having it . they 're an entertainment service , but more than that .\n", + "\n", + "Decoding: they make your life fun . there 's only guarantee is you will not be bored .\n", + "Target: they make your life fun . their only guarantee is you will not be bored .\n", + "\n", + "Decoding: not after i done with it . actually , i 've been here . in grad-school i bought crystal-meth from the maitre d ' .\n", + "Target: not after i 'm done with it . actually , i 've been here . in grad-school i bought crystal-meth from the maitre d ' .\n", + "\n", + "Decoding: that 's why it 's a classic . come on , man ... how 'bout hug ... ?\n", + "Target: that 's why it 's a classic . come on , man ... how 'bout a hug ... ?\n", + "\n", + "Decoding: how much is it ? a few thousand , at least . a rolex like that ... lucky for you 've missed it .\n", + "Target: how much is it ? a few thousand , at least . a rolex like that ... lucky for you they missed it .\n", + "\n", + "Decoding: i told you , they hired me over the phone . i 've never met anyone .\n", + "Target: i told you , they hired me over the phone . i never met anyone .\n", + "\n", + "Decoding: i do n't want money . i 'm pulling back curtain . i 'm here to meet the wizard .\n", + "Target: i do n't want money . i 'm pulling back the curtain . i 'm here to meet the wizard .\n", + "\n", + "Decoding: tell them the cops are after you ... tell them you got to talk to someone , i 'm threatening to blow the whistle .\n", + "Target: tell them the cops are after you ... tell them you 've got to talk to someone , i 'm threatening to blow the whistle .\n", + "\n", + "Decoding: they own the whole building . they just move from the floor to floor .\n", + "Target: they own the whole building . they just move from floor to floor .\n", + "\n", + "Decoding: look , it was just a job . nothing personal , ya know ? i play my part , improvise little . that 's what i 'm good at .\n", + "Target: look , it was just a job . nothing personal , ya know ? i play my part , improvise a little . that 's what i 'm good at .\n", + "\n", + "Decoding: that 's right -- you 're left-brain the word fetishist .\n", + "Target: that 's right -- you 're a left-brain word fetishist .\n", + "\n", + "Decoding: one guarantee . payment 's entirely at your brother discretion and , as a gift , dependent on your satisfaction .\n", + "Target: one guarantee . payment 's entirely at your brother 's discretion and , as a gift , dependent on your satisfaction .\n", + "\n", + "Decoding: your brother was a client with our branch . we do a sort of informal scoring . his numbers were outstanding . sure you 're not hungry at all ... ? tung hoy , best in chinatown ...\n", + "Target: your brother was a client with our london branch . we do a sort of informal scoring . his numbers were outstanding . sure you 're not hungry at all ... ? tung hoy , best in chinatown ...\n", + "\n", + "Decoding: key ?\n", + "Target: the key ?\n", + "\n", + "Decoding: nobody 's worried about your father .\n", + "Target: nobody worried about your father .\n", + "\n", + "Decoding: there 's been a break in . lock this door and stay here . do n't move muscle .\n", + "Target: there 's been a break in . lock this door and stay here . do n't move a muscle .\n", + "\n", + "Decoding: i do n't know what you 're talking about . what happened ?\n", + "Target: i do n't know what you 're talking about . what 's happened ?\n", + "\n", + "Decoding: did alarm go off ? the house ... they ... you did n't see ... ?\n", + "Target: did the alarm go off ? the house ... they ... you did n't see ... ?\n", + "\n", + "Decoding: then then .\n", + "Target: goodnight then .\n", + "\n", + "Decoding: okay . i think he into some sort of new personal improvement cult .\n", + "Target: okay . i think he 's into some sort of new personal improvement cult .\n", + "\n", + "Decoding: dinner in the oven .\n", + "Target: dinner 's in the oven .\n", + "\n", + "Decoding: there was incident a few days ago ... a nervous breakdown , they said . the police took him . they left this address , in case anyone ...\n", + "Target: there was an incident a few days ago ... a nervous breakdown , they said . the police took him . they left this address , in case anyone ...\n", + "\n", + "Decoding: what 's trouble ?\n", + "Target: what 's the trouble ?\n", + "\n", + "Decoding: mister ... seymour butts .\n", + "Target: a mister ... seymour butts .\n", + "\n", + "Decoding: what 's the gentleman , maria ?\n", + "Target: what gentleman , maria ?\n", + "\n", + "Decoding: i would n't mention following , except he was very insistent . it 's obviously some sort of prank ...\n", + "Target: i would n't mention the following , except he was very insistent . it 's obviously some sort of prank ...\n", + "\n", + "Decoding: i send your regrets . honestly , why must i even bother ?\n", + "Target: i 'll send your regrets . honestly , why must i even bother ?\n", + "\n", + "Decoding: the hinchberger 's wedding .\n", + "Target: the hinchberger wedding .\n", + "\n", + "Decoding: invitations : museum gala .\n", + "Target: invitations : the museum gala .\n", + "\n", + "Decoding: nice touch . does a game use real bullets ... ?\n", + "Target: nice touch . does the game use real bullets ... ?\n", + "\n", + "Decoding: it 's what they do . it 's like ... being toyed with by a bunch of ... depraved children\n", + "Target: it 's what they do . it 's like ... being toyed with by a bunch of ... depraved children .\n", + "\n", + "Decoding: find out about a company called the c .r .s . consumer recreation services .\n", + "Target: find out about a company called c .r .s . consumer recreation services .\n", + "\n", + "Decoding: someone 's playing hardball . it 's complicated . can i ask favor ?\n", + "Target: someone 's playing hardball . it 's complicated . can i ask a favor ?\n", + "\n", + "Decoding: how 's the concerned should i be ?\n", + "Target: how concerned should i be ?\n", + "\n", + "Decoding: that you 've a involved conrad ... is unforgivable . i am now your enemy .\n", + "Target: that you 've involved conrad ... is unforgivable . i am now your enemy .\n", + "\n", + "Decoding: what happened ...\n", + "Target: what 's happened ...\n", + "\n", + "Decoding: modelling small-group dynamics in formation of narrative hallucinations . you brought us here to scare us . insomnia , that was just a decoy issue . you 're disgusting .\n", + "Target: modelling small-group dynamics in the formation of narrative hallucinations . you brought us here to scare us . insomnia , that was just a decoy issue . you 're disgusting .\n", + "\n", + "Decoding: come on . these are the typically sentimental gestures of depraved industrialist .\n", + "Target: come on . these are the typically sentimental gestures of a depraved industrialist .\n", + "\n", + "Decoding: the children . children hugh crain built the house for . the children he never had .\n", + "Target: the children . the children hugh crain built the house for . the children he never had .\n", + "\n", + "Decoding: obsessive worrier . join club . and you ? i 'd guess ...\n", + "Target: obsessive worrier . join the club . and you ? i 'd guess ...\n", + "\n", + "Decoding: so why did you need the addam family mansion for a scientific test ?\n", + "Target: so why did you need the addam 's family mansion for a scientific test ?\n", + "\n", + "Decoding: -- how much is this car 's worth ?\n", + "Target: -- how much is this car worth ?\n", + "\n", + "Decoding: you do n't really believe it haunted ... do you believe in ghosts ?\n", + "Target: you do n't really believe it 's haunted ... do you believe in ghosts ?\n", + "\n", + "Decoding: so could you ! is this some fucked up the idea of art , putting someone else 's name to a painting ?\n", + "Target: so could you ! is this some fucked up idea of art , putting someone else 's name to a painting ?\n", + "\n", + "Decoding: and why did n't marrow tell < u > us < /u > ? does n't he a trust women ? that fuck .\n", + "Target: and why did n't marrow tell < u > us < /u > ? does n't he trust women ? that fuck .\n", + "\n", + "Decoding: nah , you 're going crazy with doubt , all of your mistakes are coming back up the pipes , and it 's worse than nightmare . --\n", + "Target: nah , you 're going crazy with doubt , all of your mistakes are coming back up the pipes , and it 's worse than a nightmare . --\n", + "\n", + "Decoding: not the way you 've constructed your group , it just not ethical !\n", + "Target: not the way you 've constructed your group , it 's just not ethical !\n", + "\n", + "Decoding: children want me . they 're calling me . they need me .\n", + "Target: the children want me . they 're calling me . they need me .\n", + "\n", + "Decoding: i looked at theo . she had look on her face .\n", + "Target: i looked at theo . she had a look on her face .\n", + "\n", + "Decoding: i was n't thinking about my mother bathroom .\n", + "Target: i was n't thinking about my mother 's bathroom .\n", + "\n", + "Decoding: so ... smell ... is ... smell is sense that triggers the most powerful memories . and memory can trigger a smell .\n", + "Target: so ... smell ... is ... smell is the sense that triggers the most powerful memories . and a memory can trigger a smell .\n", + "\n", + "Decoding: in the bathroom in my mother 's room , toilet was next to old wooden table . it smelled like that wood .\n", + "Target: in the bathroom in my mother 's room , the toilet was next to an old wooden table . it smelled like that wood .\n", + "\n", + "Decoding: cold sensation . who felt it first ?\n", + "Target: the cold sensation . who felt it first ?\n", + "\n", + "Decoding: i really ... honored to be part of this study , jim .\n", + "Target: i 'm really ... honored to be part of this study , jim .\n", + "\n", + "Decoding: nell . good enough . and i jim .\n", + "Target: nell . good enough . and i 'm jim .\n", + "\n", + "Decoding: that ? that 's a hill house .\n", + "Target: that ? that 's hill house .\n", + "\n", + "Decoding: here 's how they 're organized . groups of five , very different personalities : scored all over the kiersey temperament sorter just like you asked for . and they all score high on insomnia charts .\n", + "Target: here 's how they 're organized . groups of five , very different personalities : scored all over the kiersey temperament sorter just like you asked for . and they all score high on the insomnia charts .\n", + "\n", + "Decoding: you hear the vibrations in the wire . there 's magnetic pulse in the wires , you feel it . i could test it .\n", + "Target: you hear the vibrations in the wire . there 's a magnetic pulse in the wires , you feel it . i could test it .\n", + "\n", + "Decoding: but experiment was a failure .\n", + "Target: but the experiment was a failure .\n", + "\n", + "Decoding: he wandering around house , and nell heard him . she thought it was ghosts . let 's go look for him again .\n", + "Target: he 's wandering around the house , and nell heard him . she thought it was ghosts . let 's go look for him again .\n", + "\n", + "Decoding: i 'll take her with me to university tomorrow . i ca n't believe i read the test wrong . i did n't see anything that looked like she was suicidal .\n", + "Target: i 'll take her with me to the university tomorrow . i ca n't believe i read the test wrong . i did n't see anything that looked like she was suicidal .\n", + "\n", + "Decoding: no , but nell been here longer than i have .\n", + "Target: no , but nell 's been here longer than i have .\n", + "\n", + "Decoding: rene crain . up there . rope . ship 's hawser . hard to tie . do n't know how she 's got it .\n", + "Target: rene crain . up there . rope . ship 's hawser . hard to tie . do n't know how she got it .\n", + "\n", + "Decoding: mrs . dudley be waiting for you .\n", + "Target: mrs . dudley 'll be waiting for you .\n", + "\n", + "Decoding: that 's a good question . what is it about fences ? sometimes a locked chain makes people on both sides of fence just a little more comfortable . why would that be ?\n", + "Target: that 's a good question . what is it about fences ? sometimes a locked chain makes people on both sides of the fence just a little more comfortable . why would that be ?\n", + "\n", + "Decoding: well , i 've never lived with a beauty . you must love working here .\n", + "Target: well , i 've never lived with beauty . you must love working here .\n", + "\n", + "Decoding: nell , it makes sense . it 's all makes sense . you and i , we were scaring each other , working each other up .\n", + "Target: nell , it makes sense . it all makes sense . you and i , we were scaring each other , working each other up .\n", + "\n" + ], + "name": "stdout" + } + ] } - ], - "source": [ - "for decoding, target in errors:\n", - " print(\"Decoding: \" + \" \".join(decoding))\n", - " print(\"Target: \" + \" \".join(target) + \"\\n\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 2", - "language": "python", - "name": "python2" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} + ] +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..8484d8d --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +pandas==0.19.2 +scipy==0.18.1 +nltk==3.2.2 +matplotlib==1.5.3 +tensorflow==1.8.0 diff --git a/seq2seq.py b/seq2seq.py index 77cdde8..94df462 100644 --- a/seq2seq.py +++ b/seq2seq.py @@ -73,10 +73,14 @@ from tensorflow.python.ops import rnn_cell from tensorflow.python.ops import variable_scope from tensorflow.python.util import nest +from tensorflow.contrib.rnn.python.ops import core_rnn_cell # TODO(ebrevdo): Remove once _linear is fully deprecated. -linear = rnn_cell._linear # pylint: disable=protected-access - +# original code - linear = rnn_cell._linear # pylint: disable=protected-access +# solved I think? +# changed based on @lutein comment here +# https://github.com/atpaino/deep-text-corrector/issues/10#issuecomment-414587857 +linear = core_rnn_cell._Linear def _extract_argmax_and_embed(embedding, output_projection=None, update_embedding=True):