From f7d55ffff3d585f0128634f906fb754ab87c7be0 Mon Sep 17 00:00:00 2001
From: Oxer11 <17300240035@fudan.edu.cn>
Date: Sun, 12 May 2019 13:40:46 +0800
Subject: [PATCH 1/2] some corrections

---
 docs/assignment-3/index.html | 66 ++++++++++++++++++------------------
 docs/assignment-3/index.md   | 10 +++---
 2 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/docs/assignment-3/index.html b/docs/assignment-3/index.html
index 153932e..2a6bf6c 100644
--- a/docs/assignment-3/index.html
+++ b/docs/assignment-3/index.html
@@ -4,21 +4,20 @@ index

Pattern Recognition and Machine Learning

Fudan University / 2019 Spring

Assignment 3

In this assignment you are going to implement an RNN (namely an LSTM) for generating Tang poetry. This assignment description outlines the landscape so that you know how to approach it. You will also get familiar with PyTorch and FastNLP by completing this assignment; their docs are highly recommended for getting started, and you can also try out the examples included in their code repositories.

Description

In the previous assignment you already implemented back-propagation of gradients with numpy, and you must have had a lot of fun playing with it. Nowadays autograd tools such as TensorFlow and PyTorch are pervasive, and people rarely write deep neural networks without them, not only because they provide great convenience for gradient computation, but also because they can leverage GPUs for amazingly fast training. Still, knowing the details under the hood is very beneficial if you want to dive deeper into deep learning, and these details are frequently asked about in interviews.

Part 1, Differentiate LSTM, 20%

In the course we talked about recurrent neural networks and one of their most widely used variants, the LSTM (Long Short-Term Memory [1]) network. To remind you how an LSTM works, the LSTM unit processes the input in the following manner under the hood:

$$
\begin{align}
\mathbf{f}_t &= \sigma(W_f \cdot [\mathbf{h}_{t-1}, \mathbf{x}_t] + b_f) \\
\mathbf{i}_t &= \sigma(W_i \cdot [\mathbf{h}_{t-1}, \mathbf{x}_t] + b_i) \\
\bar{C}_t &= \tanh(W_C \cdot [\mathbf{h}_{t-1}, \mathbf{x}_t] + b_C) \\
C_t &= \mathbf{f}_t * C_{t-1} + \mathbf{i}_t * \bar{C}_t \\
\mathbf{o}_t &= \sigma(W_o \cdot [\mathbf{h}_{t-1}, \mathbf{x}_t] + b_o) \\
\mathbf{h}_t &= \mathbf{o}_t * \tanh(C_t)
\end{align}
$$

where $\cdot$ stands for matrix multiplication, $*$ for the element-wise product, and $[\cdot, \cdot]$ for vector concatenation. Note that $W_{*}$ and $b_{*}$ are parameters of the LSTM that are shared across all steps.

Also note that the input here is written as a single vector, while in your implementation you should use batched input: multiplying a matrix of stacked input vectors is the same as multiplying each vector separately and concatenating the results horizontally.
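To make the shapes concrete, here is a minimal numpy sketch of one batched LSTM step following the equations above. It is only an illustration under assumed names and shapes (`lstm_step`, weights of shape `(H + D, H)`, a batch of size `B`), not a required interface.

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x_t, h_prev, C_prev, Wf, bf, Wi, bi, WC, bC, Wo, bo):
    """One batched LSTM step. x_t: (B, D); h_prev, C_prev: (B, H); each W: (H + D, H); each b: (H,)."""
    z = np.concatenate([h_prev, x_t], axis=1)   # [h_{t-1}, x_t], shape (B, H + D)
    f = sigmoid(z @ Wf + bf)                    # forget gate
    i = sigmoid(z @ Wi + bi)                    # input gate
    C_bar = np.tanh(z @ WC + bC)                # candidate cell state
    C = f * C_prev + i * C_bar                  # element-wise products
    o = sigmoid(z @ Wo + bo)                    # output gate
    h = o * np.tanh(C)
    return h, C
```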

For language modeling, we use the LSTM to predict the next word or character at each step. For example, if the input at each step comes from a sentence $s_1, s_2, \cdots, s_n$, the output at each step should be $s_2, \cdots, s_n, \text{EOS}$, where EOS stands for end of sentence. To obtain a prediction from the LSTM, we first create a vocabulary, an ordered set containing all the words in your training dataset, which maps each word to an integer; we then map each integer to a vector, which becomes the input to the LSTM. At each step $t$ we apply a linear transformation to the hidden vector $\mathbf{h}_t$ to obtain a score vector whose size equals the vocabulary size. Because a linear transformation produces unbounded values, to turn the prediction into a probability distribution we take the exponential of each score and normalize by their sum, i.e. take the softmax $p_i = \exp(y_i/\tau) / \sum_j \exp(y_j/\tau)$, where $\tau$ is the temperature term, usually 1; you will encounter this term again later. As we learned in the previous assignment, we can use the cross-entropy loss to urge the prediction toward the actual next word, and minimize the average loss to provide the training signal for the network.
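As a sketch of the prediction head just described (the names `W_out`, `b_out`, and `tau`, and the shapes, are assumptions for illustration):

```python
import numpy as np

def predict_and_loss(h_t, target_ids, W_out, b_out, tau=1.0):
    """h_t: (B, H) hidden states; target_ids: (B,) ids of the true next words; W_out: (H, V)."""
    scores = (h_t @ W_out + b_out) / tau                       # unbounded scores over the vocabulary
    scores -= scores.max(axis=1, keepdims=True)                # shift for numerical stability
    probs = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)  # softmax
    nll = -np.log(probs[np.arange(len(target_ids)), target_ids] + 1e-12)  # cross-entropy per example
    return probs, nll.mean()
```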

Requirements

  1. Differentiate one step of the LSTM, i.e. derive the partial derivatives of $\mathbf{h}_t$ with respect to $\mathbf{f}_t, \mathbf{i}_t, \bar{C}_t, C_t, C_{t-1}, \mathbf{o}_t, \mathbf{h}_{t-1}, \mathbf{x}_t, W_f, W_i, W_C, W_o, b_f, b_i, b_C, b_o$, e.g. $\frac{\partial \mathbf{h}_t}{\partial \mathbf{f}_t}$. Include your formalization and derivation in your report. 10%
  2. Describe how you can differentiate through time for the training of an LSTM language model on a sentence $s_1, s_2, \cdots, s_n$ (a numerical gradient check, sketched below this list, can help you verify your derivations). 10%
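If you want to sanity-check the derivatives you derive here (or the numpy back-propagation bonus in Part 2), a finite-difference comparison is a simple tool. The sketch below is illustrative only: it assumes the `lstm_step` function from the earlier numpy sketch and checks the gradient of a scalar function of the output with respect to one parameter array.

```python
import numpy as np

def numerical_grad(f, x, eps=1e-5):
    """Central-difference estimate of d f() / d x for a scalar-valued f that reads x in place."""
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=["multi_index"])
    while not it.finished:
        idx = it.multi_index
        old = x[idx]
        x[idx] = old + eps
        f_plus = f()
        x[idx] = old - eps
        f_minus = f()
        x[idx] = old
        grad[idx] = (f_plus - f_minus) / (2 * eps)
        it.iternext()
    return grad

# Example: compare your analytic d sum(h_t) / d Wf against
# numerical_grad(lambda: lstm_step(x_t, h_prev, C_prev, Wf, bf, Wi, bi, WC, bC, Wo, bo)[0].sum(), Wf)
```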
Part 2, Autograd Training of LSTM, 80%

In this part you are going to implement an LSTM to build a language model to generate Tang poetry.

You are given a small dataset containing some Tang poems. First split the dataset into a training set and a development set; we recommend an 80%/20% split. Then create a vocabulary containing all the words (or characters, but we will stick to calling them words) in the training dataset; be aware that you might want to insert a new word EOS and a special token OOV for unknown (out-of-vocabulary) words. To process the dataset, transform the poems into a sequence of integers representing words in the vocabulary. You can then randomly crop this sequence into batches of short sequences for training the LSTM. Note that at each step a single input to the LSTM should be a vector, so you need a mapping from integers to vectors; this step is known as embedding in NLP. You are encouraged to use the vocabulary and dataset utilities from FastNLP to implement your vocabulary.
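A minimal preprocessing sketch, using plain Python containers rather than committing to FastNLP's exact API; the token names `<eos>` and `<oov>` and all helper names are assumptions:

```python
import random

def build_vocab(train_poems):
    """train_poems: list of strings, one poem each. Returns a word -> integer mapping."""
    vocab = {"<eos>": 0, "<oov>": 1}
    for poem in train_poems:
        for ch in poem:                              # character-level "words"
            vocab.setdefault(ch, len(vocab))
    return vocab

def encode(poems, vocab):
    """Concatenate poems into one long id sequence, appending <eos> after each poem."""
    ids = []
    for poem in poems:
        ids.extend(vocab.get(ch, vocab["<oov>"]) for ch in poem)
        ids.append(vocab["<eos>"])
    return ids

def random_batch(ids, batch_size, seq_len):
    """Randomly crop short sequences; targets are the inputs shifted by one step."""
    starts = [random.randrange(0, len(ids) - seq_len - 1) for _ in range(batch_size)]
    x = [ids[s:s + seq_len] for s in starts]
    y = [ids[s + 1:s + seq_len + 1] for s in starts]
    return x, y
```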

Following the previous discussion, we arrive at a loss function that provides gradients for the parameters and also for the embedding (you can either fix the embedding at its initialization or update it with the gradient).
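To make this training signal concrete, here is a hedged PyTorch sketch. All class, variable, and hyperparameter names are illustrative; the four gate matrices are stacked into one `W`, which is equivalent to the separate $W_f, W_i, W_C, W_o$ above, and the LSTM cell is written out by hand rather than taken from `nn.LSTM`, as required later.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class TinyLSTMLM(nn.Module):
    """Illustrative LSTM language model: embedding -> hand-written LSTM cell -> linear output."""
    def __init__(self, vocab_size, input_size, hidden_size):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, input_size)                 # trainable embedding
        self.W = nn.Parameter(0.1 * torch.randn(hidden_size + input_size, 4 * hidden_size))
        self.b = nn.Parameter(torch.zeros(4 * hidden_size))
        self.out = nn.Linear(hidden_size, vocab_size)
        self.hidden_size = hidden_size

    def forward(self, x_ids):                                             # x_ids: (B, T) integer ids
        B, T = x_ids.shape
        h = torch.zeros(B, self.hidden_size, device=x_ids.device)
        C = torch.zeros_like(h)
        emb = self.embed(x_ids)                                           # (B, T, input_size)
        logits = []
        for t in range(T):
            z = torch.cat([h, emb[:, t]], dim=1) @ self.W + self.b        # all four gates at once
            i, f, o, C_bar = z.chunk(4, dim=1)
            C = torch.sigmoid(f) * C + torch.sigmoid(i) * torch.tanh(C_bar)
            h = torch.sigmoid(o) * torch.tanh(C)
            logits.append(self.out(h))
        return torch.stack(logits, dim=1)                                 # (B, T, vocab_size)

# One training step: autograd supplies gradients for the LSTM parameters and the embedding alike.
# logits = model(x)                                   # x, y: (B, T) LongTensors from your batcher
# loss = F.cross_entropy(logits.reshape(-1, logits.size(-1)), y.reshape(-1))
# loss.backward(); optimizer.step(); optimizer.zero_grad()
```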

As the model itself should be clear by now, you should include the hyperparameters and training settings you use in your report. They are:

  • Vocabulary size,
  • Batch size,
  • Sentence length,
  • Hidden size, i.e. the length of the hidden vector of the LSTM,
  • Input size, i.e. the length of the input vector for the LSTM,
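For bookkeeping you might gather these settings in one place so they are easy to report; the values below are placeholders, not recommendations:

```python
from dataclasses import dataclass

@dataclass
class Config:
    vocab_size: int = 5000   # size of the vocabulary built from the training set
    batch_size: int = 32
    seq_len: int = 48        # sentence (crop) length used during training
    hidden_size: int = 256   # length of the LSTM hidden vector
    input_size: int = 128    # length of the input (embedding) vector
```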

The training of the model stops when it can no longer improve at predicting the next word on the development dataset, which can be evaluated by perplexity:

$$
\mathrm{PP}(s_1, s_2, \cdots, s_n) = \exp\Big(-\frac{1}{n}\sum_{t=1}^{n}\log p(s_t \mid s_1, \cdots, s_{t-1})\Big)
$$

The perplexity should be evaluated on the whole development dataset: split the development data into segments of the same sentence length used during training, and then evaluate the average perplexity over all of these segments. Use early stopping when the perplexity stops improving. You can try trainer.py from FastNLP for this, as early stopping is already implemented there.
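A sketch of such an evaluation loop (assuming the illustrative model above, an encoded development sequence `dev_ids`, and a helper name `dev_perplexity` of my own choosing):

```python
import math
import torch
import torch.nn.functional as F

@torch.no_grad()
def dev_perplexity(model, dev_ids, seq_len):
    """Split the development ids into consecutive seq_len chunks and exponentiate the mean NLL."""
    total_nll, total_tokens = 0.0, 0
    for s in range(0, len(dev_ids) - seq_len - 1, seq_len):
        x = torch.tensor([dev_ids[s:s + seq_len]])
        y = torch.tensor([dev_ids[s + 1:s + seq_len + 1]])
        logits = model(x)
        total_nll += F.cross_entropy(logits.reshape(-1, logits.size(-1)), y.reshape(-1),
                                     reduction="sum").item()
        total_tokens += y.numel()
    return math.exp(total_nll / total_tokens)

# Early stopping: keep the checkpoint with the lowest development perplexity and stop
# once it has failed to improve for a few evaluations (or let FastNLP's trainer handle it).
```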

To generate a Tang poem once you have trained the model, you can first sample a word to start, use it as the input to the LSTM, then sample from the output of the LSTM and in turn feed the generated word back into the LSTM to produce the next word. To allow more variation, people sometimes use a temperature term $\tau$ in the softmax to control the diversity of the generation; for example, $\tau = 0.6$ produces sharper, more conservative samples than $\tau = 1$, while larger values of $\tau$ produce more varied ones.
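A sampling sketch under the same assumptions as the earlier sketches (the illustrative model and vocabulary; `sample_poem` and its arguments are made-up names). Re-running the model on the whole prefix at each step is wasteful but keeps the sketch short:

```python
import torch
import torch.nn.functional as F

def sample_poem(model, vocab, start_word, max_len=32, tau=1.0):
    """Feed the start word, then repeatedly sample the next word from the temperature-scaled softmax."""
    id2word = {i: w for w, i in vocab.items()}
    ids = [vocab.get(start_word, vocab["<oov>"])]
    with torch.no_grad():
        for _ in range(max_len):
            logits = model(torch.tensor([ids]))[0, -1]          # scores for the next word
            probs = F.softmax(logits / tau, dim=-1)             # temperature-scaled distribution
            next_id = torch.multinomial(probs, num_samples=1).item()
            if next_id == vocab["<eos>"]:
                break
            ids.append(next_id)
    return "".join(id2word[i] for i in ids)
```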

Above all, you might find this article (http://karpathy.github.io/2015/05/21/rnn-effectiveness/) very helpful for understanding the task; its author implemented a vanilla RNN language model that generates not only poems but also Linux kernel code.

 

Requirements

  1. Initialization. The embedding layer and the parameters of the model should be initialized before training. Explain why you should not simply initialize them all to zero, and propose a way to initialize them properly. 10%
  2. Generating Tang poems. Implement an LSTM to generate poems. Report the perplexity after training, and generate poems that start with 日、红、山、夜、湖、海、月; include the poems in your report. You are allowed to implement the LSTM with PyTorch, but you should not rely on the LSTM cell or LSTM module it provides; only use its autograd machinery to handle the gradients for you. You are also not strictly required to follow everything described above: as long as you make your choices clear in the report, you can use whatever setting you believe is more appropriate for the task of generating poems. You might find this paper very helpful. 50%
Bonus: you will earn up to a 20% bonus (making the full mark of this assignment 120%) if you implement the gradient calculation and back-propagation yourself with numpy. Because you have implemented the LSTM in PyTorch, you will have something to compare your gradients against, which helps with gradient checking. You may also use external data such as 全唐诗 (the Complete Tang Poems) to help your generation; this will also be considered a bonus.
  3. Optimization. We have not specified how you should optimize your model, but of course you should use gradient descent. There are many gradient descent algorithms you can explore; a non-exhaustive list: stochastic gradient descent (SGD), SGD with momentum [2], Nesterov [3], Adagrad [4], Adadelta [5], Adam [6], etc. You should try at least two optimization algorithms to train your model (a minimal setup for comparing two of them is sketched after this list). Note that some of these algorithms require you to keep additional state across batches, so think about how this influences the way you implement your gradient calculation if you intend to go for the 20% bonus in the previous requirement. Include a comparison of the algorithms you used in your report. 20%
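For requirement 3, a minimal way to switch between two optimizers under otherwise identical settings; the particular choices (SGD with momentum vs. Adam) and learning rates are examples only:

```python
import torch

def make_optimizer(model, name):
    if name == "sgd_momentum":
        return torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
    if name == "adam":
        return torch.optim.Adam(model.parameters(), lr=1e-3)
    raise ValueError(f"unknown optimizer: {name}")

# Train one model per optimizer from the same initialization and compare the development
# perplexity curves. Momentum and Adam keep running statistics across batches, which is
# exactly the extra state mentioned above if you re-implement the update rules in numpy.
```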

Clarification

  1. If you have not used PyTorch and FastNLP before, you might find the assignment challenging, but try your best and describe your effort in your report. Even if you do not reach the stated goal, your effort will be marked as long as you tried hard. For example, if you find FastNLP hard to use, you can include your suggestions for improving it in your report.
  2. Please do get yourself familiar with PyTorch and FastNLP, because you must use them to finish the subsequent assignment and the final project.
  3. For the bonus, you only have to show that your numpy LSTM works (for example, by showing that its gradients match those of the PyTorch version, or that it can overfit the training dataset); you do not have to stick with it for the other tasks.

[1] Hochreiter, S., & Schmidhuber, J. (1997). Long Short-Term Memory. http://www.bioinf.jku.at/publications/older/2604.pdf
[2] Qian, N. (1999). On the momentum term in gradient descent learning algorithms. Neural Networks: The Official Journal of the International Neural Network Society, 12(1), 145–151. http://doi.org/10.1016/S0893-6080(98)00116-6
[3] Nesterov, Y. (1983). A method for unconstrained convex minimization problem with the rate of convergence O(1/k²). Doklady AN SSSR (translated as Soviet Math. Dokl.), 269, 543–547.
[4] Duchi, J., Hazan, E., & Singer, Y. (2011). Adaptive Subgradient Methods for Online Learning and Stochastic Optimization. Journal of Machine Learning Research, 12, 2121–2159. http://jmlr.org/papers/v12/duchi11a.html
[5] Zeiler, M. D. (2012). ADADELTA: An Adaptive Learning Rate Method. http://arxiv.org/abs/1212.5701
[6] Kingma, D. P., & Ba, J. L. (2015). Adam: A Method for Stochastic Optimization. International Conference on Learning Representations, 1–13.
\ No newline at end of file

diff --git a/docs/assignment-3/index.md b/docs/assignment-3/index.md
index 0a06352..ce6f104 100644
--- a/docs/assignment-3/index.md
+++ b/docs/assignment-3/index.md
@@ -25,7 +25,7 @@ C_t & = \mathbf{f}_t * C_{t-1} + \mathbf{i}_t * \bar{C}_t \\
 \mathbf{h}_t &= \mathbf{o}_t * tanh(C_t) \\
 \end{align}
 $$
-where $\cdot$ stands for matrix multiplication, $*$ for dot production and $[\cdot, \cdot]$ for vector concatenation. Note $W_{*}$ and $b_{*}$ are parameters of the LSTM that is the same throughout all steps.
+where $\cdot$ stands for matrix multiplication, $*$ for element-wise production and $[\cdot, \cdot]$ for vector concatenation. Note $W_{*}$ and $b_{*}$ are parameters of the LSTM that is the same throughout all steps.

 Also note that here the input is a vector while in your implementation, please use batched input as matrix multiplication on matrix is the same as multiplying vectors concatenated horizontally.

@@ -34,7 +34,7 @@ For language modeling, we use LSTM to predict the next word or character at each

 Requirements

-1. Differentiate one step of LSTM with respect to $\mathbf{h}_t$ for $\mathbf{f}_t, \mathbf{i}_t, \mathbf{i}_t, \bar{C}_t, C_t, C_{t-1},\mathbf{o}_t, \mathbf{h}_{t-1}, \mathbf{x}_t$. i.e. $\frac{\partial \mathbf{h}_t}{\partial \mathbf{f}_t}$, include your formalization and derivation in your report. 10%
+1. Differentiate one step of LSTM with respect to $\mathbf{h}_t$ for $\mathbf{f}_t, \mathbf{i}_t, \bar{C}_t, C_t, C_{t-1},\mathbf{o}_t, \mathbf{h}_{t-1}, \mathbf{x}_t, W_f, W_i, W_C, W_o, b_f, b_i, b_C, b_o$. i.e. $\frac{\partial \mathbf{h}_t}{\partial \mathbf{f}_t}$, include your formalization and derivation in your report. 10%

 2. Describe how can you differentiate through time for the training of an LSTM language model for sentence $s_1,s_2,\cdots, s_n$. 10%

@@ -65,7 +65,7 @@ The perplexity should be evaluated on the whole development dataset, which is to

 To generate a Tang poem once you got the model trained, you could first sample a word to start and then use it as input to the LSTM, and them sample from the output of the LSTM and in turn send the generated word into the LSTM to have the next word generated. To allow more variation, sometimes people use a **temperature term $\tau$** in the sofmax to control the diversity of generation, for example use $\tau=0.6$ to make it more variant than $\tau=1$.

-Above all, you might find [this artical](http://karpathy.github.io/2015/05/21/rnn-effectiveness/) great to help understand the task, where the author implemented a vanilla RNN language model to generate not only poems, but also linux kernel code.
+Above all, you might find [this article](http://karpathy.github.io/2015/05/21/rnn-effectiveness/) great to help understand the task, where the author implemented a vanilla RNN language model to generate not only poems, but also linux kernel code.

@@ -82,9 +82,9 @@ Clarification
 2. Please do get your self familiar with PyTorch and FastNLP because in the subsequent assignment and the final project, you must use them to finish the task.
 3. In the bonus, you only have to show case that your numpy LSTM is workable (for example by showing that its gradient matches the gradient from PyTorch version or it could overfit the training dataset), and you don't have to stick to use it for other tasks.

-[^Hochreiter & Schmidhuber (1997)]:http://www.bioinf.jku.at/publications/older/2604.pdf
+[^Hochreiter & Schmidhuber (1997)]: http://www.bioinf.jku.at/publications/older/2604.pdf
 [^2]: Qian, N. (1999). On the momentum term in gradient descent learning algorithms. Neural Networks: The Official Journal of the International Neural Network Society, 12(1), 145–151.
 [^3]: Nesterov, Y. (1983). A method for unconstrained convex minimization problem with the rate of convergence o(1/k2). Doklady ANSSSR (translated as Soviet.Math.Docl.), vol. 269, pp. 543–547.
 [^4]: Duchi, J., Hazan, E., & Singer, Y. (2011). Adaptive Subgradient Methods for Online Learning and Stochastic Optimization. Journal of Machine Learning Research, 12, 2121–2159. Retrieved from
 [^5]: Zeiler, M. D. (2012). ADADELTA: An Adaptive Learning Rate Method. Retrieved from
-[^6]:Kingma, D. P., & Ba, J. L. (2015). Adam: a Method for Stochastic Optimization. International Conference on Learning Representations, 1–13.
\ No newline at end of file
+[^6]: Kingma, D. P., & Ba, J. L. (2015). Adam: a Method for Stochastic Optimization. International Conference on Learning Representations, 1–13.
\ No newline at end of file

From 97a34e2cae1159d54b01c7f74fd20c08f56d5810 Mon Sep 17 00:00:00 2001
From: Zuobai Zhang <44687071+Oxer11@users.noreply.github.com>
Date: Sun, 26 May 2019 10:28:25 +0800
Subject: [PATCH 2/2] Update index

---
 docs/index.html | 294 ++++++++++++++++++++++++------------------------
 1 file changed, 147 insertions(+), 147 deletions(-)

diff --git a/docs/index.html b/docs/index.html
index 66f2e6d..a40067f 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -1,147 +1,147 @@
PRML Course Description

Pattern Recognition and Machine Learning

Fudan University / 2019 Spring

[homepage] [coursework]

Logistics

  • Instructor: Xipeng Qiu (@xpqiu)
  • Teaching Assistants: Zhifeng Hu (@ichn-hu), Shaojing Wang (@2017alan)
  • Grading: 4 assignments with a total weight of 65%, and a final project worth 35% (the detailed weights may be updated later, but will not change dramatically)

Syllabus
Date   | Course                                                                                          | Note
Feb 27 | Course Introduction; Curve Fitting [slides]                                                     |
Mar 6  | Density Estimation; Non-parametric Methods [slides]                                             |
Mar 13 | Linear Regression [slides]                                                                      | Assignment 1 Released
Mar 20 | Linear Model for Classification [slides]                                                        | Assignment 1 Due
Mar 27 | Kernel Trick; Support Vector Machine [kernel - slides][svm - slides]                            | Assignment 2 Released
Apr 3  | Fully Connected Neural Network; Activation Functions; Swish Activation Function                 |
Apr 10 | Convolutional Neural Network                                                                    | Assignment 2 Due
Apr 17 | Recurrent Neural Network; Graph Neural Network                                                  |
Apr 24 |                                                                                                 |
May 1  | No lecture due to vacation                                                                      |
May 8  | Network Optimization and Regularization; Attention [optimization - slides][Attention - slides]  | Assignment 3 Released, Assignment 4 Released
May 15 | Graphical Models [slides - I][slides - II][slides - III]                                        |
May 22 | EM and Inference [slides - I]                                                                   | Assignment 3 Due
May 29 |                                                                                                 | Assignment 4 Due